In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import scipy.io
import os
import sys

%matplotlib inline

### read the labels to ppg signals

In [3]:
labels = ["ID", "Gender", "Age", "Glucose", "Height", "Weight"]
labels_df = pd.read_csv("PPG_Dataset/Labels/total_labels.csv", names= labels)

In [4]:
labels_df.head()

Unnamed: 0,ID,Gender,Age,Glucose,Height,Weight
0,'1','Male',38,99,180,53
1,'1','Male',38,102,180,53
2,'1','Male',38,103,180,53
3,'1','Male',38,128,180,53
4,'1','Male',38,130,180,53


In [5]:
# the range of glucose reading
labels_df["Glucose"].describe()

count     67.000000
mean     115.014925
std       18.736201
min       88.000000
25%      102.500000
50%      110.000000
75%      125.500000
max      183.000000
Name: Glucose, dtype: float64

In [6]:
labels_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 67 entries, 0 to 66
Data columns (total 6 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   ID       67 non-null     object
 1   Gender   67 non-null     object
 2   Age      67 non-null     int64 
 3   Glucose  67 non-null     int64 
 4   Height   67 non-null     int64 
 5   Weight   67 non-null     int64 
dtypes: int64(4), object(2)
memory usage: 3.3+ KB


In [7]:
labels_df.loc[(labels_df["Glucose"] >= 150) & (labels_df["Glucose"] <= 160 )] 

Unnamed: 0,ID,Gender,Age,Glucose,Height,Weight


In [8]:
# get another catagory for glucose level
levels = [1, 2, 3, 4, 5, 6, 7, 8, 9]
labels_df["Glucose_Level"] = pd.cut(labels_df["Glucose"],
       bins=[87, 95, 100, 105, 110, 120, 130, 140, 150, 190],
      labels=levels) 

In [9]:
# clean ID column from the ''
labels_df["ID"] = labels_df["ID"].str.extract(r"(\d+)").astype(int) 

In [22]:
labels_df.head(5)

Unnamed: 0,ID,Gender,Age,Glucose,Height,Weight,Glucose_Level
0,1,'Male',38,99,180,53,2
1,1,'Male',38,102,180,53,3
2,1,'Male',38,103,180,53,3
3,1,'Male',38,128,180,53,6
4,1,'Male',38,130,180,53,6


In [23]:
# put the label datafram in the write order
s_labels_df = labels_df.sort_values(by='ID')

In [25]:
s_labels_df.head(20)

Unnamed: 0,ID,Gender,Age,Glucose,Height,Weight,Glucose_Level
0,1,'Male',38,99,180,53,2
1,1,'Male',38,102,180,53,3
2,1,'Male',38,103,180,53,3
3,1,'Male',38,128,180,53,6
4,1,'Male',38,130,180,53,6
5,1,'Male',38,134,180,53,7
6,1,'Male',38,136,180,53,7
32,2,'Male',25,111,187,75,5
31,2,'Male',25,108,187,75,4
33,2,'Male',25,118,187,75,5


In [None]:
labels_df["ID"].

In [26]:
# grouping by ID
labels_gb = s_labels_df.groupby('ID')

In [27]:
labels_gb.head()

Unnamed: 0,ID,Gender,Age,Glucose,Height,Weight,Glucose_Level
0,1,'Male',38,99,180,53,2
1,1,'Male',38,102,180,53,3
2,1,'Male',38,103,180,53,3
3,1,'Male',38,128,180,53,6
4,1,'Male',38,130,180,53,6
...,...,...,...,...,...,...,...
41,22,'Female',24,124,170,50,6
40,22,'Female',24,108,170,50,4
39,22,'Female',24,88,170,50,1
42,23,'Male',27,100,173,57,2


In [20]:
labels_gb.get_group(23)

Unnamed: 0,ID,Gender,Age,Glucose,Height,Weight,Glucose_Level
42,23,'Male',27,100,173,57,2
43,23,'Male',27,108,173,57,4


In [28]:
labels_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 67 entries, 0 to 66
Data columns (total 7 columns):
 #   Column         Non-Null Count  Dtype   
---  ------         --------------  -----   
 0   ID             67 non-null     int32   
 1   Gender         67 non-null     object  
 2   Age            67 non-null     int64   
 3   Glucose        67 non-null     int64   
 4   Height         67 non-null     int64   
 5   Weight         67 non-null     int64   
 6   Glucose_Level  67 non-null     category
dtypes: category(1), int32(1), int64(4), object(1)
memory usage: 3.4+ KB


### from mat to csv

In [13]:
def matTocsv(path, s_dir): 
    dirc = os.listdir(path)
    for file in dirc:
        #load mat file
        mat = scipy.io.loadmat(path +'/'+ file)
        variables = scipy.io.whosmat(path +'/'+ file)
        df_list = []
        for tple in variables:
            df = pd.DataFrame(mat[tple[0]], columns=[tple[0]])
            df_list.append
        if len(df_list) > 1:
            df_f = pd.concat(df_list, axis=1)
        else:
            df_f = df
        #save the file as csv
        file = "/"+file[:-4]
        df_f.to_csv(s_dir +file+ '.csv', index= False)

#### warning!!!! do not run again and again
<br>
<br>
<br>

In [14]:
#convert and save the files
dir_path = 'PPG_Dataset/RawData' # dir of files that I want to convert
dir_s_path = 'PPG_Dataset/RawCSV' # dir for where I want to save the files
matTocsv(dir_path, dir_s_path)

In [29]:
s_labels_df.to_csv("PPG_Dataset/Labels/labels_final.csv", index=False)

### down sampling 

In [35]:
def down_sampling(path, ds=15):
    dirc = os.listdir(path)
    for dic in dirc:
        if dic == ".ipynb_checkpoints":
            continue
        df = pd.read_csv(path+"/"+dic)
        df = df[df.index%ds == 0] #indexing each one in 15 sample
        df.index = np.arange(0, len(df)) # reset the index
        df.to_csv("PPG_Dataset/DownSampling/PPG_ds/"+dic, index= False)

In [36]:
down_sampling("PPG_Dataset/RawCSV")