## ANALISIS DE DATOS DE HR (HEART RATE)

El presente es para analizar los datos del Heart Rate del smartwatch, el cual tiene un procesamiento de datos en 1Hz, que seria un dato por segundo

In [267]:
# Importando Pandas y otras librerias
import pandas as pd
import numpy as np

In [268]:
PACIENTE = '015'
PATH_FOLDER = 'G:\\Dataset\\big-ideas-lab-glycemic-variability-and-wearable-device-data-1.1.2\\'+PACIENTE+'\\'

In [269]:
# Leyendo el CSV
heartrate_values = pd.read_csv(PATH_FOLDER + 'HR_'+PACIENTE+'.csv', engine='python', na_values="not available")

In [270]:
heartrate_values.head()

Unnamed: 0,datetime,hr
0,2020-07-24 07:07:59,51.0
1,2020-07-24 07:08:00,51.0
2,2020-07-24 07:08:01,50.33
3,2020-07-24 07:08:02,55.75
4,2020-07-24 07:08:03,61.2


In [271]:
heartrate_values.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 436509 entries, 0 to 436508
Data columns (total 2 columns):
 #   Column    Non-Null Count   Dtype  
---  ------    --------------   -----  
 0   datetime  436509 non-null  object 
 1    hr       436509 non-null  float64
dtypes: float64(1), object(1)
memory usage: 6.7+ MB


In [272]:
heartrate_values.count()

datetime    436509
 hr         436509
dtype: int64

In [273]:
heartrate_values["datetime"].head()

0    2020-07-24 07:07:59
1    2020-07-24 07:08:00
2    2020-07-24 07:08:01
3    2020-07-24 07:08:02
4    2020-07-24 07:08:03
Name: datetime, dtype: object

### Trabajando con Datetime
Lo primero sera convertir los datetime a el formato correcto, ya que lo esta detectando como object, lo siguiente sera colocar como index las fechas y al final agrupar por cada 5 minutos los datos para obtener el promedio y media de los datos


In [274]:
# Convertimos en fechas los datimetimes
heartrate_values['datetime'] = pd.to_datetime(heartrate_values['datetime'])
print(heartrate_values.columns)

Index(['datetime', ' hr'], dtype='object')


In [275]:


## Se coloca el datetime como index
heartrate_values = heartrate_values.set_index('datetime')
heartrate_values.head()


Unnamed: 0_level_0,hr
datetime,Unnamed: 1_level_1
2020-07-24 07:07:59,51.0
2020-07-24 07:08:00,51.0
2020-07-24 07:08:01,50.33
2020-07-24 07:08:02,55.75
2020-07-24 07:08:03,61.2


In [276]:
df_procesado_5min = heartrate_values[' hr'].resample('5min') 

### Calculamos le media, la mediana y demas factores de estadistica

En este caso tenemos que obtener el promedio, mediana, max, min, desviacion estandar y quartiles

In [277]:
# Funcion para calcular los cuartiles 1 y 3 que indican en el paper
# 
def quartiles(x):
    return pd.Series([x.quantile(0.25), x.quantile(0.75)], index=['q1', 'q3'])


In [278]:
# Crear a serie de dataframe de 5 min
series5min = quartiles(df_procesado_5min)
series5min.head()

q1    datetime
2020-07-05 15:10:00     85.9000
2020-...
q3    datetime
2020-07-05 15:10:00     90.7300
2020-...
dtype: object

In [None]:
# Definimos los metodos del dataframe a calcular
df_5min = df_procesado_5min.agg(['mean', 'median', 'max', 'min', 'std'])
print(df_5min.columns)
# Removemos las columnas que no necesitamos por ahora
# columns_to_remove = [' temp']
# df_5min = df_5min.drop(columns=columns_to_remove)
df_5min.head(10)

Index(['mean', 'median', 'max', 'min', 'std'], dtype='object')


Unnamed: 0_level_0,mean,median,max,min,std
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2020-07-05 15:10:00,88.512212,88.95,106.8,70.0,4.24876
2020-07-05 15:15:00,91.581833,88.425,109.28,83.73,7.638901
2020-07-05 15:20:00,85.107633,85.08,87.07,83.58,0.694245
2020-07-05 15:25:00,84.917267,85.22,88.42,81.92,1.893955
2020-07-05 15:30:00,84.734367,85.045,94.18,70.65,7.490427
2020-07-05 15:35:00,83.552333,85.725,89.47,72.0,5.562452
2020-07-05 15:40:00,78.658767,78.235,92.18,73.28,4.277199
2020-07-05 15:45:00,91.9117,92.3,104.58,80.47,6.47203
2020-07-05 15:50:00,93.060067,89.93,109.65,82.72,7.808059
2020-07-05 15:55:00,89.4268,87.66,110.98,75.32,10.950346


In [280]:
# Lo mismo aplicamos para 1 hora
df_procesado_1hora = heartrate_values[' hr'].resample('1h') 
# Obtenemos el promedio
df_1hora = df_procesado_1hora.agg(['mean', 'median', 'max', 'min', 'std'])

# Removemos las columnas que no necesitamos por ahora
# df_1hora = df_1hora.drop(columns=columns_to_remove)
df_1hora.head(10)

Unnamed: 0_level_0,mean,median,max,min,std
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2020-07-05 15:00:00,87.107432,86.1,110.98,70.0,7.764608
2020-07-05 16:00:00,87.393519,90.225,122.02,53.97,12.044741
2020-07-05 17:00:00,92.909047,94.59,118.52,65.1,11.294148
2020-07-05 18:00:00,86.043358,88.0,101.88,63.5,7.16414
2020-07-05 19:00:00,86.061656,87.35,103.42,54.07,7.018286
2020-07-05 20:00:00,85.747272,85.33,121.17,61.65,8.976035
2020-07-05 21:00:00,89.374128,89.32,120.2,66.67,8.495999
2020-07-05 22:00:00,90.795614,91.15,98.48,74.63,3.676974
2020-07-05 23:00:00,92.399744,91.93,103.33,89.55,1.956739
2020-07-06 00:00:00,89.757167,90.03,110.15,67.68,4.929549


In [281]:
# Separar los cuartiles en columnas individuales
# Obtenemos los quantiles
df_5min_quantil1 = df_procesado_5min.quantile(0.25)
df_5min_quantil3 = df_procesado_5min.quantile(0.75)
df_1hora_quantil1 = df_procesado_1hora.quantile(0.25)
df_1hora_quantil3 = df_procesado_1hora.quantile(0.75)
df_5min['q1'] = df_5min_quantil1
df_5min['q3'] = df_5min_quantil3
df_5min.head(10)
# df_1hora[['q1', 'q3']] = [df_1hora_quantil1,df_1hora_quantil3]


Unnamed: 0_level_0,mean,median,max,min,std,q1,q3
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2020-07-05 15:10:00,88.512212,88.95,106.8,70.0,4.24876,85.9,90.73
2020-07-05 15:15:00,91.581833,88.425,109.28,83.73,7.638901,86.61,95.4675
2020-07-05 15:20:00,85.107633,85.08,87.07,83.58,0.694245,84.745,85.48
2020-07-05 15:25:00,84.917267,85.22,88.42,81.92,1.893955,83.12,85.8475
2020-07-05 15:30:00,84.734367,85.045,94.18,70.65,7.490427,77.9,92.67
2020-07-05 15:35:00,83.552333,85.725,89.47,72.0,5.562452,82.6725,87.85
2020-07-05 15:40:00,78.658767,78.235,92.18,73.28,4.277199,75.0775,80.8375
2020-07-05 15:45:00,91.9117,92.3,104.58,80.47,6.47203,87.36,95.2925
2020-07-05 15:50:00,93.060067,89.93,109.65,82.72,7.808059,87.7075,97.2175
2020-07-05 15:55:00,89.4268,87.66,110.98,75.32,10.950346,79.0825,97.16


In [282]:
df_5min.count()

mean      1165
median    1165
max       1165
min       1165
std       1165
q1        1165
q3        1165
dtype: int64

In [283]:
# Igual con 1hora de dataset
df_1hora['q1'] = df_1hora_quantil1
df_1hora['q3'] = df_1hora_quantil3
df_1hora.head(10)

Unnamed: 0_level_0,mean,median,max,min,std,q1,q3
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2020-07-05 15:00:00,87.107432,86.1,110.98,70.0,7.764608,83.18,90.47
2020-07-05 16:00:00,87.393519,90.225,122.02,53.97,12.044741,82.245,92.92
2020-07-05 17:00:00,92.909047,94.59,118.52,65.1,11.294148,86.5,99.32
2020-07-05 18:00:00,86.043358,88.0,101.88,63.5,7.16414,81.92,90.85
2020-07-05 19:00:00,86.061656,87.35,103.42,54.07,7.018286,83.445,89.835
2020-07-05 20:00:00,85.747272,85.33,121.17,61.65,8.976035,81.215,89.0
2020-07-05 21:00:00,89.374128,89.32,120.2,66.67,8.495999,86.15,91.98
2020-07-05 22:00:00,90.795614,91.15,98.48,74.63,3.676974,89.8,92.38
2020-07-05 23:00:00,92.399744,91.93,103.33,89.55,1.956739,91.18,93.33
2020-07-06 00:00:00,89.757167,90.03,110.15,67.68,4.929549,88.48,91.885


In [284]:
df_1hora.count()

mean      101
median    101
max       101
min       101
std       101
q1        101
q3        101
dtype: int64

In [285]:
# Exportamos los resultados en un csv
df_5min.to_csv("HR_5min_"+PACIENTE+".csv")
df_1hora.to_csv("HR_1hora_"+PACIENTE+".csv")

### ARCHIVOS CSV GENERADOS CON EXITO PARA 5 MIN Y 1 HORA