## ANALISIS DE DATOS DE HR (HEART RATE)

El presente es para analizar los datos del Heart Rate del smartwatch, el cual tiene un procesamiento de datos en 1Hz, que seria un dato por segundo

In [1]:
# Importando Pandas y otras librerias
import pandas as pd
import numpy as np

In [2]:
# Leyendo el CSV
heartrate_values = pd.read_csv('HR_016.csv', engine='python', na_values="not available")

In [3]:
heartrate_values.head()

Unnamed: 0,datetime,hr
0,2020-07-16 09:29:13,71.0
1,2020-07-16 09:29:14,84.0
2,2020-07-16 09:29:15,81.67
3,2020-07-16 09:29:16,82.5
4,2020-07-16 09:29:17,78.6


In [4]:
heartrate_values.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 558104 entries, 0 to 558103
Data columns (total 2 columns):
 #   Column    Non-Null Count   Dtype  
---  ------    --------------   -----  
 0   datetime  558104 non-null  object 
 1    hr       558104 non-null  float64
dtypes: float64(1), object(1)
memory usage: 8.5+ MB


In [5]:
heartrate_values.count()

datetime    558104
 hr         558104
dtype: int64

In [6]:
heartrate_values["datetime"].head()

0    2020-07-16 09:29:13
1    2020-07-16 09:29:14
2    2020-07-16 09:29:15
3    2020-07-16 09:29:16
4    2020-07-16 09:29:17
Name: datetime, dtype: object

### Trabajando con Datetime
Lo primero sera convertir los datetime a el formato correcto, ya que lo esta detectando como object, lo siguiente sera colocar como index las fechas y al final agrupar por cada 5 minutos los datos para obtener el promedio y media de los datos


In [7]:
# Convertimos en fechas los datimetimes
heartrate_values['datetime'] = pd.to_datetime(heartrate_values['datetime'])
print(heartrate_values.columns)

Index(['datetime', ' hr'], dtype='object')


In [8]:


## Se coloca el datetime como index
heartrate_values = heartrate_values.set_index('datetime')
heartrate_values.head()


Unnamed: 0_level_0,hr
datetime,Unnamed: 1_level_1
2020-07-16 09:29:13,71.0
2020-07-16 09:29:14,84.0
2020-07-16 09:29:15,81.67
2020-07-16 09:29:16,82.5
2020-07-16 09:29:17,78.6


In [14]:
df_procesado_5min = heartrate_values[' hr'].resample('5min') 

### Calculamos le media, la mediana y demas factores de estadistica

En este caso tenemos que obtener el promedio, mediana, max, min, desviacion estandar y quartiles

In [15]:
# Funcion para calcular los cuartiles 1 y 3 que indican en el paper
# 
def quartiles(x):
    return pd.Series([x.quantile(0.25), x.quantile(0.75)], index=['q1', 'q3'])


In [16]:
# Crear a serie de dataframe de 5 min
series5min = quartiles(df_procesado_5min)
series5min.head()

q1    datetime
2020-07-16 09:25:00    90.2050
2020-0...
q3    datetime
2020-07-16 09:25:00    97.2550
2020-0...
dtype: object

In [17]:
# Definimos los metodos del dataframe a calcular
df_5min = df_procesado_5min.agg(['mean', 'median', 'max', 'min', 'std'])
print(df_5min.columns)
# Removemos las columnas que no necesitamos por ahora
# columns_to_remove = [' temp']
# df_5min = df_5min.drop(columns=columns_to_remove)
df_5min.head(20)

Index(['mean', 'median', 'max', 'min', 'std'], dtype='object')


Unnamed: 0_level_0,mean,median,max,min,std
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2020-07-16 09:25:00,92.710426,94.3,98.04,71.0,5.68959
2020-07-16 09:30:00,78.698367,78.385,89.25,64.33,5.966452
2020-07-16 09:35:00,72.131633,73.325,80.73,62.18,5.619535
2020-07-16 09:40:00,80.821033,77.78,96.55,73.35,6.748584
2020-07-16 09:45:00,75.1501,75.125,81.53,68.9,2.996422
2020-07-16 09:50:00,76.813,75.68,85.93,71.35,3.940826
2020-07-16 09:55:00,76.277867,74.5,80.82,73.53,2.470976
2020-07-16 10:00:00,82.5922,79.075,111.48,72.47,11.078854
2020-07-16 10:05:00,109.748933,111.03,114.68,99.22,4.470669
2020-07-16 10:10:00,106.489533,107.355,122.75,82.33,11.449818


In [18]:
# Lo mismo aplicamos para 1 hora
df_procesado_1hora = heartrate_values[' hr'].resample('1h') 
# Obtenemos el promedio
df_1hora = df_procesado_1hora.agg(['mean', 'median', 'max', 'min', 'std'])

# Removemos las columnas que no necesitamos por ahora
# df_1hora = df_1hora.drop(columns=columns_to_remove)
df_1hora.head(20)

Unnamed: 0_level_0,mean,median,max,min,std
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2020-07-16 09:00:00,77.057385,76.52,98.04,62.18,6.135436
2020-07-16 10:00:00,88.877458,83.665,122.75,67.9,12.509688
2020-07-16 11:00:00,85.929608,83.22,111.55,64.32,9.778335
2020-07-16 12:00:00,81.495628,80.25,104.12,70.95,6.695853
2020-07-16 13:00:00,84.967678,84.03,111.73,56.97,9.679853
2020-07-16 14:00:00,84.185486,81.47,135.03,59.55,13.558949
2020-07-16 15:00:00,93.193364,95.33,129.33,55.28,14.189919
2020-07-16 16:00:00,97.614256,96.71,122.65,61.57,13.917173
2020-07-16 17:00:00,92.581403,90.475,126.17,59.05,10.287956
2020-07-16 18:00:00,86.287961,85.05,107.68,69.77,7.600619


In [19]:
# Separar los cuartiles en columnas individuales
# Obtenemos los quantiles
df_5min_quantil1 = df_procesado_5min.quantile(0.25)
df_5min_quantil3 = df_procesado_5min.quantile(0.75)
df_1hora_quantil1 = df_procesado_1hora.quantile(0.25)
df_1hora_quantil3 = df_procesado_1hora.quantile(0.75)
df_5min['q1'] = df_5min_quantil1
df_5min['q3'] = df_5min_quantil3
df_5min.head(10)
# df_1hora[['q1', 'q3']] = [df_1hora_quantil1,df_1hora_quantil3]


Unnamed: 0_level_0,mean,median,max,min,std,q1,q3
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2020-07-16 09:25:00,92.710426,94.3,98.04,71.0,5.68959,90.205,97.255
2020-07-16 09:30:00,78.698367,78.385,89.25,64.33,5.966452,74.72,84.77
2020-07-16 09:35:00,72.131633,73.325,80.73,62.18,5.619535,66.8775,77.0825
2020-07-16 09:40:00,80.821033,77.78,96.55,73.35,6.748584,76.2525,84.955
2020-07-16 09:45:00,75.1501,75.125,81.53,68.9,2.996422,73.8175,77.02
2020-07-16 09:50:00,76.813,75.68,85.93,71.35,3.940826,74.1825,77.0075
2020-07-16 09:55:00,76.277867,74.5,80.82,73.53,2.470976,74.02,78.72
2020-07-16 10:00:00,82.5922,79.075,111.48,72.47,11.078854,76.01,81.29
2020-07-16 10:05:00,109.748933,111.03,114.68,99.22,4.470669,107.835,113.18
2020-07-16 10:10:00,106.489533,107.355,122.75,82.33,11.449818,102.2725,114.9675


In [20]:
df_5min.count()

mean      1866
median    1866
max       1866
min       1866
std       1866
q1        1866
q3        1866
dtype: int64

In [21]:
# Igual con 1hora de dataset
df_1hora['q1'] = df_1hora_quantil1
df_1hora['q3'] = df_1hora_quantil3
df_1hora.head(10)

Unnamed: 0_level_0,mean,median,max,min,std,q1,q3
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2020-07-16 09:00:00,77.057385,76.52,98.04,62.18,6.135436,73.98,78.85
2020-07-16 10:00:00,88.877458,83.665,122.75,67.9,12.509688,80.3,97.25
2020-07-16 11:00:00,85.929608,83.22,111.55,64.32,9.778335,78.5,93.59
2020-07-16 12:00:00,81.495628,80.25,104.12,70.95,6.695853,76.13,85.43
2020-07-16 13:00:00,84.967678,84.03,111.73,56.97,9.679853,80.68,88.755
2020-07-16 14:00:00,84.185486,81.47,135.03,59.55,13.558949,75.47,94.8
2020-07-16 15:00:00,93.193364,95.33,129.33,55.28,14.189919,85.28,101.04
2020-07-16 16:00:00,97.614256,96.71,122.65,61.57,13.917173,88.85,109.8
2020-07-16 17:00:00,92.581403,90.475,126.17,59.05,10.287956,87.25,95.42
2020-07-16 18:00:00,86.287961,85.05,107.68,69.77,7.600619,81.35,90.52


In [22]:
df_1hora.count()

mean      161
median    161
max       161
min       161
std       161
q1        161
q3        161
dtype: int64

In [23]:
# Exportamos los resultados en un csv
df_5min.to_csv("HR_5min.csv")
df_1hora.to_csv("HR_1hora.csv")

### ARCHIVOS CSV GENERADOS CON EXITO PARA 5 MIN Y 1 HORA