## ANALISIS DE DATOS DE PULSO DE VOLUMEN SANGUINEO (BVP)

El presente es para analizar los datos de pulso de volumen sanguineo del smartwatch, el cual tiene un procesamiento de datos en 64 Hz

In [115]:
# Importando Pandas y otras librerias
import pandas as pd
import numpy as np

In [116]:
PACIENTE = '001'
PATH_FOLDER = 'G:\\Dataset\\big-ideas-lab-glycemic-variability-and-wearable-device-data-1.1.2\\'+PACIENTE+'\\'

In [117]:
# Leyendo el CSV
bvp_values = pd.read_csv(PATH_FOLDER + 'BVP_'+PACIENTE+'.csv', engine='python', na_values="not available")

In [118]:
bvp_values.head()

Unnamed: 0,datetime,bvp
0,2020-02-13 15:28:50.000000,-0.0
1,2020-02-13 15:28:50.015625,-0.0
2,2020-02-13 15:28:50.031250,-0.0
3,2020-02-13 15:28:50.046875,-0.0
4,2020-02-13 15:28:50.062500,-0.0


In [119]:
bvp_values.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 40592838 entries, 0 to 40592837
Data columns (total 2 columns):
 #   Column    Dtype  
---  ------    -----  
 0   datetime  object 
 1    bvp      float64
dtypes: float64(1), object(1)
memory usage: 619.4+ MB


In [120]:
bvp_values.count()

datetime    40592838
 bvp        40592838
dtype: int64

In [121]:
bvp_values["datetime"].head()

0    2020-02-13 15:28:50.000000
1    2020-02-13 15:28:50.015625
2    2020-02-13 15:28:50.031250
3    2020-02-13 15:28:50.046875
4    2020-02-13 15:28:50.062500
Name: datetime, dtype: object

### Trabajando con Datetime
Lo primero sera convertir los datetime a el formato correcto, ya que lo esta detectando como object, lo siguiente sera colocar como index las fechas y al final agrupar por cada 5 minutos los datos para obtener el promedio y media de los datos


In [122]:
# Convertimos en fechas los datimetimes
bvp_values['datetime'] = pd.to_datetime(bvp_values['datetime'])
print(bvp_values.columns)

Index(['datetime', ' bvp'], dtype='object')


In [123]:


## Se coloca indices como datetime
bvp_values = bvp_values.set_index('datetime')
print(bvp_values.columns)


Index([' bvp'], dtype='object')


In [124]:
df_procesado_5min = bvp_values[' bvp'].resample('5min') 

### Calculamos le media, la mediana y demas factores de estadistica

En este caso tenemos que obtener el promedio, mediana, max, min, desviacion estandar y quartiles

In [125]:
# Funcion para calcular los cuartiles 1 y 3 que indican en el paper
# 
def quartiles(x):
    return pd.Series([x.quantile(0.25), x.quantile(0.75)], index=['q1', 'q3'])


In [126]:
# Crear a serie de dataframe de 5 min
series5min = quartiles(df_procesado_5min)
series5min.head()

q1    datetime
2020-02-13 15:25:00   -42.1500
2020-0...
q3    datetime
2020-02-13 15:25:00    37.6625
2020-0...
dtype: object

In [127]:
# Definimos los metodos del dataframe a calcular
df_5min = df_procesado_5min.agg(['mean', 'median', 'max', 'min', 'std'])
print(df_5min.columns)
# Removemos las columnas que no necesitamos por ahora
# Supongamos que tienes tus datos en un DataFrame llamado 'df'
# columns_to_remove = [' temp']
# df_5min = df_5min.drop(columns=columns_to_remove)
df_5min.head(20)

Index(['mean', 'median', 'max', 'min', 'std'], dtype='object')


Unnamed: 0_level_0,mean,median,max,min,std
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2020-02-13 15:25:00,0.098167,-0.2,963.62,-671.54,105.020335
2020-02-13 15:30:00,-0.025119,0.18,251.04,-469.78,30.367606
2020-02-13 15:35:00,0.002832,0.18,203.76,-374.16,21.528509
2020-02-13 15:40:00,-0.000613,0.0,179.16,-214.48,24.298606
2020-02-13 15:45:00,-0.011019,-0.015,292.87,-556.94,43.501639
2020-02-13 15:50:00,0.010244,0.04,75.54,-138.71,11.85724
2020-02-13 15:55:00,0.02184,0.33,265.17,-315.53,37.761228
2020-02-13 16:00:00,-0.017135,0.76,284.14,-334.29,26.784233
2020-02-13 16:05:00,0.043231,0.11,265.64,-302.61,36.802137
2020-02-13 16:10:00,-0.039279,-0.19,111.75,-221.17,15.377645


In [128]:
# Lo mismo aplicamos para 1 hora
df_procesado_1hora = bvp_values[' bvp'].resample('1h') 
# Obtenemos el promedio
df_1hora = df_procesado_1hora.agg(['mean', 'median', 'max', 'min', 'std'])

# Removemos las columnas que no necesitamos por ahora
# df_1hora = df_1hora.drop(columns=columns_to_remove)
df_1hora.head(20)

Unnamed: 0_level_0,mean,median,max,min,std
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2020-02-13 15:00:00,0.00338,0.13,963.62,-671.54,35.842116
2020-02-13 16:00:00,-0.000688,0.17,424.26,-347.67,32.625133
2020-02-13 17:00:00,-0.000937,0.64,408.01,-431.19,24.43067
2020-02-13 18:00:00,-0.000193,0.51,188.92,-309.22,18.404141
2020-02-13 19:00:00,0.001366,0.26,225.74,-358.15,15.851569
2020-02-13 20:00:00,-0.003219,1.06,267.1,-336.76,30.85677
2020-02-13 21:00:00,0.001497,1.07,259.29,-454.03,26.26219
2020-02-13 22:00:00,0.000889,1.43,1805.72,-1268.55,42.680289
2020-02-13 23:00:00,-0.00054,0.34,1081.19,-915.18,51.974005
2020-02-14 00:00:00,6.5e-05,0.64,592.73,-445.04,11.691194


In [129]:
# Separar los cuartiles en columnas individuales
# Obtenemos los quantiles
df_5min_quantil1 = df_procesado_5min.quantile(0.25)
df_5min_quantil3 = df_procesado_5min.quantile(0.75)
df_1hora_quantil1 = df_procesado_1hora.quantile(0.25)
df_1hora_quantil3 = df_procesado_1hora.quantile(0.75)
df_5min['q1'] = df_5min_quantil1
df_5min['q3'] = df_5min_quantil3
df_5min.head(10)
# df_1hora[['q1', 'q3']] = [df_1hora_quantil1,df_1hora_quantil3]


Unnamed: 0_level_0,mean,median,max,min,std,q1,q3
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2020-02-13 15:25:00,0.098167,-0.2,963.62,-671.54,105.020335,-42.15,37.6625
2020-02-13 15:30:00,-0.025119,0.18,251.04,-469.78,30.367606,-3.76,3.78
2020-02-13 15:35:00,0.002832,0.18,203.76,-374.16,21.528509,-1.49,1.51
2020-02-13 15:40:00,-0.000613,0.0,179.16,-214.48,24.298606,-2.81,2.2825
2020-02-13 15:45:00,-0.011019,-0.015,292.87,-556.94,43.501639,-13.4425,15.0525
2020-02-13 15:50:00,0.010244,0.04,75.54,-138.71,11.85724,-3.5925,3.55
2020-02-13 15:55:00,0.02184,0.33,265.17,-315.53,37.761228,-8.585,10.5525
2020-02-13 16:00:00,-0.017135,0.76,284.14,-334.29,26.784233,-9.46,10.2125
2020-02-13 16:05:00,0.043231,0.11,265.64,-302.61,36.802137,-10.34,11.41
2020-02-13 16:10:00,-0.039279,-0.19,111.75,-221.17,15.377645,-5.9625,5.91


In [130]:
df_5min.count()

mean      2125
median    2125
max       2125
min       2125
std       2125
q1        2125
q3        2125
dtype: int64

In [131]:
# Igual con 1hora de dataset
df_1hora['q1'] = df_1hora_quantil1
df_1hora['q3'] = df_1hora_quantil3
df_1hora.head(10)

Unnamed: 0_level_0,mean,median,max,min,std,q1,q3
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2020-02-13 15:00:00,0.00338,0.13,963.62,-671.54,35.842116,-4.38,4.5325
2020-02-13 16:00:00,-0.000688,0.17,424.26,-347.67,32.625133,-7.8,7.79
2020-02-13 17:00:00,-0.000937,0.64,408.01,-431.19,24.43067,-7.27,7.74
2020-02-13 18:00:00,-0.000193,0.51,188.92,-309.22,18.404141,-5.49,6.51
2020-02-13 19:00:00,0.001366,0.26,225.74,-358.15,15.851569,-4.06,4.14
2020-02-13 20:00:00,-0.003219,1.06,267.1,-336.76,30.85677,-9.01,10.21
2020-02-13 21:00:00,0.001497,1.07,259.29,-454.03,26.26219,-7.43,8.2
2020-02-13 22:00:00,0.000889,1.43,1805.72,-1268.55,42.680289,-13.0,13.01
2020-02-13 23:00:00,-0.00054,0.34,1081.19,-915.18,51.974005,-10.19,9.7
2020-02-14 00:00:00,6.5e-05,0.64,592.73,-445.04,11.691194,-7.44,6.79


In [132]:
df_1hora.count()

mean      184
median    184
max       184
min       184
std       184
q1        184
q3        184
dtype: int64

In [133]:
# Exportamos los resultados en un csv
df_5min.to_csv("BVP_5min_"+PACIENTE+".csv")
df_1hora.to_csv("BVP_1hora_"+PACIENTE+".csv")

### ARCHIVOS CSV GENERADOS CON EXITO PARA 5 MIN Y 1 HORA