## ANÁLISIS DE DATOS DE ACTIVIDAD ELECTRODÉRMICA (EDA)

Este notebook analiza los datos de actividad electrodérmica (EDA) registrados por el smartwatch, el cual muestrea la señal a 4 Hz (una lectura cada 0,25 s).

In [1]:
# Importando Pandas y otras librerias
import pandas as pd
import numpy as np

In [2]:
# Load the raw EDA export; cells equal to "not available" are parsed as NaN.
# NOTE(review): the parsed header yields a column named ' eda' (with a leading
# space), which is why later cells index the frame with ' eda'.
eda_values = pd.read_csv(
    'EDA_016.csv',
    engine='python',
    na_values="not available",
)

In [3]:
# Preview the first rows of the loaded frame.
eda_values.head()

Unnamed: 0,datetime,eda
0,2020-07-16 09:29:03.000,0.0
1,2020-07-16 09:29:03.250,0.840149
2,2020-07-16 09:29:03.500,0.457482
3,2020-07-16 09:29:03.750,0.0
4,2020-07-16 09:29:04.000,0.052509


In [4]:
# Report the column dtypes and memory usage of the loaded frame.
eda_values.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2232594 entries, 0 to 2232593
Data columns (total 2 columns):
 #   Column    Dtype  
---  ------    -----  
 0   datetime  object 
 1    eda      float64
dtypes: float64(1), object(1)
memory usage: 34.1+ MB


In [5]:
# Count the non-null values in each column.
eda_values.count()

datetime    2232594
 eda        2232594
dtype: int64

In [6]:
# Inspect the raw 'datetime' column before it is converted.
eda_values["datetime"].head()

0    2020-07-16 09:29:03.000
1    2020-07-16 09:29:03.250
2    2020-07-16 09:29:03.500
3    2020-07-16 09:29:03.750
4    2020-07-16 09:29:04.000
Name: datetime, dtype: object

### Trabajando con Datetime
Lo primero será convertir la columna `datetime` al formato correcto, ya que pandas la está detectando como `object`. Lo siguiente será colocar las fechas como índice y, al final, agrupar los datos cada 5 minutos para obtener la media y la mediana.


In [7]:
# Parse the 'datetime' strings into pandas timestamps, replacing the column.
parsed_timestamps = pd.to_datetime(eda_values['datetime'])
eda_values['datetime'] = parsed_timestamps
print(eda_values.columns)

Index(['datetime', ' eda'], dtype='object')


In [8]:


# Use the timestamps as the index so the series can be resampled by time.
# The guard keeps this cell re-runnable: once 'datetime' has been moved into
# the index it is no longer a column, and calling set_index again would raise
# a KeyError.
if 'datetime' in eda_values.columns:
    eda_values = eda_values.set_index('datetime')
print(eda_values.columns)


Index([' eda'], dtype='object')


In [9]:
# Group the ' eda' readings into 5-minute windows; the statistics in the
# following cells are computed from this resampler.
df_procesado_5min = eda_values[' eda'].resample('5min') 

### Calculamos la media, la mediana y demás estadísticos

En este caso tenemos que obtener el promedio, la mediana, el máximo, el mínimo, la desviación estándar y los cuartiles.

In [10]:
# Helper that computes quartiles 1 and 3, as indicated in the paper.
def quartiles(x):
    """Return the 0.25 and 0.75 quantiles of ``x`` as a Series.

    Parameters
    ----------
    x : object exposing ``quantile(q)``
        The values to summarise.

    Returns
    -------
    pandas.Series
        Two entries labelled ``'q1'`` and ``'q3'`` holding whatever
        ``x.quantile(0.25)`` and ``x.quantile(0.75)`` return.
    """
    first_quartile = x.quantile(0.25)
    third_quartile = x.quantile(0.75)
    return pd.Series([first_quartile, third_quartile], index=['q1', 'q3'])


In [11]:
# Compute q1/q3 over the 5-minute windows.
# NOTE(review): the input here is a resampler, so each quantile call yields a
# whole per-window Series; the result is therefore a two-element object Series
# holding two Series (see the cell output), not a q1/q3 table.
series5min = quartiles(df_procesado_5min)
series5min.head()

q1    datetime
2020-07-16 09:25:00    0.005123
2020-...
q3    datetime
2020-07-16 09:25:00    0.335883
2020-...
dtype: object

In [12]:
# Summary statistics computed for each 5-minute window.
stats_5min = ['mean', 'median', 'max', 'min', 'std']
df_5min = df_procesado_5min.agg(stats_5min)
print(df_5min.columns)
df_5min.head(20)

Index(['mean', 'median', 'max', 'min', 'std'], dtype='object')


Unnamed: 0_level_0,mean,median,max,min,std
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2020-07-16 09:25:00,0.448949,0.064036,3.570595,0.0,0.803402
2020-07-16 09:30:00,0.326691,0.372047,0.653164,0.016649,0.165484
2020-07-16 09:35:00,0.6319,0.631659,0.764853,0.506149,0.051198
2020-07-16 09:40:00,1.030635,1.036434,1.396314,0.658554,0.194995
2020-07-16 09:45:00,1.049776,1.044758,1.259278,0.846888,0.083042
2020-07-16 09:50:00,0.954326,0.968556,1.169628,0.759799,0.099029
2020-07-16 09:55:00,0.659374,0.639412,0.981294,0.252567,0.10866
2020-07-16 10:00:00,0.929366,0.899362,1.405279,0.702098,0.132848
2020-07-16 10:05:00,1.148832,1.090224,1.780627,0.782852,0.220604
2020-07-16 10:10:00,1.311897,1.316319,2.0547,0.899496,0.180745


In [13]:
# Repeat the same summary using 1-hour windows.
df_procesado_1hora = eda_values[' eda'].resample('1h')
stats_1hora = ['mean', 'median', 'max', 'min', 'std']
df_1hora = df_procesado_1hora.agg(stats_1hora)
df_1hora.head(20)

Unnamed: 0_level_0,mean,median,max,min,std
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2020-07-16 09:00:00,0.765428,0.766203,3.570595,0.0,0.323079
2020-07-16 10:00:00,0.937013,0.816151,2.309762,0.0,0.435768
2020-07-16 11:00:00,0.472971,0.488726,1.624281,0.037407,0.141086
2020-07-16 12:00:00,0.341072,0.29355,2.103368,0.0,0.180508
2020-07-16 13:00:00,1.833972,1.805061,3.420278,0.260252,0.560089
2020-07-16 14:00:00,2.588916,2.678745,4.620881,0.585622,0.635568
2020-07-16 15:00:00,3.403984,3.323417,6.045711,1.045498,0.759495
2020-07-16 16:00:00,3.971467,3.568948,10.789403,0.0,1.725551
2020-07-16 17:00:00,3.402337,3.443804,7.07293,0.346129,1.016422
2020-07-16 18:00:00,0.635618,0.594518,1.457788,0.0,0.317827


In [14]:
# First and third quartiles per window, for both resolutions.
# The 1-hour quartiles are only computed here; they are attached to df_1hora
# in a later cell.
df_5min_quantil1, df_5min_quantil3 = (
    df_procesado_5min.quantile(q) for q in (0.25, 0.75)
)
df_1hora_quantil1, df_1hora_quantil3 = (
    df_procesado_1hora.quantile(q) for q in (0.25, 0.75)
)
# Attach the 5-minute quartiles as their own columns.
df_5min['q1'] = df_5min_quantil1
df_5min['q3'] = df_5min_quantil3
df_5min.head(10)


Unnamed: 0_level_0,mean,median,max,min,std,q1,q3
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2020-07-16 09:25:00,0.448949,0.064036,3.570595,0.0,0.803402,0.005123,0.335883
2020-07-16 09:30:00,0.326691,0.372047,0.653164,0.016649,0.165484,0.139598,0.452092
2020-07-16 09:35:00,0.6319,0.631659,0.764853,0.506149,0.051198,0.591957,0.67008
2020-07-16 09:40:00,1.030635,1.036434,1.396314,0.658554,0.194995,0.858042,1.190439
2020-07-16 09:45:00,1.049776,1.044758,1.259278,0.846888,0.083042,0.986486,1.117119
2020-07-16 09:50:00,0.954326,0.968556,1.169628,0.759799,0.099029,0.877625,1.036434
2020-07-16 09:55:00,0.659374,0.639412,0.981294,0.252567,0.10866,0.571465,0.72522
2020-07-16 10:00:00,0.929366,0.899362,1.405279,0.702098,0.132848,0.819621,1.018824
2020-07-16 10:05:00,1.148832,1.090224,1.780627,0.782852,0.220604,1.001854,1.325974
2020-07-16 10:10:00,1.311897,1.316319,2.0547,0.899496,0.180745,1.179948,1.420747


In [15]:
# Count the non-null values per statistic in the 5-minute table.
df_5min.count()

mean      1866
median    1866
max       1866
min       1866
std       1866
q1        1866
q3        1866
dtype: int64

In [16]:
# Attach the 1-hour quartiles as columns, mirroring the 5-minute table.
for column_name, quartile_values in (
    ('q1', df_1hora_quantil1),
    ('q3', df_1hora_quantil3),
):
    df_1hora[column_name] = quartile_values
df_1hora.head(10)

Unnamed: 0_level_0,mean,median,max,min,std,q1,q3
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2020-07-16 09:00:00,0.765428,0.766203,3.570595,0.0,0.323079,0.575307,1.013381
2020-07-16 10:00:00,0.937013,0.816151,2.309762,0.0,0.435768,0.576588,1.232383
2020-07-16 11:00:00,0.472971,0.488726,1.624281,0.037407,0.141086,0.408548,0.536886
2020-07-16 12:00:00,0.341072,0.29355,2.103368,0.0,0.180508,0.223111,0.412657
2020-07-16 13:00:00,1.833972,1.805061,3.420278,0.260252,0.560089,1.403703,2.273628
2020-07-16 14:00:00,2.588916,2.678745,4.620881,0.585622,0.635568,2.208517,3.023257
2020-07-16 15:00:00,3.403984,3.323417,6.045711,1.045498,0.759495,2.8145,3.837532
2020-07-16 16:00:00,3.971467,3.568948,10.789403,0.0,1.725551,2.614298,5.054976
2020-07-16 17:00:00,3.402337,3.443804,7.07293,0.346129,1.016422,2.805124,4.029639
2020-07-16 18:00:00,0.635618,0.594518,1.457788,0.0,0.317827,0.353157,0.845607


In [17]:
# Count the non-null values per statistic in the 1-hour table.
df_1hora.count()

mean      161
median    161
max       161
min       161
std       161
q1        161
q3        161
dtype: int64

In [18]:
# Exportamos los resultados en un csv
df_5min.to_csv("EDA_5min.csv")
df_1hora.to_csv("EDA_1hora.csv")

### ARCHIVOS CSV GENERADOS CON ÉXITO PARA 5 MIN Y 1 HORA