In [1]:
import pickle
import numpy as np
import seaborn as sns
import pandas as pd
from scipy import stats
import matplotlib.pyplot as plt
from keras.models import Model
from keras.layers import Input, Conv1D, MaxPooling1D, Flatten, Dense
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from keras.utils import to_categorical
from sklearn.preprocessing import StandardScaler
from keras.layers import Dropout
from keras.optimizers import Adam
from sklearn.metrics import f1_score, precision_score, recall_score, confusion_matrix

In [2]:
path = "/kaggle/input/dataset-chest/"

In [3]:
df_chest_filt = pd.read_pickle((path + "combined_chest_filtered.pkl"))

In [4]:
df_chest_filt.head(10)

Unnamed: 0,id,ACC_x,ACC_y,ACC_z,ECG,EMG,EDA,TEMP,RESP,label
214583,2.0,0.8914,-0.1102,-0.2576,0.030945,-0.003708,5.710983,29.083618,1.191711,1.0
214584,2.0,0.8926,-0.1086,-0.2544,0.033646,-0.014145,5.719376,29.122437,1.139832,1.0
214585,2.0,0.893,-0.1094,-0.258,0.033005,0.010208,5.706406,29.115234,1.141357,1.0
214586,2.0,0.8934,-0.1082,-0.2538,0.031815,0.012634,5.712509,29.126709,1.15509,1.0
214587,2.0,0.893,-0.1096,-0.257,0.03035,0.00206,5.727005,29.100861,1.133728,1.0
214588,2.0,0.8906,-0.1078,-0.2538,0.03035,-0.002792,5.70755,29.126709,1.13678,1.0
214589,2.0,0.8906,-0.1082,-0.2566,0.032639,-0.001968,5.715561,29.116699,1.115417,1.0
214590,2.0,0.8906,-0.108,-0.26,0.029572,0.007919,5.706406,29.106628,1.13678,1.0
214591,2.0,0.8872,-0.1062,-0.2586,0.026138,-0.00705,5.708313,29.121002,1.106262,1.0
214592,2.0,0.8862,-0.1068,-0.2616,0.027649,-0.01387,5.715561,29.108063,1.081848,1.0


In [5]:
df_chest_filt.describe()

Unnamed: 0,id,ACC_x,ACC_y,ACC_z,ECG,EMG,EDA,TEMP,RESP,label
count,23206320.0,23206320.0,23206320.0,23206320.0,23206320.0,23206320.0,23206320.0,23206320.0,23206320.0,23206320.0
mean,9.399516,0.8048063,-0.03151446,-0.3450945,0.001112485,-0.00295454,4.601632,33.77196,0.05366276,1.636944
std,4.705341,0.1298794,0.1077167,0.264312,0.270831,0.01401874,3.451288,1.315684,3.615568,0.7533773
min,2.0,0.2345999,-0.6222,-1.7984,-1.499405,-0.6101532,0.4451752,28.04526,-30.39398,1.0
25%,5.0,0.729,-0.06239998,-0.5446,-0.09146118,-0.009841919,1.989365,33.54745,-1.893616,1.0
50%,9.0,0.8554,-0.02020001,-0.3234,-0.02522278,-0.002609253,3.719711,34.12451,-0.2746582,1.0
75%,14.0,0.8974,0.02380002,-0.1682,0.03648376,0.004119873,6.327057,34.53427,1.957703,2.0
max,17.0,1.5534,0.5176001,1.0418,1.499954,1.04718,20.43343,35.77805,37.88605,3.0


In [6]:
df_chest_filt.groupby(['id', 'label']).count()

Unnamed: 0_level_0,Unnamed: 1_level_0,ACC_x,ACC_y,ACC_z,ECG,EMG,EDA,TEMP,RESP
id,label,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2.0,1.0,800800,800800,800800,800800,800800,800800,800800,800800
2.0,2.0,430500,430500,430500,430500,430500,430500,430500,430500
2.0,3.0,253400,253400,253400,253400,253400,253400,253400,253400
3.0,1.0,798000,798000,798000,798000,798000,798000,798000,798000
3.0,2.0,447917,447917,447917,447917,447917,447917,447917,447917
3.0,3.0,262500,262500,262500,262500,262500,262500,262500,262500
4.0,1.0,810601,810601,810601,810601,810601,810601,810601,810601
4.0,2.0,444500,444500,444500,444500,444500,444500,444500,444500
4.0,3.0,260400,260400,260400,260400,260400,260400,260400,260400
5.0,1.0,838600,838600,838600,838600,838600,838600,838600,838600


In [7]:
# Crear un modelo CNN para clasificación binaria para los datos recopilados de pecho
sampling_rate = 700  # Hz
window_time = 5  # segundos
sequence_length = sampling_rate * window_time
step_size = sequence_length // 2

# Extraer features y labels
X = df_chest_filt[['ACC_x', 'ACC_y', 'ACC_z', 'ECG', 'EMG', 'EDA', 'TEMP', 'RESP']].values
y = df_chest_filt['label'].values

# Crear segmentos con overlapping windows
segments = []
labels = []
for i in range(0, len(X) - sequence_length, step_size):
    segment = X[i:i + sequence_length]
    label = y[i + sequence_length - 1]  
    segments.append(segment)
    labels.append(label)

# Convertir a vector 
X_segments = np.array(segments)
y_segments = np.array(labels)
# One hot encoding, clasificar label 1(fase neutral) y 3(diversión) a un grupo(no estrés) y label 2(estrés) a otro
y_segments_binary = (y_segments != 2).astype(int)

# Redimensionar X_segments para adaptar a la dimensión de entrada para LSTM
num_features = X_segments.shape[2]
X_segments_reshaped = X_segments.reshape((X_segments.shape[0], X_segments.shape[1], num_features))

# Definir el input layer para todos los sensores
input_layer = Input(shape=(sequence_length, num_features))

# Función para crear un bloque convolucional de 1D 
def conv_block(x, filters, kernel_size, stride, pool_size, pool_stride):
    x = Conv1D(filters=filters, kernel_size=kernel_size, strides=stride, activation='relu')(x)
    x = MaxPooling1D(pool_size=pool_size, strides=pool_stride)(x)
    return x

# Aplicar el bloque convolucional al input
output = conv_block(input_layer, filters=8, kernel_size=15, stride=2, pool_size=4, pool_stride=4)

# Conexión completa entre layers
x = Dense(32, activation='relu')(Flatten()(output))
x = Dense(16, activation='relu')(x)

# Output layer
output_layer = Dense(1, activation='sigmoid')(x)

# Generar el modelo
model_c_binary = Model(inputs=input_layer, outputs=output_layer)

# Imprimir por pantalla el resumen del modelo
model_c_binary.summary()


Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 3500, 8)]         0         
                                                                 
 conv1d (Conv1D)             (None, 1743, 8)           968       
                                                                 
 max_pooling1d (MaxPooling1  (None, 435, 8)            0         
 D)                                                              
                                                                 
 flatten (Flatten)           (None, 3480)              0         
                                                                 
 dense (Dense)               (None, 32)                111392    
                                                                 
 dense_1 (Dense)             (None, 16)                528       
                                                             

In [8]:
# Normalización de datos
scaler = StandardScaler()
X_segments_normalized = scaler.fit_transform(X_segments_reshaped.reshape(-1, num_features)).reshape(X_segments_reshaped.shape)

# Dividir los datos para entrenamiento(80%) y para prueba(20%)
X_train, X_test, y_train, y_test = train_test_split(X_segments_normalized, y_segments_binary, test_size=0.2, random_state=42)

# Ejecutar el modelo
model_c_binary.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Entrenar el modelo
model_c_binary.fit(X_train, y_train, epochs=20, batch_size=32, validation_data=(X_test, y_test))

# Evaluar la precisión con los datos de prueba
loss, accuracy = model_c_binary.evaluate(X_test, y_test)
print(f'Precisión del modelo de clasificación binaria para datos de pecho: {accuracy * 100:.2f}%')

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Precisión del modelo de clasificación binaria para datos de pecho: 97.51%


In [9]:
y_pred_binary = (model_c_binary.predict(X_test) > 0.5).astype(int)
y_true_binary = y_test 
# Calcular F1-score, precisión y recall
f1 = f1_score(y_true_binary, y_pred_binary)
precision = precision_score(y_true_binary, y_pred_binary)
recall = recall_score(y_true_binary, y_pred_binary)

# Imprimir los reesultados
print("Resultados del modelo de clasificación binaria para datos de pecho: ")
print(f'F1-score: {f1:.4f}')
print(f'Precisión: {precision:.4f}')
print(f'Recall: {recall:.4f}') # Recall es la Tasa de verdaderos positivos

# Imprimir la matriz de confusión
conf_matrix = confusion_matrix(y_true_binary, y_pred_binary)
print('Matriz de confusión:')
print(conf_matrix)

Resultados del modelo de clasificación binaria para datos de pecho: 
F1-score: 0.9819
Precisión: 0.9857
Recall: 0.9781
Matriz de confusión:
[[ 797   26]
 [  40 1789]]


In [10]:
# Mejorar el modelo modificando algunos parámetros
sampling_rate = 700  # Hz
window_time = 5  # segundos
sequence_length = sampling_rate * window_time
step_size = sequence_length // 2

# Extraer features y labels
X = df_chest_filt[['ACC_x', 'ACC_y', 'ACC_z', 'ECG', 'EMG', 'EDA', 'TEMP', 'RESP']].values
y = df_chest_filt['label'].values

# Crear segmentos con overlapping windows
segments = []
labels = []
for i in range(0, len(X) - sequence_length, step_size):
    segment = X[i:i + sequence_length]
    label = y[i + sequence_length - 1]  
    segments.append(segment)
    labels.append(label)

# Convertir a vector 
X_segments = np.array(segments)
y_segments = np.array(labels)
# One hot encoding, clasificar label 1(fase neutral) y 3(diversión) a un grupo(no estrés) y label 2(estrés) a otro
y_segments_binary = (y_segments != 2).astype(int)

# Redimensionar X_segments para adaptar a la dimensión de entrada para LSTM
num_features = X_segments.shape[2]
X_segments_reshaped = X_segments.reshape((X_segments.shape[0], X_segments.shape[1], num_features))

# Definir el input layer para todos los sensores
input_layer = Input(shape=(sequence_length, num_features))

# Función para crear un bloque convolucional de 1D 
def conv_block(x, filters, kernel_size, stride, pool_size, pool_stride):
    x = Conv1D(filters=filters, kernel_size=kernel_size, strides=stride, activation='relu')(x)
    x = MaxPooling1D(pool_size=pool_size, strides=pool_stride)(x)
    return x

# Aplicar el bloque convolucional al input
output = conv_block(input_layer, filters=16, kernel_size=15, stride=2, pool_size=4, pool_stride=4)
output = conv_block(output, filters=32, kernel_size=10, stride=1, pool_size=2, pool_stride=2)

# Conexión completa entre layers
x = Dense(32, activation='relu')(Flatten()(output))
x = Dropout(0.5)(x)
x = Dense(16, activation='relu')(x)
x = Dropout(0.5)(x)

# Output layer
output_layer = Dense(1, activation='sigmoid')(x)

# Generar el modelo
model_c_binary = Model(inputs=input_layer, outputs=output_layer)

# Imprimir por pantalla el resumen del modelo
model_c_binary.summary()

Model: "model_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, 3500, 8)]         0         
                                                                 
 conv1d_1 (Conv1D)           (None, 1743, 16)          1936      
                                                                 
 max_pooling1d_1 (MaxPoolin  (None, 435, 16)           0         
 g1D)                                                            
                                                                 
 conv1d_2 (Conv1D)           (None, 426, 32)           5152      
                                                                 
 max_pooling1d_2 (MaxPoolin  (None, 213, 32)           0         
 g1D)                                                            
                                                                 
 flatten_1 (Flatten)         (None, 6816)              0   

In [11]:
# Normalización de datos
scaler = StandardScaler()
X_segments_normalized = scaler.fit_transform(X_segments_reshaped.reshape(-1, num_features)).reshape(X_segments_reshaped.shape)

# Dividir los datos para entrenamiento(80%) y para prueba(20%)
X_train, X_test, y_train, y_test = train_test_split(X_segments_normalized, y_segments_binary, test_size=0.2, random_state=42)

# Ejecutar el modelo
optimizer = Adam(lr=0.01)
model_c_binary.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])

# Entrenar el modelo
model_c_binary.fit(X_train, y_train, epochs=20, batch_size=32, validation_data=(X_test, y_test))

# Evaluar la precisión con los datos de prueba
loss, accuracy = model_c_binary.evaluate(X_test, y_test)
print(f'Precisión del modelo de clasificación binaria para datos de pecho: {accuracy * 100:.2f}%')

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Precisión del modelo de clasificación binaria para datos de pecho: 99.21%


In [12]:
y_pred_binary = (model_c_binary.predict(X_test) > 0.5).astype(int)
y_true_binary = y_test 
# Calcular F1-score, precisión y recall
f1 = f1_score(y_true_binary, y_pred_binary)
precision = precision_score(y_true_binary, y_pred_binary)
recall = recall_score(y_true_binary, y_pred_binary)

# Imprimir los reesultados
print("Resultados del modelo de clasificación binaria para datos de pecho: ")
print(f'F1-score: {f1:.4f}')
print(f'Precisión: {precision:.4f}')
print(f'Recall: {recall:.4f}') # Recall es la Tasa de verdaderos positivos

# Imprimir la matriz de confusión
conf_matrix = confusion_matrix(y_true_binary, y_pred_binary)
print('Matriz de confusión:')
print(conf_matrix)

Resultados del modelo de clasificación binaria para datos de pecho: 
F1-score: 0.9943
Precisión: 0.9908
Recall: 0.9978
Matriz de confusión:
[[ 806   17]
 [   4 1825]]


In [13]:
# Generar un modelo CNN para la clasificaión de tres estados para los datos de pecho
# Modificar el modelo anterior para adaptar a la nueva funsión
X_segments_reshaped_2d = X_segments_reshaped.reshape(-1, num_features)

# Normalizar datos
scaler = StandardScaler()
X_segments_normalized_2d = scaler.fit_transform(X_segments_reshaped_2d)

# Redimensionar los datos normalizados a 3D
X_segments_normalized = X_segments_normalized_2d.reshape(X_segments_reshaped.shape)

# One hot encoding, num_classes = 4 porque los valores de label son de 1 a 3, tiene que ser mayor que 3 para que lo incluya
y_segments_one_hot = to_categorical(y_segments, num_classes=4) 

# Dividir datos para entrenamiento y prueba
X_train, X_test, y_train, y_test = train_test_split(X_segments_normalized, y_segments_one_hot, test_size=0.2, random_state=42)

input_layer = Input(shape=(sequence_length, num_features))

def conv_block(x, filters, kernel_size, stride, pool_size, pool_stride):
    x = Conv1D(filters=filters, kernel_size=kernel_size, strides=stride, activation='relu')(x)
    x = MaxPooling1D(pool_size=pool_size, strides=pool_stride)(x)
    return x

# Aplicar el bloque convolucional al input
output = conv_block(input_layer, filters=16, kernel_size=15, stride=2, pool_size=4, pool_stride=4)
output = conv_block(output, filters=32, kernel_size=10, stride=1, pool_size=2, pool_stride=2)

# Conexión completa entre layers
x = Dense(32, activation='relu')(Flatten()(output))
x = Dropout(0.5)(x)
x = Dense(16, activation='relu')(x)
x = Dropout(0.5)(x)

# Output layer para 3 clases, donde 4 es el num_classes
output_layer = Dense(4, activation='softmax')(x)

# Generar el modelo para clasificación de 3 estados
model_c_three = Model(inputs=input_layer, outputs=output_layer)

model_c_three.summary()
optimizer = Adam(lr=0.01)
model_c_three.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])

model_c_three.fit(X_train, y_train, epochs=20, batch_size=32, validation_data=(X_test, y_test))

loss, accuracy = model_c_three.evaluate(X_test, y_test)
print(f'Precisión del modelo de clasificación de tres estados para datos de pecho: {accuracy * 100:.2f}%')


Model: "model_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_3 (InputLayer)        [(None, 3500, 8)]         0         
                                                                 
 conv1d_3 (Conv1D)           (None, 1743, 16)          1936      
                                                                 
 max_pooling1d_3 (MaxPoolin  (None, 435, 16)           0         
 g1D)                                                            
                                                                 
 conv1d_4 (Conv1D)           (None, 426, 32)           5152      
                                                                 
 max_pooling1d_4 (MaxPoolin  (None, 213, 32)           0         
 g1D)                                                            
                                                                 
 flatten_2 (Flatten)         (None, 6816)              0   

In [14]:
y_pred_three = np.argmax(model_c_three.predict(X_test), axis=1)
y_true_three = np.argmax(y_test, axis=1)

f1 = f1_score(y_true_three, y_pred_three, average='weighted')
precision = precision_score(y_true_three, y_pred_three, average='weighted')
recall = recall_score(y_true_three, y_pred_three, average='weighted')

# Imprimir los reesultados
print("Resultados del modelo de clasificación de tres estados para datos de pecho: ")
print(f'F1-score: {f1:.4f}')
print(f'Precisión: {precision:.4f}')
print(f'Recall: {recall:.4f}')

# Imprimir la matriz de confusión
conf_matrix = confusion_matrix(y_true_three, y_pred_three)
print('Matriz de confusión:')
print(conf_matrix)

Resultados del modelo de clasificación de tres estados para datos de pecho: 
F1-score: 0.9703
Precisión: 0.9718
Recall: 0.9698
Matriz de confusión:
[[1299    2   48]
 [   7  804   12]
 [   5    6  469]]
