In [1]:
import pickle
import numpy as np
import seaborn as sns
import pandas as pd
from scipy import stats
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from keras.utils import to_categorical
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

In [2]:
path = "/kaggle/input/dataset-wrist-filt/combined_wrist_filtered.pkl"

In [3]:
df_wrist = pd.read_pickle(path)

In [4]:
df_wrist.head()

Unnamed: 0,id,ACC_x,ACC_y,ACC_z,BVP,EDA,TEMP,label
214583,2.0,39.469502,27.912214,29.039982,-109.222518,1.637703,35.809887,1.0
214584,2.0,38.595181,27.796006,29.093324,-108.599068,1.637493,35.809884,1.0
214585,2.0,37.719606,27.683257,29.114913,-107.981064,1.637285,35.809882,1.0
214586,2.0,36.849917,27.575278,29.103982,-107.368689,1.63708,35.809879,1.0
214587,2.0,35.993018,27.47325,29.060322,-106.762018,1.636878,35.809876,1.0


In [5]:
df_wrist.isnull().sum()

id       0
ACC_x    0
ACC_y    0
ACC_z    0
BVP      0
EDA      0
TEMP     0
label    0
dtype: int64

In [17]:
# Extraer features y labels
X = df_wrist[['ACC_x', 'ACC_y', 'ACC_z', 'BVP', 'EDA', 'TEMP']].values
y = df_wrist['label'].values

# Clasificación binaria de los datos de muñeca

In [18]:
# Reducción de dimensiones con el método PCA para facilitar la ejecución del modelo
pca = PCA(n_components=0.95)
reduced_X_PCA = pca.fit_transform(X)
print("Dimensiones reducidas con PCA:", np.shape(reduced_X_PCA))

Dimensiones reducidas con PCA: (23206404, 4)


In [10]:
# Clasificar label 1(fase neutral) y 3(diversión) a un grupo(no estrés) y label 2(estrés) a otro
y_binary = np.where((y == 1) | (y == 3), 0, 1)

# Separar los datos para entrenamiento y prueba
X_train, X_test, y_train, y_test = train_test_split(reduced_X_PCA, y_binary, test_size=0.2, random_state=50)

In [11]:
# Crear y entrenar el clasificador Random Forest
rf_classifier = RandomForestClassifier(n_estimators=10, random_state=50)
rf_classifier.fit(X_train, y_train)

In [12]:
# Predicción
y_pred = rf_classifier.predict(X_test)

# Evaluar el modelo
accuracy = accuracy_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)

print(f"Precisión: {accuracy:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")
print("Confusion Matrix:")
print(conf_matrix)

Precisión: 0.9861
Recall: 0.9693
F1 Score: 0.9768
Confusion Matrix:
[[3224426   21414]
 [  42879 1352562]]


# Clasificación de tres estados afectivos de los datos de muñeca

In [19]:
# One hot encoding para tres clases
y = y - 1
y_3status = to_categorical(y, num_classes=3)

# Separar los datos para entrenamiento y prueba
X_train, X_test, y_train, y_test = train_test_split(reduced_X_PCA, y_3status, test_size=0.2, random_state=50)

In [20]:
# Crear y entrenar el clasificador Random Forest
rf_classifier = RandomForestClassifier(n_estimators=10, random_state=50)
rf_classifier.fit(X_train, y_train)

In [21]:
# Predicción
# Convertir los vectores transformados por one hot encoding a formato original para 
# poder aplicar las metricas
y_pred = np.argmax(rf_classifier.predict(X_test), axis=1)
y_true = np.argmax(y_test, axis=1)

# Calculate metrics
accuracy = accuracy_score(y_true, y_pred)
recall = recall_score(y_true, y_pred, average='weighted')
f1 = f1_score(y_true, y_pred, average='weighted')
conf_matrix = confusion_matrix(y_true, y_pred)

print(f"Precisión: {accuracy:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")
print("Confusion Matrix:")
print(conf_matrix)

Precisión: 0.9822
Recall: 0.9822
F1 Score: 0.9821
Confusion Matrix:
[[2441582   18304    5182]
 [  38678 1352268    4495]
 [  12854    3252  764666]]
