# Clasificación de actividades motoras con Random Forest

Este notebook implementa el pipeline completo de entrenamiento y evaluación usando Random Forest, siguiendo las recomendaciones del proyecto.

In [13]:
import pandas as pd
import numpy as np
from pathlib import Path
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
import joblib

# Ruta al dataset de features
features_path = Path('../results/features.csv')

# Cargar el dataset
df = pd.read_csv(features_path)
print(f"Shape: {df.shape}")
df.head()

Shape: (1453, 10)


Unnamed: 0,video,start_frame,end_frame,knee_angle_mean,knee_angle_std,trunk_incl_mean,trunk_incl_std,dist_sh_hip_mean,dist_sh_hip_std,actividad
0,Video 1.mp4,0,14,130.628125,2.872056,13.445633,0.349988,0.085713,0.000753,sentado
1,Video 1.mp4,7,21,137.670738,6.494605,15.133279,1.784241,0.083579,0.002886,sentado
2,Video 1.mp4,14,28,139.084214,7.027328,16.858814,1.088131,0.075826,0.008015,parandose
3,Video 1.mp4,21,35,146.722123,13.878174,15.8546,1.88055,0.070252,0.006627,parandose
4,Video 1.mp4,28,42,165.366983,8.949651,13.866931,1.522714,0.081177,0.014008,parandose


In [19]:
# Eliminar filas con nulos (si hay)
df = df.dropna()

# Eliminar columnas no numéricas (video y actividad)
X = df.drop(columns=['video', 'actividad'])
y = df['actividad']

# Codificar etiquetas si no son numéricas
y_encoded = y.astype('category').cat.codes
label_mapping = dict(enumerate(y.astype('category').cat.categories))
print("Label mapping:", label_mapping)

Label mapping: {0: 'caminado atras', 1: 'caminando adelante', 2: 'girando', 3: 'parado', 4: 'parandose', 5: 'sentado', 6: 'sentandose'}


In [20]:
# Split estratificado en train/test
X_train, X_test, y_train, y_test = train_test_split(
    X, y_encoded, test_size=0.2, random_state=42, stratify=y_encoded)

print(f"Train shape: {X_train.shape}, Test shape: {X_test.shape}")

Train shape: (431, 8), Test shape: (108, 8)


In [21]:
# Entrenar Random Forest
clf = RandomForestClassifier(n_estimators=100, max_depth=None, random_state=42)
clf.fit(X_train, y_train)

print(f"Score en train: {clf.score(X_train, y_train):.3f}")
print(f"Score en test: {clf.score(X_test, y_test):.3f}")

Score en train: 1.000
Score en test: 0.824


In [22]:
# Evaluación: métricas y matriz de confusión
y_pred = clf.predict(X_test)

print("\nReporte de clasificación:")
print(classification_report(y_test, y_pred, target_names=[label_mapping[i] for i in sorted(label_mapping)]))

print("\nMatriz de confusión:")
print(confusion_matrix(y_test, y_pred))


Reporte de clasificación:
                    precision    recall  f1-score   support

    caminado atras       0.89      1.00      0.94        16
caminando adelante       0.86      0.92      0.89        26
           girando       0.70      0.86      0.78        22
            parado       0.75      0.33      0.46         9
         parandose       0.86      0.86      0.86        14
           sentado       0.82      0.82      0.82        11
        sentandose       1.00      0.60      0.75        10

          accuracy                           0.82       108
         macro avg       0.84      0.77      0.78       108
      weighted avg       0.83      0.82      0.81       108


Matriz de confusión:
[[16  0  0  0  0  0  0]
 [ 0 24  2  0  0  0  0]
 [ 2  0 19  0  1  0  0]
 [ 0  3  3  3  0  0  0]
 [ 0  1  0  1 12  0  0]
 [ 0  0  1  0  1  9  0]
 [ 0  0  2  0  0  2  6]]


In [23]:
# Guardar el modelo y las métricas
from sklearn.metrics import classification_report
import json

# Guardar modelo
Path('../results').mkdir(exist_ok=True)
model_path = Path('../results/random_forest_model.joblib')
joblib.dump(clf, model_path)
print(f"Modelo guardado en: {model_path}")

# Guardar reporte de clasificación
report = classification_report(y_test, y_pred, target_names=[label_mapping[i] for i in sorted(label_mapping)], output_dict=True)
with open('../results/random_forest_metrics.json', 'w', encoding='utf-8') as f:
    json.dump(report, f, indent=2, ensure_ascii=False)
print("Reporte de métricas guardado en ../results/random_forest_metrics.json")

Modelo guardado en: ..\results\random_forest_model.joblib
Reporte de métricas guardado en ../results/random_forest_metrics.json
