# Clasificación de actividades motoras con Random Forest

Este notebook implementa el pipeline completo de entrenamiento y evaluación usando Random Forest, siguiendo las recomendaciones del proyecto.

In [1]:
import pandas as pd
import numpy as np
from pathlib import Path
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
import joblib

# Ruta al dataset de features (ajusta si es necesario)
features_path = Path('../data/processed/features.csv')

# Cargar el dataset
df = pd.read_csv(features_path)
print(f"Shape: {df.shape}")
df.head()

FileNotFoundError: [Errno 2] No such file or directory: '..\\data\\processed\\features.csv'

In [None]:
# Eliminar filas con nulos (si hay)
df = df.dropna()

# Asumimos que la columna de la actividad se llama 'actividad'
X = df.drop(columns=['actividad'])
y = df['actividad']

# Codificar etiquetas si no son numéricas
y_encoded = y.astype('category').cat.codes
label_mapping = dict(enumerate(y.astype('category').cat.categories))
print("Label mapping:", label_mapping)

In [None]:
# Split estratificado en train/test
X_train, X_test, y_train, y_test = train_test_split(
    X, y_encoded, test_size=0.2, random_state=42, stratify=y_encoded)

print(f"Train shape: {X_train.shape}, Test shape: {X_test.shape}")

In [None]:
# Entrenar Random Forest
clf = RandomForestClassifier(n_estimators=100, max_depth=None, random_state=42)
clf.fit(X_train, y_train)

print(f"Score en train: {clf.score(X_train, y_train):.3f}")
print(f"Score en test: {clf.score(X_test, y_test):.3f}")

In [None]:
# Evaluación: métricas y matriz de confusión
y_pred = clf.predict(X_test)

print("\nReporte de clasificación:")
print(classification_report(y_test, y_pred, target_names=[label_mapping[i] for i in sorted(label_mapping)]))

print("\nMatriz de confusión:")
print(confusion_matrix(y_test, y_pred))

In [None]:
# Guardar el modelo y las métricas
from sklearn.metrics import classification_report
import json

# Guardar modelo
Path('../results').mkdir(exist_ok=True)
model_path = Path('../results/random_forest_model.joblib')
joblib.dump(clf, model_path)
print(f"Modelo guardado en: {model_path}")

# Guardar reporte de clasificación
report = classification_report(y_test, y_pred, target_names=[label_mapping[i] for i in sorted(label_mapping)], output_dict=True)
with open('../results/random_forest_metrics.json', 'w', encoding='utf-8') as f:
    json.dump(report, f, indent=2, ensure_ascii=False)
print("Reporte de métricas guardado en ../results/random_forest_metrics.json")