In [70]:
from io import StringIO

from tensorflow import keras
import matplotlib.pyplot as plt
from scipy.io import arff
import seaborn as sns
from loguru import logger
import yaml

from datetime import datetime
import polars as pl
import pandas as pd
import numpy as np
import os
import joblib

import sys
from pathlib import Path
sys.path.append(str(Path.cwd().parent))

# Metrics
from sklearn.metrics import (
    classification_report, 
    confusion_matrix, 
    accuracy_score,
    precision_recall_fscore_support,
    balanced_accuracy_score,
    roc_auc_score,
    roc_curve
)

# PERSONAL FUNCTIONS
from plots import *
from utils import *
from functions.windows import create_feature_windows # creación de ventanas e ingenieria de características

In [52]:
# Cargar datos
df_accel = pl.read_csv(r'F:\UPC\Tesis\HARbit-Model\src\test\data\Watch_accelerometer.csv')
df_gyro = pl.read_csv(r'F:\UPC\Tesis\HARbit-Model\src\test\data\Watch_gyroscope.csv')

In [53]:
df_accel = normalize_columns(df_accel, user_col_name= "User", timestamp_col_name = "Creation_Time", 
                            label_col_name = "gt", x_col_name = 'x', y_col_name = 'y', z_col_name = 'z')
df_gyro = normalize_columns(df_gyro, user_col_name= "User", timestamp_col_name = "Creation_Time", 
                            label_col_name = "gt", x_col_name = 'x', y_col_name = 'y', z_col_name = 'z')

In [54]:
df_all_sensors = df_gyro.join(df_accel, on = ['Subject-id', 'Timestamp', 'Activity Label'], how = "inner")

In [55]:
df_gyro_inner = df_all_sensors.select(
    pl.col('Subject-id'),
    pl.col('Activity Label'),
    pl.col('Timestamp'),
    pl.col('X').alias('X'),
    pl.col('Y').alias('Y'),
    pl.col('Z').alias('Z')
)

In [56]:
df_accel_inner = df_all_sensors.select(
    pl.col('Subject-id'),
    pl.col('Activity Label'),
    pl.col('Timestamp'),
    pl.col('X_right').alias('X'),
    pl.col('Y_right').alias('Y'),
    pl.col('Z_right').alias('Z')
)

In [57]:
df_accel = convert_timestamp(df_accel_inner)
df_gyro = convert_timestamp(df_gyro_inner)

In [58]:
df_accel_pro = create_feature_windows(df_accel, window_seconds = 5, overlap_percent = 50, sampling_rate = 100)
df_gyro_pro = create_feature_windows(df_gyro, window_seconds = 5, overlap_percent = 50, sampling_rate = 100)

In [108]:
features_combined = pd.merge(
    df_gyro_pro,
    df_accel_pro, 
    on=['Subject-id', 'Activity Label', 'window_start', 'window_end', 'sample_count'], 
    how="inner"
)

In [71]:
model = keras.models.load_model(r"F:\UPC\Tesis\HARbit-Model\src\models\cnn-lstm_har_model_82.h5")
label_encoder = joblib.load(r"F:\UPC\Tesis\HARbit-Model\src\models\config\label_encoder.joblib")



In [109]:
features_combined = features_combined[features_combined['Activity Label'].isin(['walk', 'stairsup', 'sit', 'stand'])]

In [110]:
dict_tempo = {'walk': 'A', 'stairsup' : 'C', 'sit': 'D', 'stand': 'E'}

In [111]:
features_combined['Activity Label'] = label_encoder.transform(features_combined['Activity Label'].apply(lambda x: dict_tempo[x]))

In [113]:
def prepare_features_for_cnn_lstm_direct(features_df):
    """
    Prepara características ya extraídas DIRECTAMENTE para CNN-LSTM
    Sin crear ventanas adicionales - cada fila es una muestra independiente
    """
    # Convertir a pandas si es necesario
    if hasattr(features_df, 'to_pandas'):
        features_df = features_df.to_pandas()

    # Identificar columnas de características
    metadata_cols = ['Subject-id', 'Activity Label', 'window_start', 'window_end', 'sample_count']
    feature_cols = [col for col in features_df.columns if col not in metadata_cols]
    
    print(f"Características detectadas: {len(feature_cols)}")
    print(f"Muestras totales: {len(features_df)}")
    
    # Extraer características y etiquetas directamente
    X = features_df[feature_cols].values
    y = features_df['Activity Label'].values
    subjects = features_df['Subject-id'].values
    
    # Reshape para CNN: (samples, timesteps=1, features)
    # Cada ventana de 5s es una muestra individual
    X_reshaped = X.reshape(X.shape[0], 1, X.shape[1])

    return X_reshaped, y, subjects #, feature_cols

In [114]:
x, y, _ = prepare_features_for_cnn_lstm_direct(features_combined)

Características detectadas: 136
Muestras totales: 2923


In [119]:
np.unique(y)

array([0, 2, 3, 4])

In [120]:
nombres_clases_filtrados

['A', 'C', 'D', 'E']

In [117]:
# Obtener las clases únicas presentes en tus datos de prueba
clases_presentes = np.unique(y)

# Obtener los nombres de las clases correspondientes del label_encoder
nombres_clases_filtrados = [label_encoder.classes_[i] for i in clases_presentes]

print("Evaluando modelo...")

# Predicciones
y_pred = model.predict(x)
y_pred_classes = np.argmax(y_pred, axis=1)

# Métricas básicas
test_loss, test_accuracy = model.evaluate(x, y, verbose=0)
print(f"\nPérdida en test: {test_loss:.4f}")
print(f"Precisión en test: {test_accuracy:.4f}")

# Reporte de clasificación detallado (usando las etiquetas filtradas)
print("\n" + "="*50)
print("REPORTE DE CLASIFICACIÓN")
print("="*50)
print(classification_report(
    y, 
    y_pred_classes, 
    target_names=nombres_clases_filtrados,
    digits=4
))

# Matriz de confusión (usando las etiquetas filtradas)
cm = confusion_matrix(y, y_pred_classes)
plt.figure(figsize=(10, 8))
sns.heatmap(
    cm, 
    annot=True, 
    fmt='d', 
    cmap='Blues',
    xticklabels=nombres_clases_filtrados,
    yticklabels=nombres_clases_filtrados
)
plt.title('Matriz de Confusión - CNN-LSTM')
plt.xlabel('Predicción')
plt.ylabel('Valor Real')
plt.xticks(rotation=45)
plt.yticks(rotation=0)
plt.tight_layout()
plt.show()

Evaluando modelo...
[1m92/92[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step

Pérdida en test: 9.4177
Precisión en test: 0.1016

REPORTE DE CLASIFICACIÓN


ValueError: Number of classes, 5, does not match size of target_names, 4. Try specifying the labels parameter

In [44]:
balanced_plot(pl.DataFrame(features_combined), 'balance_plot_uci_activities')