In [1]:
from io import StringIO

import matplotlib.pyplot as plt
from scipy.io import arff
import seaborn as sns
from loguru import logger
import yaml

from datetime import datetime
import polars as pl
import pandas as pd
import numpy as np
import sys
import os
from pathlib import Path
sys.path.append(str(Path.cwd().parent))

# MODEL
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, LSTM, Dense, Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import plot_model
from sklearn.preprocessing import (
    LabelEncoder, 
    StandardScaler,
    label_binarize
)
from sklearn.metrics import (
    classification_report, 
    confusion_matrix, 
    accuracy_score,
    precision_recall_fscore_support,
    balanced_accuracy_score,
    roc_auc_score,
    roc_curve
)

# Save variables for model
import joblib

# PERSONAL FUNCTIONS
from utils import *
from models.main import *
from models.optimizer import ViterbiLiteDecoder
from functions.windows import create_feature_windows # creación de ventanas e ingenieria de características

In [428]:
# Accumulator for the accuracy of each evaluated recording; the last cell of the
# notebook appends one value per run. Re-running this cell discards all collected values.
accuracy = []

In [630]:
# Median of the accuracies collected so far.
# NOTE(review): this cell was executed after the evaluation cells further down
# (see the execution counts), so it only gives a meaningful value once `accuracy` is populated.
np.median(accuracy)

0.9642857142857143

In [429]:
# Index of the recording to load; used to build the input file name.
# NOTE(review): `id` shadows the Python builtin of the same name.
id = 1

In [606]:
# Build the path of recording number `id`, then advance `id` so that the next run of
# this cell points at the following file (the cell is intentionally not idempotent).
# NOTE(review): the leading "0" is hard-coded, so ids >= 10 yield "sit_left_010.json" —
# confirm that matches the file naming. The absolute path only works on the author's machine.
temp = r"F:\UPC\Tesis\HARbit-Model\src\data\real-data\sit-data\sit_left_0" + str(id) + ".json"
id += 1

In [607]:
# Alias the generated path and display it as a visual check of which file will be loaded.
path = temp
path

'F:\\UPC\\Tesis\\HARbit-Model\\src\\data\\real-data\\sit-data\\sit_left_09.json'

In [608]:
# Ground-truth activity label attached to every sample of the loaded recording.
target = "Sit"

In [609]:
# Read the recording file and split it into its two sensor streams.
import json
import os

with open(path, 'rb') as file:
    data = json.load(file)

# Despite the `_df` suffix these are the raw parsed JSON values stored under the
# 'gyro' and 'accel' keys; they are turned into DataFrames in a later cell.
gyro_df = data['gyro']
accel_df = data['accel']

In [610]:
# Wrap the raw accelerometer and gyroscope payloads in Polars DataFrames.
accel_temp = pl.DataFrame(accel_df)
gyro_temp = pl.DataFrame(gyro_df)

In [611]:
# Add a constant 'Usuario' column ('A') to both frames: every row of this recording
# is attributed to the same user.
accel_temp = accel_temp.with_columns(pl.lit('A').alias('Usuario'))
gyro_temp  = gyro_temp.with_columns(pl.lit('A').alias('Usuario'))

In [612]:
# Add the ground-truth activity label as a constant 'gt' column on both frames.
accel_temp   = accel_temp.with_columns(pl.lit(target).alias('gt'))
gyro_temp    = gyro_temp.with_columns(pl.lit(target).alias('gt'))

In [613]:
# Tell the project helper `normalize_columns` which columns of this recording hold the
# user, timestamp, label and x/y/z values.
# NOTE(review): later cells read 'Subject-id', 'Activity Label', 'Timestamp', 'X', 'Y'
# and 'Z', so the helper presumably renames to that schema — confirm in `utils`.
df_accel = normalize_columns(accel_temp,
                            user_col_name  = "Usuario", 
                            timestamp_col_name = "timestamp", 
                            label_col_name = "gt", 
                            x_col_name = "x", 
                            y_col_name = "y", 
                            z_col_name = "z")

df_gyro = normalize_columns(gyro_temp, 
                            user_col_name  = "Usuario", 
                            timestamp_col_name = "timestamp", 
                            label_col_name = "gt", 
                            x_col_name = "x", 
                            y_col_name = "y", 
                            z_col_name = "z")

In [614]:
# Run both frames through the project helper `convert_timestamp`.
# NOTE(review): presumably converts the raw timestamp values to datetimes — confirm in `utils`.
# The cell overwrites its own inputs, so re-running it applies the conversion twice.
df_accel = convert_timestamp(df_accel)
df_gyro = convert_timestamp(df_gyro)

In [615]:
def plot_axes_by_activity(df_gyro, df_accel, max_samples_per_activity=1000, figsize=(15, 12)):
    """
    Plot the X, Y and Z axes of the gyroscope and the accelerometer per activity.

    One subplot row is drawn per activity found in ``df_gyro``; the left column
    shows the gyroscope and the right column the accelerometer.

    Args:
        df_gyro: pandas DataFrame with 'Activity Label', 'X', 'Y', 'Z' columns.
        df_accel: pandas DataFrame with the same columns.
        max_samples_per_activity: Only the first N rows of each activity are drawn.
        figsize: Size of the whole figure, forwarded to ``plt.subplots``.
    """
    activity_names = df_gyro['Activity Label'].unique().tolist()
    n_rows = len(activity_names)
    last_row = n_rows - 1

    fig, axes = plt.subplots(n_rows, 2, figsize=figsize)
    if n_rows == 1:
        # A single row yields a 1-D axes array; restore the (row, col) layout.
        axes = axes.reshape(1, -1)

    # (title prefix, data) for the left and right subplot columns
    sensor_columns = (('Giroscopio', df_gyro), ('Acelerómetro', df_accel))

    for row, activity_name in enumerate(activity_names):
        for col, (sensor_title, sensor_df) in enumerate(sensor_columns):
            # Rows of this activity, truncated to keep the plot readable
            subset = sensor_df[sensor_df['Activity Label'] == activity_name]
            if len(subset) > max_samples_per_activity:
                subset = subset.head(max_samples_per_activity)
            subset = subset.copy()

            panel = axes[row, col]
            for axis_name in ('X', 'Y', 'Z'):
                panel.plot(subset[axis_name], label=axis_name, alpha=0.7, linewidth=1)
            panel.set_title(f'{sensor_title} - {activity_name}')
            panel.set_ylabel('Valor')
            panel.legend()
            panel.grid(True, alpha=0.3)

            # Only the bottom row gets an x-axis label
            if row == last_row:
                panel.set_xlabel('Muestra')

    plt.tight_layout()
    plt.show()

# Ejecutar visualización
# plot_axes_by_activity(df_gyro.to_pandas(), df_accel.to_pandas(), max_samples_per_activity = 2000,  figsize = (20, 5))

In [616]:
def analyze_sampling_rate(df, user_id=None, activity=None, plot=True):
    """
    Analyze the real sampling rate of a sensor recording.

    Prints a textual report (in Spanish), optionally draws four diagnostic
    plots, and returns the computed statistics.

    Args:
        df: Sensor data with a 'Timestamp' column. Objects exposing
            ``to_pandas`` (e.g. Polars frames) are converted; anything else is copied.
        user_id: If given, keep only rows whose 'Subject-id' equals it.
        activity: If given, keep only rows whose 'Activity Label' equals it.
        plot: Whether to draw the diagnostic figures (requires matplotlib).

    Returns:
        dict with the keys 'total_samples', 'duration_seconds',
        'avg_sampling_rate', 'time_intervals', 'frequencies' and
        'irregularities', or None when fewer than two samples remain after
        filtering. 'frequencies'['mode'] is None when every instantaneous
        frequency was discarded by the < 1000 Hz filter.
    """
    def _to_hz(interval_s):
        # Duplicate timestamps give a zero interval; report an infinite rate
        # instead of dividing by zero.
        return 1 / interval_s if interval_s > 0 else float('inf')

    # Convert to pandas if needed
    if hasattr(df, 'to_pandas'):
        df_pd = df.to_pandas()
    else:
        df_pd = df.copy()

    # Optional filtering by user and/or activity
    if user_id is not None:
        df_pd = df_pd[df_pd['Subject-id'] == user_id]
    if activity is not None:
        df_pd = df_pd[df_pd['Activity Label'] == activity]

    # Make sure Timestamp is datetime. `assign` builds a new frame, so nothing is
    # written into a filtered view of the caller's data.
    if df_pd['Timestamp'].dtype in ('object', 'int64'):
        df_pd = df_pd.assign(Timestamp=pd.to_datetime(df_pd['Timestamp']))

    print(f"📊 ANÁLISIS DE FRECUENCIA DE MUESTREO")
    print(f"=" * 50)
    print(f"  Total de muestras: {len(df_pd):,}")

    if len(df_pd) < 2:
        print("❌ Insuficientes datos para análisis")
        return None

    # Sort chronologically and compute the gaps between consecutive samples
    df_pd = df_pd.sort_values('Timestamp')
    time_diffs = df_pd['Timestamp'].diff().dt.total_seconds().dropna()

    interval_mean = time_diffs.mean()
    interval_std = time_diffs.std()
    interval_min = time_diffs.min()
    interval_max = time_diffs.max()

    # Overall statistics
    total_duration = (df_pd['Timestamp'].max() - df_pd['Timestamp'].min()).total_seconds()
    avg_sampling_rate = (len(df_pd) - 1) / total_duration if total_duration > 0 else 0

    print(f"  Duración total: {total_duration:.2f} segundos")
    print(f"  📡 Frecuencia promedio: {avg_sampling_rate:.2f} Hz")

    # Interval statistics
    print(f"\n📈 ESTADÍSTICAS DE INTERVALOS:")
    print(f"  Intervalo promedio: {interval_mean:.4f}s ({_to_hz(interval_mean):.1f} Hz)")
    print(f"  Intervalo mínimo: {interval_min:.4f}s ({_to_hz(interval_min):.1f} Hz)")
    print(f"  Intervalo máximo: {interval_max:.4f}s ({_to_hz(interval_max):.1f} Hz)")
    print(f"  Desviación estándar: {interval_std:.4f}s")

    # Instantaneous frequencies; extreme values (>= 1000 Hz, including the inf
    # produced by duplicate timestamps) are discarded.
    instant_frequencies = 1 / time_diffs
    instant_frequencies = instant_frequencies[instant_frequencies < 1000]

    # The filter above may leave nothing, in which case there is no mode.
    frequency_modes = instant_frequencies.mode()
    modal_frequency = frequency_modes.iloc[0] if len(frequency_modes) > 0 else None

    print(f"\n🎯 FRECUENCIAS INSTANTÁNEAS:")
    if modal_frequency is None:
        print("  Frecuencia modal: N/A")
    else:
        print(f"  Frecuencia modal: {modal_frequency:.1f} Hz")
    print(f"  Mediana: {instant_frequencies.median():.1f} Hz")
    print(f"  Rango: {instant_frequencies.min():.1f} - {instant_frequencies.max():.1f} Hz")

    if plot:
        # Imported lazily so that plot=False works without matplotlib installed.
        import matplotlib.pyplot as plt

        fig, axes = plt.subplots(2, 2, figsize=(15, 10))
        fig.suptitle('Análisis de Frecuencia de Muestreo', fontsize=16)

        # 1. Histogram of intervals
        axes[0,0].hist(time_diffs, bins=50, alpha=0.7, color='skyblue', edgecolor='black')
        axes[0,0].set_title('Distribución de Intervalos de Tiempo')
        axes[0,0].set_xlabel('Intervalo (segundos)')
        axes[0,0].set_ylabel('Frecuencia')
        axes[0,0].axvline(interval_mean, color='red', linestyle='--',
                         label=f'Media: {interval_mean:.4f}s')
        axes[0,0].legend()
        axes[0,0].grid(True, alpha=0.3)

        # 2. Histogram of instantaneous frequencies
        axes[0,1].hist(instant_frequencies, bins=50, alpha=0.7, color='lightgreen', edgecolor='black')
        axes[0,1].set_title('Distribución de Frecuencias Instantáneas')
        axes[0,1].set_xlabel('Frecuencia (Hz)')
        axes[0,1].set_ylabel('Frecuencia')
        axes[0,1].axvline(instant_frequencies.median(), color='red', linestyle='--',
                         label=f'Mediana: {instant_frequencies.median():.1f}Hz')
        axes[0,1].legend()
        axes[0,1].grid(True, alpha=0.3)

        # 3. Time series of intervals (at most 1000 evenly spaced points)
        interval_idx = np.linspace(0, len(time_diffs)-1, min(1000, len(time_diffs))).astype(int)
        axes[1,0].plot(interval_idx, time_diffs.iloc[interval_idx], 'b-', alpha=0.7, linewidth=1)
        axes[1,0].set_title('Serie Temporal de Intervalos')
        axes[1,0].set_xlabel('Muestra')
        axes[1,0].set_ylabel('Intervalo (segundos)')
        axes[1,0].grid(True, alpha=0.3)

        # 4. Time series of frequencies. The positions are derived from the
        # filtered series itself: it can be shorter than `time_diffs`, so reusing
        # the interval positions would index out of bounds.
        freq_idx = np.linspace(
            0, len(instant_frequencies)-1, min(1000, len(instant_frequencies))
        ).astype(int)
        axes[1,1].plot(freq_idx, instant_frequencies.iloc[freq_idx], 'g-', alpha=0.7, linewidth=1)
        axes[1,1].set_title('Serie Temporal de Frecuencias')
        axes[1,1].set_xlabel('Muestra')
        axes[1,1].set_ylabel('Frecuencia (Hz)')
        axes[1,1].grid(True, alpha=0.3)

        plt.tight_layout()
        plt.show()

    # Irregularity detection
    print(f"\n🔍 DETECCIÓN DE IRREGULARIDADES:")

    # Large gaps (> 1 second)
    large_gaps = time_diffs[time_diffs > 1.0]
    print(f"  Gaps > 1s: {len(large_gaps)} ({100*len(large_gaps)/len(time_diffs):.1f}%)")

    # Coefficient of variation of the intervals (undefined when the mean is zero)
    cv = interval_std / interval_mean if interval_mean > 0 else float('nan')
    print(f"  Coeficiente de variación: {cv:.3f}")

    if cv > 0.1:
        print("  ⚠️ Alta variabilidad en el muestreo")
    else:
        print("  ✅ Muestreo relativamente estable")

    return {
        'total_samples': len(df_pd),
        'duration_seconds': total_duration,
        'avg_sampling_rate': avg_sampling_rate,
        'time_intervals': {
            'mean': interval_mean,
            'std': interval_std,
            'min': interval_min,
            'max': interval_max
        },
        'frequencies': {
            'mean': instant_frequencies.mean(),
            'median': instant_frequencies.median(),
            'mode': modal_frequency,
            'min': instant_frequencies.min(),
            'max': instant_frequencies.max()
        },
        'irregularities': {
            'large_gaps_count': len(large_gaps),
            'coefficient_of_variation': cv
        }
    }

# # Analizar tus datos
# print("🔍 Analizando frecuencia de muestreo de df_accel...")
# sampling_info = analyze_sampling_rate(df_accel, plot=True)

In [617]:
def create_raw_windows_250_timesteps_robust(df, window_seconds=5, overlap_percent=50, 
                                           sampling_rate=20, target_timesteps=250,
                                           min_data_threshold=0.5, max_gap_seconds=1.0):
    """
    Cut a sensor recording into fixed-duration sliding windows based on real time.

    Every (user, activity) group is walked with a window of ``window_seconds``
    that advances by ``window_seconds * (100 - overlap_percent) / 100``. Each
    window is validated with ``validate_window_data``, resampled to
    ``target_timesteps`` rows with ``resample_window_robust`` and kept only if
    ``is_window_quality_good`` accepts it. Progress is printed along the way.

    Args:
        df: Sensor data (Polars or pandas) with the columns 'Subject-id',
            'Activity Label', 'Timestamp', 'X', 'Y' and 'Z'.
        window_seconds: Window duration in seconds (default: 5).
        overlap_percent: Overlap between consecutive windows, in percent
            (default: 50). Must be below 100.
        sampling_rate: Nominal sampling rate in Hz (default: 20).
        target_timesteps: Number of timesteps per output window (default: 250).
        min_data_threshold: Minimum fraction of the expected samples a window
            must contain (0.5 = 50%).
        max_gap_seconds: Largest gap between consecutive samples tolerated
            inside a window, in seconds.

    Returns:
        X: Array of shape (n_windows, target_timesteps, 3) with the window data.
        y: Array with the activity label of every window.
        subjects: Array with the user id of every window.
        metadata: DataFrame with one descriptive row per window.
        ``(None, None, None, None)`` is returned when no valid window was created.

    Raises:
        ValueError: If the overlap leaves no positive step between windows
            (the sliding loop would never advance).
    """
    # Time parameters
    window_duration_ns = int(window_seconds * 1e9)
    step_duration_ns = int(window_duration_ns * (100 - overlap_percent) / 100)
    if step_duration_ns <= 0:
        raise ValueError(
            f"overlap_percent must be below 100 so that windows advance; got {overlap_percent}"
        )

    print(f"🔧 Configuración de ventanas RAW ROBUSTA:")
    print(f"  Duración: {window_seconds}s")
    print(f"  Timesteps objetivo: {target_timesteps}")
    print(f"  Frecuencia de muestreo: {sampling_rate}Hz")
    print(f"  Solapamiento: {overlap_percent}%")
    print(f"  Umbral mínimo de datos: {min_data_threshold*100:.1f}%")
    print(f"  Máximo gap permitido: {max_gap_seconds}s")

    # Convert to pandas if needed
    if hasattr(df, 'to_pandas'):
        df_pd = df.to_pandas()
    else:
        df_pd = df.copy()

    # Make sure Timestamp is datetime
    if df_pd['Timestamp'].dtype in ('object', 'int64'):
        df_pd['Timestamp'] = pd.to_datetime(df_pd['Timestamp'])

    print(f"  Duración de ventana: {window_seconds}s")
    print(f"  Paso entre ventanas: {step_duration_ns / 1e9:.2f}s")

    # Result accumulators
    X_windows = []
    y_labels = []
    subjects_list = []
    metadata_list = []

    total_windows_attempted = 0
    total_windows_created = 0

    # Process every (user, activity) pair independently
    for (user_id, activity), group in df_pd.groupby(['Subject-id', 'Activity Label']):

        # Sort chronologically, drop incomplete rows, then renumber the rows
        group = (
            group.sort_values('Timestamp')
                 .dropna(subset=['X', 'Y', 'Z', 'Timestamp'])
                 .reset_index(drop=True)
        )

        if len(group) < window_seconds * sampling_rate:
            print(f"⚠️ Usuario {user_id}, Actividad {activity}: Muy pocos datos ({len(group)} muestras)")
            continue

        # Timestamps as integers.
        # NOTE(review): the arithmetic below assumes these integers are nanoseconds,
        # i.e. a datetime64[ns] column — confirm for frames with another datetime unit.
        if group['Timestamp'].dtype.name.startswith('datetime'):
            timestamps_ns = group['Timestamp'].astype('int64')
        else:
            timestamps_ns = group['Timestamp'].values

        print(f"👤 Usuario {user_id}, Actividad {activity}: {len(group)} muestras")

        # Time range covered by this group
        start_time_ns = timestamps_ns.min()
        end_time_ns = timestamps_ns.max()
        total_duration_s = (end_time_ns - start_time_ns) / 1e9

        print(f"   Duración total: {total_duration_s:.1f}s")

        # Detect and report large gaps
        time_diffs = np.diff(timestamps_ns) / 1e9  # seconds
        large_gaps = time_diffs > max_gap_seconds
        if np.any(large_gaps):
            n_gaps = np.sum(large_gaps)
            max_gap = np.max(time_diffs)
            print(f"   ⚠️ Detectados {n_gaps} gaps > {max_gap_seconds}s (máximo: {max_gap:.1f}s)")

        # Sliding windows
        window_count = 0
        current_start_ns = start_time_ns

        while current_start_ns + window_duration_ns <= end_time_ns:
            total_windows_attempted += 1
            current_end_ns = current_start_ns + window_duration_ns

            # Rows whose timestamp falls in [start, end)
            window_mask = (
                (timestamps_ns >= current_start_ns) &
                (timestamps_ns < current_end_ns)
            )
            window_data_df = group[window_mask]

            is_valid, validation_info = validate_window_data(
                window_data_df,
                window_seconds,
                sampling_rate,
                min_data_threshold,
                max_gap_seconds
            )

            # Invalid windows are skipped silently; only the final summary reports them.
            if is_valid:
                sensor_data = window_data_df[['X', 'Y', 'Z']].values
                window_timestamps = window_data_df['Timestamp'].values

                try:
                    # Resize / interpolate to target_timesteps
                    resampled_window = resample_window_robust(
                        sensor_data, window_timestamps, target_timesteps, window_seconds
                    )

                    # Final quality check
                    if is_window_quality_good(resampled_window):
                        X_windows.append(resampled_window)
                        y_labels.append(activity)
                        subjects_list.append(user_id)

                        # Extended metadata
                        metadata_list.append({
                            'Subject-id': user_id,
                            'Activity Label': activity,
                            'window_start': pd.to_datetime(current_start_ns),
                            'window_end': pd.to_datetime(current_end_ns),
                            'original_samples': len(window_data_df),
                            'resampled_timesteps': target_timesteps,
                            'window_idx': window_count,
                            'actual_duration_s': window_seconds,
                            'data_coverage': validation_info['data_coverage'],
                            'max_gap_s': validation_info['max_gap'],
                            'sampling_rate_actual': validation_info['actual_rate']
                        })

                        window_count += 1
                        total_windows_created += 1
                    else:
                        print(f"   ❌ Ventana {window_count}: Calidad de datos insuficiente después de interpolación")

                except Exception as e:
                    # Best effort: one bad window must not abort the whole recording.
                    print(f"   ❌ Ventana {window_count}: Error en interpolación - {str(e)}")

            # Advance to the next window start
            current_start_ns += step_duration_ns

        print(f"  ✅ Creadas {window_count} ventanas válidas")

    # Final summary. No window is attempted when every group is too short (or the
    # input is empty), so the ratio is guarded against a zero denominator.
    if total_windows_attempted > 0:
        success_rate = (total_windows_created/total_windows_attempted)*100
    else:
        success_rate = 0.0

    print(f"\n📊 RESUMEN DE VALIDACIÓN:")
    print(f"  Ventanas intentadas: {total_windows_attempted}")
    print(f"  Ventanas creadas: {total_windows_created}")
    print(f"  Tasa de éxito: {success_rate:.1f}%")

    if len(X_windows) == 0:
        print("❌ No se crearon ventanas válidas")
        return None, None, None, None

    # Convert to numpy arrays
    X = np.array(X_windows)
    y = np.array(y_labels)
    subjects = np.array(subjects_list)
    metadata_df = pd.DataFrame(metadata_list)

    print(f"\n📊 RESULTADO FINAL (ROBUSTO):")
    print(f"  Forma de X: {X.shape}")
    print(f"  Forma de y: {y.shape}")
    print(f"  Total ventanas: {len(X)}")
    print(f"  Usuarios únicos: {len(np.unique(subjects))}")
    print(f"  Actividades únicas: {sorted(np.unique(y))}")

    return X, y, subjects, metadata_df


def validate_window_data(window_data_df, window_seconds, sampling_rate, 
                        min_data_threshold, max_gap_seconds):
    """
    Decide whether one window of sensor data is acceptable.

    A window is rejected when it is empty, holds fewer samples than
    ``min_data_threshold`` times the expected count
    (``window_seconds * sampling_rate``), contains a gap between consecutive
    samples larger than ``max_gap_seconds``, or has NaN / infinite values in
    its 'X', 'Y' or 'Z' columns.

    Args:
        window_data_df: pandas DataFrame with 'Timestamp', 'X', 'Y', 'Z' columns.
        window_seconds: Nominal window duration in seconds.
        sampling_rate: Nominal sampling rate in Hz.
        min_data_threshold: Minimum accepted ratio of actual to expected samples.
        max_gap_seconds: Largest accepted gap between consecutive samples.

    Returns:
        bool: True if the window is valid.
        dict: Validation details with the keys 'reason', 'data_coverage',
            'max_gap' and 'actual_rate'.
    """
    if len(window_data_df) == 0:
        return False, {'reason': 'empty', 'data_coverage': 0, 'max_gap': float('inf'), 'actual_rate': 0}

    # Coverage relative to the number of samples the nominal rate would give
    expected_samples = window_seconds * sampling_rate
    actual_samples = len(window_data_df)
    data_coverage = actual_samples / expected_samples

    # Too few samples
    if data_coverage < min_data_threshold:
        return False, {
            'reason': 'insufficient_data', 
            'data_coverage': data_coverage,
            'max_gap': float('inf'),
            'actual_rate': 0
        }

    # Gaps between consecutive samples and the measured sampling rate
    if len(window_data_df) > 1:
        timestamps = pd.to_datetime(window_data_df['Timestamp'])
        time_diffs = timestamps.diff().dt.total_seconds().fillna(0)
        max_gap = time_diffs.max()
        span_seconds = (timestamps.max() - timestamps.min()).total_seconds()
        if span_seconds > 0:
            actual_rate = len(window_data_df) / span_seconds
        else:
            # All samples share one timestamp, so the rate cannot be measured;
            # fall back to the nominal rate, as the single-sample case does.
            actual_rate = sampling_rate
    else:
        max_gap = 0
        actual_rate = sampling_rate

    details = {
        'data_coverage': data_coverage,
        'max_gap': max_gap,
        'actual_rate': actual_rate
    }

    # Gaps that are too large
    if max_gap > max_gap_seconds:
        return False, {'reason': 'large_gap', **details}

    # NaN or infinite sensor readings
    sensor_data = window_data_df[['X', 'Y', 'Z']].values
    if np.any(np.isnan(sensor_data)) or np.any(np.isinf(sensor_data)):
        return False, {'reason': 'invalid_values', **details}

    return True, {'reason': 'valid', **details}


def resample_window_robust(sensor_data, timestamps, target_timesteps, window_seconds):
    """
    Resample one window of sensor data to exactly ``target_timesteps`` rows.

    Strategy:
      * empty input           -> zeros
      * already the right size -> copy of the input
      * a single sample       -> that sample repeated
      * more samples than requested -> ``scipy.signal.resample`` per axis
        (operates on the sample sequence; the timestamps are not used)
      * fewer samples than requested -> interpolation over the normalized
        timestamps (cubic with >= 4 samples, linear otherwise)
    If an axis fails, it falls back to plain linear interpolation; if the whole
    procedure fails, the first sample is repeated.

    Args:
        sensor_data: Array of shape (n_samples, n_axes). Any number of axes is
            supported; the caller in this notebook passes 3 (X, Y, Z).
        timestamps: Sequence of n_samples timestamps (datetime-like objects,
            numpy datetime64 or plain numbers). Only their relative spacing is used.
        target_timesteps: Number of rows of the result.
        window_seconds: Accepted for interface compatibility; not used.

    Returns:
        numpy array of shape (target_timesteps, n_axes).
    """
    from scipy.interpolate import interp1d
    from scipy import signal

    sensor_data = np.asarray(sensor_data)
    # Number of channels; defaults to 3 when the input carries no second dimension.
    n_axes = sensor_data.shape[1] if sensor_data.ndim == 2 else 3

    if len(sensor_data) == 0:
        return np.zeros((target_timesteps, n_axes))

    original_timesteps = len(sensor_data)

    if original_timesteps == target_timesteps:
        return sensor_data.copy()

    if original_timesteps == 1:
        return np.tile(sensor_data[0], (target_timesteps, 1))

    try:
        # Turn the timestamps into plain floats. The absolute scale is irrelevant
        # because they are normalized to [0, 1] right below.
        if hasattr(timestamps[0], 'timestamp'):
            time_values = np.array([t.timestamp() for t in timestamps])
        else:
            ts_array = np.asarray(timestamps)
            if ts_array.dtype.kind == 'M':
                time_values = ts_array.astype('datetime64[ns]').astype('int64') / 1e9
            else:
                # Plain numbers: keep fractional parts instead of truncating to int
                time_values = ts_array.astype('float64')

        # Normalize times to [0, 1]
        time_min = time_values.min()
        time_max = time_values.max()

        if time_max > time_min:
            relative_times = (time_values - time_min) / (time_max - time_min)
        else:
            relative_times = np.linspace(0, 1, len(time_values))

        # Uniform target grid
        target_times = np.linspace(0, 1, target_timesteps)

        resampled_data = np.zeros((target_timesteps, n_axes))
        has_distinct_times = len(np.unique(relative_times)) > 1
        interpolation_kind = 'cubic' if original_timesteps >= 4 else 'linear'

        for axis in range(n_axes):
            try:
                if original_timesteps >= target_timesteps:
                    # Downsample: signal.resample preserves the signal's characteristics
                    resampled_axis = signal.resample(sensor_data[:, axis], target_timesteps)
                elif has_distinct_times:
                    # Upsample: interpolate over the real (normalized) times
                    interpolator = interp1d(
                        relative_times,
                        sensor_data[:, axis],
                        kind=interpolation_kind,
                        bounds_error=False,
                        fill_value='extrapolate'
                    )
                    resampled_axis = interpolator(target_times)
                else:
                    resampled_axis = np.full(target_timesteps, sensor_data[0, axis])

                resampled_data[:, axis] = resampled_axis

            except Exception:
                # Fallback: simple linear interpolation
                resampled_data[:, axis] = np.interp(
                    target_times, relative_times, sensor_data[:, axis]
                )

        return resampled_data

    except Exception as e:
        print(f"Error en remuestreo robusto: {str(e)}")
        # Last resort: replicate the first sample
        return np.tile(sensor_data[0], (target_timesteps, 1))


def is_window_quality_good(resampled_window, max_std_threshold=50.0,
                           max_abs_value=1000.0, min_std_threshold=0.001):
    """
    Check the final quality of a resampled window.

    Args:
        resampled_window: 2-D array of shape (timesteps, axes).
        max_std_threshold: An axis whose standard deviation exceeds this is
            rejected (excessive variance).
        max_abs_value: Any absolute value above this rejects the window
            (possible interpolation blow-up). Tune to the sensor's range.
        min_std_threshold: An axis whose standard deviation is below this is
            rejected (signal too flat, which may indicate an error).

    Returns:
        bool: True when the window passes every check.
    """
    # NaN or infinite values
    if np.any(np.isnan(resampled_window)) or np.any(np.isinf(resampled_window)):
        return False

    # Extreme values (possible interpolation errors)
    if np.any(np.abs(resampled_window) > max_abs_value):
        return False

    # Per-axis spread: neither excessive nor suspiciously flat
    for axis in range(resampled_window.shape[1]):
        std_axis = np.std(resampled_window[:, axis])
        if std_axis > max_std_threshold:
            return False
        if std_axis < min_std_threshold:
            return False

    return True

In [618]:
# Cut the accelerometer stream into 5 s windows with 50 % overlap, each resampled to
# 100 timesteps (the "250" in the function name is only its default target).
X_all, y_all, subjects_all, metadata_all = create_raw_windows_250_timesteps_robust(
    df=df_accel,
    window_seconds=5,
    overlap_percent=50,
    sampling_rate=20,
    target_timesteps=100,
    min_data_threshold=0.8,  # require at least 80% of the expected samples
    max_gap_seconds=1.0      # allow gaps of at most 1 second
)

🔧 Configuración de ventanas RAW ROBUSTA:
  Duración: 5s
  Timesteps objetivo: 100
  Frecuencia de muestreo: 20Hz
  Solapamiento: 50%
  Umbral mínimo de datos: 80.0%
  Máximo gap permitido: 1.0s
  Duración de ventana: 5s
  Paso entre ventanas: 2.50s
👤 Usuario A, Actividad Sit: 1459 muestras
   Duración total: 58.0s
  ✅ Creadas 22 ventanas válidas

📊 RESUMEN DE VALIDACIÓN:
  Ventanas intentadas: 22
  Ventanas creadas: 22
  Tasa de éxito: 100.0%

📊 RESULTADO FINAL (ROBUSTO):
  Forma de X: (22, 100, 3)
  Forma de y: (22,)
  Total ventanas: 22
  Usuarios únicos: 1
  Actividades únicas: ['Sit']


In [619]:
import tensorflow as tf
from tensorflow.python.framework.convert_to_constants import convert_variables_to_constants_v2

# Load the SavedModel and take its default serving signature as the inference function.
# NOTE(review): absolute local path — only resolves on the author's machine.
loaded = tf.saved_model.load(r"F:\UPC\Tesis\HARbit-Model\src\cnn_temporal_20_epochs_93\saved_model")
infer = loaded.signatures["serving_default"]

In [620]:
# Convert the window array to a float32 tensor for the serving signature.
X_tensor = tf.constant(X_all, dtype=tf.float32)

In [621]:
# Restore the label encoder saved alongside the model.
# NOTE(review): joblib.load unpickles the file, so only load artifacts from a trusted source.
label_encoder = joblib.load(r'F:\UPC\Tesis\HARbit-Model\src\cnn_temporal_20_epochs_93\label_encoder.joblib')

In [622]:
# Run inference on all windows; the result is consumed below as a mapping whose
# first value holds the per-class scores.
y_pred = infer(X_tensor)

In [623]:
# Replace the string activity labels with the encoder's integer class indices.
# NOTE(review): this overwrites `y_all` in place, so re-running the cell feeds the
# already-encoded integers back into `transform` — presumably an error for an encoder
# fitted on strings; re-run the windowing cell first.
y_all = label_encoder.transform(y_all)

In [624]:
import numpy as np
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Take the first (presumably only) output of the signature as a NumPy array of class scores
y_pred_probs = list(y_pred.values())[0].numpy()

# Predicted class = index of the highest score along axis 1
y_pred_classes = np.argmax(y_pred_probs, axis=1)

# Compare against the encoded ground truth (labels must be integers 0..N-1)
acc = accuracy_score(y_all, y_pred_classes)
print("Accuracy:", acc)

# More detailed per-class report
print(classification_report(y_all, y_pred_classes))

# Confusion matrix (covers only the classes present in y_all / y_pred_classes)
print(confusion_matrix(y_all, y_pred_classes))

Accuracy: 1.0
              precision    recall  f1-score   support

           2       1.00      1.00      1.00        22

    accuracy                           1.00        22
   macro avg       1.00      1.00      1.00        22
weighted avg       1.00      1.00      1.00        22

[[22]]


In [625]:
# Display the predicted class index of every window.
y_pred_classes

array([2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2],
      dtype=int64)

In [626]:
# Display the class names known to the encoder; an integer label is a position in this array.
label_encoder.classes_

array(['Eat', 'Others', 'Sit', 'Stand', 'Type', 'Walk', 'Workouts',
       'Write'], dtype='<U8')

In [627]:
# Record this recording's accuracy. Re-running the cell appends the same value again.
accuracy.append(acc)