## With outliers

In [None]:
# Gather every global DataFrame named df_relabeled_<label>_200 and stack them
# so the metrics of all grasp categories can be plotted side by side.
all_data = []
relabeled_dfs = {}

for var_name in list(globals().keys()):
    if var_name.startswith('df_relabeled_') and var_name.endswith('_200'):
        relabeled_value = var_name.split('_')[2]  # the <label> part of the variable name
        relabeled_dfs[relabeled_value] = globals()[var_name]

        # Append a copy so the source DataFrame is never modified downstream.
        df_copy = globals()[var_name].copy()
        all_data.append(df_copy)

if all_data:
    combined_df = pd.concat(all_data, ignore_index=True)

    # Every column that is not metadata is treated as a metric (computed once).
    metric_columns = [col for col in combined_df.columns
                      if col not in ['subject', 'relabeled', 'stimulus']]

    # Min-max scale the metric columns so all boxplots share a comparable range.
    scaler = MinMaxScaler()
    combined_df[metric_columns] = scaler.fit_transform(combined_df[metric_columns])

    # One subplot per metric, two per row.
    # constrained_layout is intentionally NOT requested: plt.tight_layout() below
    # replaces any other layout engine (and warns about it), so asking for both
    # only produced a warning while the tight layout won anyway.
    n_metrics = len(metric_columns)
    n_rows = (n_metrics + 1) // 2
    fig, axes = plt.subplots(nrows=n_rows, ncols=2, figsize=(14, 3 * n_rows))
    axes = axes.flatten()

    # Loop-invariant: tilt the x tick labels only when there are many categories.
    rotate_labels = len(combined_df['relabeled'].unique()) > 5

    # axes always has at least n_metrics entries, so zip pairs each metric with an axis.
    for ax, metric in zip(axes, metric_columns):
        sns.boxplot(x='relabeled', y=metric, data=combined_df, ax=ax, palette='viridis')

        ax.set_title(f'Comparación de {metric} por grasp', fontsize=14)
        ax.set_xlabel('Categoría')
        ax.set_ylabel(metric)

        if rotate_labels:
            ax.tick_params(axis='x', rotation=45)

    # Remove the unused trailing subplot when the number of metrics is odd.
    for i in range(n_metrics, len(axes)):
        fig.delaxes(axes[i])

    plt.suptitle('Comparación de métricas EMG entre diferentes categorías', fontsize=16, y=1.02)

    plt.tight_layout()
    plt.show()

    # Per-category descriptive statistics for every metric.
    print("Stadistic for grasp:")
    for metric in metric_columns:
        print(f"\nMétrica: {metric}")
        display(combined_df.groupby('relabeled')[metric].describe())
else:
    print("No se encontraron variables df_relabeled_X_200 en el espacio global.")

In [None]:
import plotly.express as px

# Interactive Plotly boxplot for every metric. Nothing is drawn (only a message
# is printed) when no df_relabeled_X_200 DataFrames were collected.
if all_data:
    for metric in metric_columns:
        box_options = dict(
            x='relabeled',
            y=metric,
            color='relabeled',
            points='all',            # overlay every individual observation
            hover_data=['subject'],  # show the subject when hovering a point
            title=f'Boxplot interactivo para {metric} por grasp',
        )
        fig = px.box(combined_df, **box_options)

        layout_options = dict(
            xaxis_title='Grasp (relabeled)',
            yaxis_title=metric,
            boxmode='group',
            showlegend=False,
        )
        fig.update_layout(**layout_options)
        fig.show()
else:
    print("No se encontraron variables df_relabeled_X_200 en el espacio global.")


- Fisher Score analysis 

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import f_oneway
import numpy as np

# Gather every global DataFrame named df_relabeled_<label>_200 and stack them
# so all grasp categories can be compared in one table.
all_data = []
relabeled_dfs = {}

for var_name in list(globals().keys()):
    if var_name.startswith('df_relabeled_') and var_name.endswith('_200'):
        relabeled_value = var_name.split('_')[2]  # the <label> part of the variable name
        relabeled_dfs[relabeled_value] = globals()[var_name]

        # Append a copy so the source DataFrame is never modified downstream.
        df_copy = globals()[var_name].copy()
        all_data.append(df_copy)

if all_data:
    combined_df = pd.concat(all_data, ignore_index=True)

    # Every column that is not metadata is treated as a metric (computed once).
    metric_columns = [col for col in combined_df.columns
                      if col not in ['subject', 'relabeled', 'stimulus']]

    # Min-max scale the metric columns so all boxplots share a comparable range.
    scaler = MinMaxScaler()
    combined_df[metric_columns] = scaler.fit_transform(combined_df[metric_columns])

    # One subplot per metric, two per row.
    # constrained_layout is intentionally NOT requested: plt.tight_layout() below
    # replaces any other layout engine (and warns about it).
    n_metrics = len(metric_columns)
    n_rows = (n_metrics + 1) // 2
    fig, axes = plt.subplots(nrows=n_rows, ncols=2, figsize=(14, 3 * n_rows))
    axes = axes.flatten()

    # Loop-invariant: tilt the x tick labels only when there are many categories.
    rotate_labels = len(combined_df['relabeled'].unique()) > 5

    for ax, metric in zip(axes, metric_columns):
        sns.boxplot(x='relabeled', y=metric, data=combined_df, ax=ax, palette='viridis')

        ax.set_title(f'Comparación de {metric} por grasp', fontsize=14)
        ax.set_xlabel('Categoría')
        ax.set_ylabel(metric)

        if rotate_labels:
            ax.tick_params(axis='x', rotation=45)

    # Remove the unused trailing subplot when the number of metrics is odd.
    for i in range(n_metrics, len(axes)):
        fig.delaxes(axes[i])

    plt.suptitle('Comparación de métricas EMG entre diferentes categorías', fontsize=16, y=1.02)

    plt.tight_layout()
    plt.show()

    # ---- One-way ANOVA and Fisher score per metric ----
    anova_results = {}
    fisher_scores = {}

    categories = combined_df['relabeled'].unique()

    for metric in metric_columns:
        # One array of non-missing values per category.
        groups = [combined_df[combined_df['relabeled'] == cat][metric].dropna().values
                  for cat in categories]

        # Best effort: a metric whose ANOVA cannot be computed is reported as NaN.
        # Only Exception is caught so KeyboardInterrupt/SystemExit still propagate,
        # and the reason is printed instead of being silently discarded.
        try:
            f_stat, p_val = f_oneway(*groups)
        except Exception as exc:
            print(f"ANOVA failed for {metric}: {exc}")
            f_stat, p_val = np.nan, np.nan

        anova_results[metric] = {'F-statistic': f_stat, 'p-value': p_val}

        # Fisher score: size-weighted between-class scatter of the class means
        # divided by the size-weighted sum of within-class variances.
        overall_mean = combined_df[metric].mean()
        num = 0
        den = 0

        for cat in categories:
            class_data = combined_df[combined_df['relabeled'] == cat][metric]
            ni = len(class_data)
            class_mean = class_data.mean()
            class_var = class_data.var()

            num += ni * (class_mean - overall_mean) ** 2
            den += ni * class_var

        # A zero denominator (no within-class spread) is reported as score 0.
        fisher = num / den if den != 0 else 0
        fisher_scores[metric] = fisher

    # Results table, most discriminative metric (highest Fisher score) first.
    results_df = pd.DataFrame.from_dict(anova_results, orient='index')
    results_df['Fisher Score'] = pd.Series(fisher_scores)
    results_df_sorted = results_df.sort_values(by='Fisher Score', ascending=False)

    print("\n📊 Resultados ANOVA y Fisher Score ordenados:\n")
    display(results_df_sorted)

else:
    print("No se encontraron variables df_relabeled_X_200 en el espacio global.")

In [None]:
# Keep only the metrics that clear all three thresholds at once:
# F-statistic > 5, p-value < 0.05 and Fisher Score > 0.5.
passes_f = results_df_sorted['F-statistic'] > 5
passes_p = results_df_sorted['p-value'] < 0.05
passes_fisher = results_df_sorted['Fisher Score'] > 0.5
df = results_df_sorted[passes_f & passes_p & passes_fisher]
df

In [None]:
# For every metric and every grasp label: average the metric per subject, then
# take the standard deviation of those per-subject means across subjects.
grasp_labels = combined_df['relabeled'].unique()

inter_subject_stability = {
    metric: {
        label: (
            combined_df[combined_df['relabeled'] == label]
            .groupby('subject')[metric]
            .mean()
            .std()
        )
        for label in grasp_labels
    }
    for metric in metric_columns
}

# Metrics as rows, grasp labels as columns.
stability_df = pd.DataFrame(inter_subject_stability).T
display(stability_df)

In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler

# Metrics left out of this comparison.
excluded_features = ['ZC', 'ZC_STD', 'Kurt', 'Kurt_STD']
filtered_metric_columns = []
for col in metric_columns:
    if col not in excluded_features:
        filtered_metric_columns.append(col)

# Label column plus the retained metrics; outliers are kept.
selected_columns = ['relabeled'] + filtered_metric_columns
filtered_df = combined_df[selected_columns].copy()

# Median of each metric per grasp label, then the variance of those medians
# across labels.
median_df = filtered_df.groupby('relabeled')[filtered_metric_columns].median()
median_variance = median_df.var()

# Long table: one row per metric.
median_variance_df = median_variance.reset_index()
median_variance_df.columns = ['métrica', 'varianza_entre_medianas']

# Min-max normalise the variances so they can be compared on one scale.
scaler = MinMaxScaler()
raw_variances = median_variance_df[['varianza_entre_medianas']]
median_variance_df['varianza_normalizada'] = scaler.fit_transform(raw_variances)

# Table, largest normalised variance first.
print("Varianza entre medianas y su normalización:")
ranked_descending = median_variance_df.sort_values(by='varianza_normalizada', ascending=False)
display(ranked_descending)

# Horizontal bar chart, smallest normalised variance first.
ranked_ascending = median_variance_df.sort_values(by='varianza_normalizada', ascending=True)
plt.figure(figsize=(10, 6))
sns.barplot(
    data=ranked_ascending,
    x='varianza_normalizada',
    y='métrica',
    palette='viridis',
)
plt.title('Varianza normalizada entre medianas por métrica (con atípicos)', fontsize=14)
plt.xlabel('Varianza normalizada')
plt.ylabel('Métrica')
plt.grid(axis='x', linestyle='--', alpha=0.6)
plt.tight_layout()
plt.show()

In [None]:
# Pairwise correlation between all metric columns.
metric_values = combined_df[metric_columns]
corr_matrix = metric_values.corr()

# Heatmap of the correlation matrix; per-cell annotations are switched off.
heatmap_options = dict(annot=False, cmap='coolwarm', fmt='.2f', linewidths=0.5)
plt.figure(figsize=(10, 8))
sns.heatmap(corr_matrix, **heatmap_options)
plt.title('Mapa de Correlación de Métricas')
plt.show()

In [None]:
# Show the correlation matrix (computed in the heatmap cell) as a table.
display(corr_matrix)

In [None]:
# Flatten the correlation matrix into a Series indexed by (metric, metric) pairs.
corr_pairs = corr_matrix.unstack()

# Keep each unordered pair exactly once and drop the diagonal: a pair survives
# only when its first metric appears before its second one in the matrix.
# (Without this, every metric is listed against itself with r = 1 and each real
# pair shows up twice. Value-based drop_duplicates is not used because it would
# also discard distinct pairs that happen to share the same coefficient.)
column_position = {name: pos for pos, name in enumerate(corr_matrix.columns)}
is_unique_pair = pd.Series(
    [column_position[first] < column_position[second] for first, second in corr_pairs.index],
    index=corr_pairs.index,
    dtype=bool,
)
corr_pairs = corr_pairs[is_unique_pair]

# Strongly correlated pairs (|r| > 0.89), strongest positive correlation first.
high_corr = corr_pairs[corr_pairs.abs() > 0.89].sort_values(ascending=False)

# Tabulate as (metric 1, metric 2, correlation).
high_corr_df = high_corr.reset_index()
high_corr_df.columns = ['Métrica 1', 'Métrica 2', 'Correlación']

display(high_corr_df)

In [None]:
from sklearn.ensemble import RandomForestClassifier
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# ---------- 1. Drop the metrics that are not used ----------
excluded_metrics = ['ZC', 'ZC_STD', 'Kurt', 'Kurt_STD']
filtered_metrics = []
for col in metric_columns:
    if col not in excluded_metrics:
        filtered_metrics.append(col)

# ---------- 2. Fit on all rows (outliers are kept) ----------
X, y = combined_df[filtered_metrics], combined_df['relabeled']

# Random Forest with a fixed seed so the importances are reproducible.
rf_model = RandomForestClassifier(random_state=42, n_estimators=100)
rf_model.fit(X, y)

# Per-feature importance, in the same order as filtered_metrics.
importances = rf_model.feature_importances_

# ---------- 3. Bar chart ----------
plt.figure(figsize=(11, 6))
sns.barplot(y=importances, x=filtered_metrics, palette='viridis')
plt.title('Importancia de Características para la Clasificación de Agarre (con atípicos y sin ZC/Kurtosis)')
plt.xlabel('Métricas')
plt.ylabel('Importancia')
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()


- Random Forest

In [None]:
from sklearn.ensemble import RandomForestClassifier
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# ---------- 1. Metrics to use ----------
# `filtered_metrics` must already exist, e.g.:
# filtered_metrics = [col for col in combined_df.columns if col not in ['subject', 'relabeled', 'stimulus']]

# ---------- 2. Features and target ----------
X, y = combined_df[filtered_metrics], combined_df['relabeled']

# ---------- 3. Fit the model (fixed seed) ----------
rf_model = RandomForestClassifier(random_state=42, n_estimators=100)
rf_model.fit(X, y)

# ---------- 4. Feature importances, highest first ----------
importances = rf_model.feature_importances_
importance_table = pd.DataFrame({
    'Métrica': filtered_metrics,
    'Importancia': importances
})
importance_table = importance_table.sort_values(by='Importancia', ascending=False)
importance_df = importance_table.reset_index(drop=True)

# ---------- 5. Table ----------
print("Importancia de las características sin eliminar atípicos:")
display(importance_df)

# ---------- 6. Bar chart ----------
plt.figure(figsize=(11, 6))
sns.barplot(data=importance_df, x='Métrica', y='Importancia', palette='viridis')
plt.title('Importancia de Características para la Clasificación de Agarre (con atípicos)')
plt.xlabel('Métricas')
plt.ylabel('Importancia')
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()


- Decision Tree

In [None]:
from sklearn.tree import DecisionTreeClassifier
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

# ---------- 1. Metrics to use ----------
# `filtered_metrics` must already exist, e.g.:
# filtered_metrics = [col for col in combined_df.columns if col not in ['subject', 'relabeled', 'stimulus']]

# ---------- 2. Features and target ----------
X, y = combined_df[filtered_metrics], combined_df['relabeled']

# ---------- 3. Fit a single decision tree (fixed seed) ----------
tree_model = DecisionTreeClassifier(random_state=42)
tree_model.fit(X, y)

# ---------- 4. Feature importances expressed as percentages ----------
importances = tree_model.feature_importances_
importances_percentage = 100 * importances / importances.sum()

importance_table = pd.DataFrame({
    'Métrica': filtered_metrics,
    'Importancia (%)': importances_percentage
})
importance_table = importance_table.sort_values(by='Importancia (%)', ascending=False)
importances_df = importance_table.reset_index(drop=True)

# Table, most important metric first.
print("\n📊 Importancia de las características (%):")
display(importances_df)

# Sanity check: the percentages should add up to roughly 100.
total_percentage = importances_percentage.sum()
print(f"\n✅ Suma total de importancias: {total_percentage:.2f}%")

# ---------- 5. Bar chart ----------
plt.figure(figsize=(11, 6))
sns.barplot(x='Métrica', y='Importancia (%)', data=importances_df, palette='viridis')
plt.title('Importancia de Características para la Clasificación de Agarre (Árbol de Decisión con atípicos)')
plt.xlabel('Métricas')
plt.ylabel('Importancia (%)')
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()


- Coefficient of variation (CV)

In [None]:
import os
import re
import numpy as np
import pandas as pd

# ---------- Función para calcular CV ----------
def calculate_cv(metrics_df):
    """Return the coefficient of variation of every column, in percent.

    Args:
        metrics_df: DataFrame whose columns are the metrics to summarise.

    Returns:
        dict mapping each column name to ``(std / mean) * 100``. A column whose
        mean is exactly 0 maps to NaN, since the ratio is undefined there.
    """
    def _column_cv(column):
        mean_value = column.mean()
        std_value = column.std()
        if mean_value == 0:
            return np.nan
        return (std_value / mean_value) * 100

    return {metric: _column_cv(metrics_df[metric]) for metric in metrics_df.columns}

# ---------- Parameters ----------
fm = 2000               # passed as `fm` to the envelope filter below
window_length = 400     # rows per window
overlap = 0             # overlap argument forwarded to the windowing helper
target_channel = "Channel 8"

all_metrics = []

# ---------- Walk the subject folders and their .mat files ----------
for folder in os.listdir(data_path):
    if re.match(r'[sS]\d+', folder) or re.match(r'Subject\d+', folder):
        folder_path = os.path.join(data_path, folder)

        # A plain file whose name matches the subject pattern would make
        # os.listdir fail; only real directories are scanned.
        if not os.path.isdir(folder_path):
            continue

        for file_name in os.listdir(folder_path):
            if file_name.endswith('.mat'):
                try:
                    mat_data = src.loadmatNina(database, file_name, subject=folder)
                except Exception as e:
                    print(f"Error loading {file_name}: {str(e)}")
                    continue

                test_df, grasps = src.build_dataframe(
                    mat_file=mat_data,
                    database=database,
                    filename=file_name,
                    rectify=False,
                    normalize=True
                )
                # Keep only the rows whose label is in filtered_labels.
                test_df = test_df[test_df['relabeled'].isin(filtered_labels)]

                if target_channel not in test_df.columns:
                    print(f"{target_channel} no encontrado en {file_name}, omitiendo.")
                    continue

                # Envelope of the target channel only; use the `fm` parameter
                # defined above instead of repeating the literal.
                emg_columns = [target_channel]
                envelope_df = src.get_envelope_lowpass(
                    test_df[emg_columns], fm=fm, cutoff_freq=0.6, envelope_type=1
                )

                # Re-attach the metadata columns next to the envelope.
                meta_columns = ["Time (s)", "subject", "re_repetition", "stimulus", "relabeled"]
                result_df = pd.concat([envelope_df, test_df[meta_columns]], axis=1)

                window_count = 0  # windows accepted for this file

                for grasp in grasps:
                    try:
                        print(f"\nProcessing Grasp {grasp} in file {file_name}:")
                        grasp_df = result_df[result_df['stimulus'] == grasp]

                        if grasp_df.empty:
                            print(f"No hay datos para el grasp {grasp} en {file_name}.")
                            continue

                        ventanas = src.create_windows_with_overlap(grasp_df, window_length, overlap)

                        for i, ventana in enumerate(ventanas):
                            # Only full-length windows are used.
                            if len(ventana) == window_length:
                                signal = ventana[target_channel].values
                                metrics = calculate_emg_metrics_means(signal)

                                metrics_with_meta = {
                                    "subject": folder,
                                    "relabeled": grasp_df['relabeled'].iloc[0],
                                    "stimulus": grasp,
                                    "channel": target_channel,
                                    "window_id": f"{file_name}_{grasp}_{i}",
                                    "file_name": file_name,
                                    "window_number": window_count,
                                    **metrics
                                }

                                all_metrics.append(metrics_with_meta)
                                window_count += 1
                    except Exception as e:
                        # Best effort: a failing grasp is reported and skipped.
                        print(f"Error processing grasp {grasp}: {str(e)}")
                        continue

                print(f"Procesadas {window_count} ventanas para el archivo {file_name}")

# ---------- One row per window ----------
metrics_df_200 = pd.DataFrame(all_metrics)

# ---------- Metadata columns first, metric columns alphabetically after ----------
meta_cols = ["subject", "relabeled", "stimulus", "channel", "window_id", "file_name", "window_number"]
metric_cols = [col for col in metrics_df_200.columns if col not in meta_cols]
column_order = meta_cols + sorted(metric_cols)
metrics_df_200 = metrics_df_200[column_order]

# ---------- Drop the metrics that are not analysed ----------
excluded_metrics = ['ZC', 'ZC_STD', 'Kurt', 'Kurt_STD']
filtered_metrics = [col for col in metric_cols if col not in excluded_metrics]

# ---------- Coefficient of variation per metric, largest first ----------
cv_values = calculate_cv(metrics_df_200[filtered_metrics])
cv_df = pd.DataFrame.from_dict(cv_values, orient='index', columns=['Coeficiente de Variación'])
cv_df = cv_df.sort_values(by='Coeficiente de Variación', ascending=False)

# ---------- Report ----------
print("\n📊 Coeficiente de variación de las métricas (con atípicos, sin ZC/Kurtosis):")
display(cv_df)

print("\n📈 Resumen de métricas por tipo de movimiento (completo):")
grouped_df = metrics_df_200.drop(columns=['channel'], errors='ignore')
# Mean of every numeric column per `relabeled` value.
summary_by_subject_movement_200 = grouped_df.select_dtypes(include=['number']).groupby(['relabeled']).mean()
display(summary_by_subject_movement_200)

print(f"\n✅ Total de ventanas procesadas: {len(metrics_df_200)}")
print(f"📌 Distribución por sujeto:\n{metrics_df_200['subject'].value_counts()}")
print(f"📌 Distribución por movimiento:\n{metrics_df_200['relabeled'].value_counts()}")


In [None]:
# Min-max normalisation of the coefficient of variation:
# (value - min) / (max - min), stored in a new column of cv_df.
cv_raw = cv_df['Coeficiente de Variación']
cv_lowest, cv_highest = cv_raw.min(), cv_raw.max()
cv_df['Coeficiente de Variación Normalizado'] = (cv_raw - cv_lowest) / (cv_highest - cv_lowest)

# Show the table including the normalised column.
display(cv_df)

- PCA

In [None]:
# NOTE(review): `summary_by_movement_200` is not defined in the visible part of this
# notebook (the CV cell creates `summary_by_subject_movement_200`) — confirm it exists
# on a fresh kernel, or whether the name below is a typo.
print(summary_by_movement_200.columns.tolist())
# Move the index of the per-movement summary back into a regular column.
# NOTE(review): this rebinds the same name, so running the cell twice resets the
# index a second time — verify that is harmless for the cells that follow.
summary_by_subject_movement_200 = summary_by_subject_movement_200.reset_index()


In [None]:
import pandas as pd
import numpy as np
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score
import plotly.express as px

# 1. Feature selection: every non-metadata column whose name does not contain
#    (case-insensitively) any of the excluded metric names.
excluded = ['ZC', 'ZC_STD', 'Kurt', 'Kurt_STD']
features = []
for c in summary_by_relabeled_200.columns:
    if c in ['subject', 'relabeled', 'stimulus']:
        continue
    if any(exc.upper() in c.upper() for exc in excluded):
        continue
    features.append(c)

X = summary_by_relabeled_200[features].values
y = summary_by_relabeled_200['relabeled'].values

# 2. Standardise first: PCA is sensitive to the scale of each feature.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# 3. PCA keeping at most 5 components (fewer when there are fewer features).
n_components = min(len(features), 5)
pca = PCA(n_components=n_components, random_state=42)
X_pca = pca.fit_transform(X_scaled)

# 4. Loadings: one row per feature, one column per principal component.
component_names = [f'PC{i+1}' for i in range(n_components)]
loadings = pd.DataFrame(pca.components_.T, index=features, columns=component_names)

# Features with the largest absolute loading on the first two components.
for component in ('PC1', 'PC2'):
    print(f"Top 5 features por carga absoluta en {component}:")
    display(loadings[component].abs().sort_values(ascending=False).head(5))

# 5. Scatter of the first two components, coloured by label and with one
#    marker symbol per subject.
pca_df = pd.DataFrame(X_pca[:, :2], columns=['PC1','PC2'])
pca_df['relabeled'] = y
pca_df['subject']   = summary_by_relabeled_200['subject'].values

scatter_options = dict(
    x='PC1', y='PC2',
    color='relabeled', symbol='subject',
    title='Proyección PCA (PC1 vs PC2)',
    hover_data=['subject','relabeled'],
)
fig = px.scatter(pca_df, **scatter_options)
fig.show()

# 6. Logistic regression evaluated with 5-fold cross-validation on the PCA scores.
clf = LogisticRegression(max_iter=1000, random_state=42)
scores = cross_val_score(clf, X_pca, y, cv=5, scoring='accuracy')
mean_accuracy, accuracy_spread = scores.mean(), scores.std()
print(f"\nAccuracy 5-fold CV en espacio PCA ({n_components} componentes): {mean_accuracy:.3f} ± {accuracy_spread:.3f}")


In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score

# 5-fold cross-validation of a Random Forest (fixed seed) on the PCA scores.
rf_model = RandomForestClassifier(random_state=42, n_estimators=100)
rf_scores = cross_val_score(rf_model, X_pca, y, cv=5)

rf_mean, rf_spread = rf_scores.mean(), rf_scores.std()
print(f"🎯 Accuracy 5-fold CV con Random Forest: {rf_mean:.3f} ± {rf_spread:.3f}")


In [None]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import cross_val_score

# 5-fold cross-validation of a 5-nearest-neighbours classifier on the PCA scores.
knn_model = KNeighborsClassifier(n_neighbors=5)
knn_scores = cross_val_score(knn_model, X_pca, y, cv=5)

# Mean and standard deviation of the fold scores.
knn_mean, knn_spread = knn_scores.mean(), knn_scores.std()
print(f"🎯 Accuracy 5-fold CV con KNN: {knn_mean:.3f} ± {knn_spread:.3f}")


In [None]:
from sklearn.svm import SVC
from sklearn.model_selection import cross_val_score

# 5-fold cross-validation of an RBF-kernel SVM on the PCA scores.
svm_model = SVC(C=1.0, kernel='rbf', gamma='scale')
svm_scores = cross_val_score(svm_model, X_pca, y, cv=5)

svm_mean, svm_spread = svm_scores.mean(), svm_scores.std()
print(f"🎯 Accuracy 5-fold CV con SVM (RBF kernel): {svm_mean:.3f} ± {svm_spread:.3f}")



In [None]:
import matplotlib.pyplot as plt

# Per-fold cross-validation scores of each model, keyed by display name.
models = {
    'SVM (RBF kernel)': svm_scores,
    'Random Forest': rf_scores,
    'KNN': knn_scores
}

# One line per model, fold number on the x axis.
plt.figure(figsize=(10,6))
for model_name, fold_scores in models.items():
    # The fold axis follows the actual number of scores instead of assuming 5,
    # and the loop variable is not called `scores` so it does not overwrite
    # the logistic-regression `scores` computed in the PCA cell.
    fold_numbers = range(1, len(fold_scores) + 1)
    plt.plot(fold_numbers, fold_scores, label=f'{model_name}')

plt.title('Puntuaciones de validación cruzada')
plt.xlabel('Fold')
plt.ylabel('Accuracy')
plt.legend()
plt.grid(True)
plt.show()


In [None]:
import xgboost

In [None]:
# Install the dependencies from inside the notebook (shell magic, versions unpinned).
!pip install scikit-learn plotly pycaret xgboost
# Star import: setup / compare_models / finalize_model / evaluate_model used below
# are presumably provided by this module — not defined anywhere else in view.
from pycaret.classification import *

# Build the PyCaret input table: one column per principal component plus the target.
data = pd.DataFrame(X_pca, columns=[f'PC{i+1}' for i in range(X_pca.shape[1])])
data['relabeled'] = y

# Configure the PyCaret experiment (5 folds, fixed session seed).
# NOTE(review): `data` already holds PCA scores and pca=True is requested again here —
# confirm the second PCA is intended.
# NOTE(review): this rebinds `clf`, which the PCA cell used for its LogisticRegression.
clf = setup(data=data, target='relabeled', session_id=42, fold=5, 
            normalize=True, feature_selection=True, pca=True)

# Compare the candidate models and keep the best one.
best_model = compare_models()

# Finalize the best model.
final_model = finalize_model(best_model)

# Evaluate the finalized model.
evaluate_model(final_model)


In [None]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import cross_val_score

# 5-fold cross-validation of a 5-nearest-neighbours classifier on the PCA scores.
knn_model = KNeighborsClassifier(n_neighbors=5)
knn_scores = cross_val_score(knn_model, X_pca, y, cv=5)

# Mean and standard deviation of the fold scores.
knn_mean, knn_spread = knn_scores.mean(), knn_scores.std()
print(f"🎯 Accuracy 5-fold CV con KNN: {knn_mean:.3f} ± {knn_spread:.3f}")


## Without Outliers

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import f_oneway
import numpy as np
from sklearn.preprocessing import MinMaxScaler

# Gather every global DataFrame named df_relabeled_<label>_200 and stack them
# so all grasp categories can be compared in one table.
all_data = []
relabeled_dfs = {}

for var_name in list(globals().keys()):
    if var_name.startswith('df_relabeled_') and var_name.endswith('_200'):
        relabeled_value = var_name.split('_')[2]  # the <label> part of the variable name
        relabeled_dfs[relabeled_value] = globals()[var_name]

        # Append a copy so the source DataFrame is never modified downstream.
        df_copy = globals()[var_name].copy()
        all_data.append(df_copy)

if all_data:
    combined_df = pd.concat(all_data, ignore_index=True)

    # Metric columns: everything that is not metadata and whose name does not
    # contain ZC or KURT. The column name is upper-cased before the test, so the
    # tokens must be upper-case too ('Kurt' could never match and the Kurt
    # columns were silently kept).
    metric_columns = [
        col for col in combined_df.columns
        if col not in ['subject', 'relabeled', 'stimulus']
        and not any(exclude in col.upper() for exclude in ['ZC', 'KURT'])
    ]

    # Remove outliers column by column with the 1.5 * IQR rule. Each column's
    # bounds are computed on the rows that survived the previous columns.
    for col in metric_columns:
        Q1 = combined_df[col].quantile(0.25)
        Q3 = combined_df[col].quantile(0.75)
        IQR = Q3 - Q1
        lower_bound = Q1 - 1.5 * IQR
        upper_bound = Q3 + 1.5 * IQR
        combined_df = combined_df[(combined_df[col] >= lower_bound) & (combined_df[col] <= upper_bound)]

    # Detach from the unfiltered frame so the scaled values below are written to
    # an independent DataFrame (avoids pandas' SettingWithCopyWarning).
    combined_df = combined_df.copy()

    # Min-max scale the metric columns.
    scaler = MinMaxScaler()
    combined_df[metric_columns] = scaler.fit_transform(combined_df[metric_columns])

    # One subplot per metric, two per row.
    # constrained_layout is intentionally NOT requested: plt.tight_layout() below
    # replaces any other layout engine (and warns about it).
    n_metrics = len(metric_columns)
    n_rows = (n_metrics + 1) // 2
    fig, axes = plt.subplots(nrows=n_rows, ncols=2, figsize=(14, 3 * n_rows))
    axes = axes.flatten()

    # Loop-invariant: tilt the x tick labels only when there are many categories.
    rotate_labels = len(combined_df['relabeled'].unique()) > 5

    for ax, metric in zip(axes, metric_columns):
        sns.boxplot(x='relabeled', y=metric, data=combined_df, ax=ax, palette='viridis')
        ax.set_title(f'Comparación de {metric} por grasp', fontsize=14)
        ax.set_xlabel('Categoría')
        ax.set_ylabel(metric)
        if rotate_labels:
            ax.tick_params(axis='x', rotation=45)

    # Remove the unused trailing subplot when the number of metrics is odd.
    for i in range(n_metrics, len(axes)):
        fig.delaxes(axes[i])

    plt.suptitle('Comparación de métricas EMG entre diferentes categorías', fontsize=16, y=1.02)
    plt.tight_layout()
    plt.show()

    # ---- One-way ANOVA and Fisher score per metric ----
    anova_results = {}
    fisher_scores = {}

    categories = combined_df['relabeled'].unique()

    for metric in metric_columns:
        # One array of non-missing values per category.
        groups = [combined_df[combined_df['relabeled'] == cat][metric].dropna().values
                  for cat in categories]

        # Best effort: a metric whose ANOVA cannot be computed is reported as NaN.
        # Only Exception is caught so KeyboardInterrupt/SystemExit still propagate.
        try:
            f_stat, p_val = f_oneway(*groups)
        except Exception as exc:
            print(f"ANOVA failed for {metric}: {exc}")
            f_stat, p_val = np.nan, np.nan

        anova_results[metric] = {'F-statistic': f_stat, 'p-value': p_val}

        # Fisher score: size-weighted between-class scatter of the class means
        # divided by the size-weighted sum of within-class variances.
        overall_mean = combined_df[metric].mean()
        num, den = 0, 0
        for cat in categories:
            class_data = combined_df[combined_df['relabeled'] == cat][metric]
            ni = len(class_data)
            class_mean = class_data.mean()
            class_var = class_data.var()
            num += ni * (class_mean - overall_mean) ** 2
            den += ni * class_var
        # A zero denominator (no within-class spread) is reported as score 0.
        fisher = num / den if den != 0 else 0
        fisher_scores[metric] = fisher

    # Results table, highest Fisher score first.
    results_df = pd.DataFrame.from_dict(anova_results, orient='index')
    results_df['Fisher Score'] = pd.Series(fisher_scores)
    results_df_sorted = results_df.sort_values(by='Fisher Score', ascending=False)

    print("\n📊 Resultados ANOVA y Fisher Score ordenados:\n")
    display(results_df_sorted)

else:
    print("No se encontraron variables df_relabeled_X_200 en el espacio global.")

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from scipy.stats import f_oneway
from sklearn.preprocessing import MinMaxScaler
import plotly.express as px

# Gather every global DataFrame named df_relabeled_<label>_200 and stack them
# so all grasp categories can be compared in one table.
all_data = []
relabeled_dfs = {}

for var_name in list(globals().keys()):
    if var_name.startswith('df_relabeled_') and var_name.endswith('_200'):
        relabeled_value = var_name.split('_')[2]  # the <label> part of the variable name
        relabeled_dfs[relabeled_value] = globals()[var_name]

        # Append a copy so the source DataFrame is never modified downstream.
        df_copy = globals()[var_name].copy()
        all_data.append(df_copy)

if all_data:
    combined_df = pd.concat(all_data, ignore_index=True)

    # Metric columns: everything that is not metadata and whose upper-cased name
    # does not contain ZC or KURT.
    metric_columns = [
        col for col in combined_df.columns
        if col not in ['subject', 'relabeled', 'stimulus']
        and not any(exclude in col.upper() for exclude in ['ZC', 'KURT'])
    ]

    # Snapshot taken before any row is removed, used to list the outliers later.
    original_df = combined_df.copy()

    # Remove outliers column by column with the 1.5 * IQR rule. Each column's
    # bounds are computed on the rows that survived the previous columns.
    for col in metric_columns:
        Q1 = combined_df[col].quantile(0.25)
        Q3 = combined_df[col].quantile(0.75)
        IQR = Q3 - Q1
        lower_bound = Q1 - 1.5 * IQR
        upper_bound = Q3 + 1.5 * IQR
        combined_df = combined_df[(combined_df[col] >= lower_bound) & (combined_df[col] <= upper_bound)]

    # Removed rows = rows of the snapshot whose index label no longer exists.
    # The index is unique (concat used ignore_index=True) and filtering keeps the
    # labels, so this is exact even when several rows hold identical values;
    # comparing row contents with drop_duplicates would miscount those.
    outliers_removed = original_df.loc[~original_df.index.isin(combined_df.index)]

    # Detach from the unfiltered frame so the scaled values below are written to
    # an independent DataFrame (avoids pandas' SettingWithCopyWarning).
    combined_df = combined_df.copy()

    # Min-max scale the metric columns.
    scaler = MinMaxScaler()
    combined_df[metric_columns] = scaler.fit_transform(combined_df[metric_columns])

    # Report how many rows were dropped and preview a few of them.
    print(f"\n📌 Total de datos atípicos eliminados: {len(outliers_removed)}")
    display(outliers_removed[['subject', 'relabeled'] + metric_columns].head())

    # Interactive Plotly boxplot per metric.
    for metric in metric_columns:
        fig = px.box(
            combined_df,
            x="relabeled",
            y=metric,
            points="all",  # overlay every individual observation
            title=f"Boxplot Interactivo de {metric} por Categoría",
            labels={"relabeled": "Categoría", metric: metric},
            color="relabeled",
            hover_data=['subject']  # show the subject in the tooltip
        )
        fig.update_layout(showlegend=False)
        fig.show()

    # ---- One-way ANOVA and Fisher score per metric ----
    anova_results = {}
    fisher_scores = {}
    categories = combined_df['relabeled'].unique()

    for metric in metric_columns:
        # One array of non-missing values per category.
        groups = [combined_df[combined_df['relabeled'] == cat][metric].dropna().values
                  for cat in categories]

        # Best effort: a metric whose ANOVA cannot be computed is reported as NaN.
        # Only Exception is caught so KeyboardInterrupt/SystemExit still propagate.
        try:
            f_stat, p_val = f_oneway(*groups)
        except Exception as exc:
            print(f"ANOVA failed for {metric}: {exc}")
            f_stat, p_val = np.nan, np.nan

        anova_results[metric] = {'F-statistic': f_stat, 'p-value': p_val}

        # Fisher score: size-weighted between-class scatter of the class means
        # divided by the size-weighted sum of within-class variances.
        overall_mean = combined_df[metric].mean()
        num, den = 0, 0
        for cat in categories:
            class_data = combined_df[combined_df['relabeled'] == cat][metric]
            ni = len(class_data)
            class_mean = class_data.mean()
            class_var = class_data.var()
            num += ni * (class_mean - overall_mean) ** 2
            den += ni * class_var
        # A zero denominator (no within-class spread) is reported as score 0.
        fisher = num / den if den != 0 else 0
        fisher_scores[metric] = fisher

    # Results table, highest Fisher score first.
    results_df = pd.DataFrame.from_dict(anova_results, orient='index')
    results_df['Fisher Score'] = pd.Series(fisher_scores)
    results_df_sorted = results_df.sort_values(by='Fisher Score', ascending=False)

    print("\n📊 Resultados ANOVA y Fisher Score ordenados:\n")
    display(results_df_sorted)

else:
    print("No se encontraron variables df_relabeled_X_200 en el espacio global.")


In [None]:
from sklearn.preprocessing import MinMaxScaler

# Collect every global DataFrame whose variable name follows the
# df_relabeled_<X>_200 pattern. <X> (the third underscore-separated token of
# the name) keys the lookup dict, and a copy of each frame is queued for
# concatenation.
relabeled_dfs = {}
all_data = []

# Iterate over a snapshot: the loop itself creates new global names.
for var_name, frame in list(globals().items()):
    is_relabeled_frame = var_name.startswith('df_relabeled_') and var_name.endswith('_200')
    if not is_relabeled_frame:
        continue
    relabeled_dfs[var_name.split('_')[2]] = frame
    all_data.append(frame.copy())

if all_data:
    combined_df = pd.concat(all_data, ignore_index=True)

    # Drop the metrics excluded from this analysis. errors='ignore' skips the
    # ones that are absent, so no membership check or in-place mutation is
    # needed.
    drop_cols = ['ZC', 'ZC_STD', 'Kurt', 'Kurt_STD']
    combined_df = combined_df.drop(columns=drop_cols, errors='ignore')

    # Every remaining non-metadata column is treated as a metric.
    metric_columns = [col for col in combined_df.columns if col not in ['subject', 'relabeled', 'stimulus']]

    # Remove outliers with the 1.5 * IQR rule. The filter is sequential: the
    # fences of each column are computed on the rows that survived the
    # previous columns, so the outcome depends on column order. Rows holding
    # NaN in a metric fail both comparisons and are dropped as well.
    for col in metric_columns:
        Q1 = combined_df[col].quantile(0.25)
        Q3 = combined_df[col].quantile(0.75)
        IQR = Q3 - Q1
        lower_bound = Q1 - 1.5 * IQR
        upper_bound = Q3 + 1.5 * IQR
        combined_df = combined_df[(combined_df[col] >= lower_bound) & (combined_df[col] <= upper_bound)]

    # Boolean indexing returns a frame derived from the concatenated one;
    # take an explicit copy so the column assignment below does not trigger
    # SettingWithCopyWarning.
    combined_df = combined_df.copy()

    # Rescale every metric column to [0, 1].
    scaler = MinMaxScaler()
    combined_df[metric_columns] = scaler.fit_transform(combined_df[metric_columns])

    # One violin plot per metric on a two-column grid.
    n_metrics = len(metric_columns)
    n_rows = (n_metrics + 1) // 2
    fig, axes = plt.subplots(nrows=n_rows, ncols=2, figsize=(14, 3 * n_rows), constrained_layout=True)
    axes = axes.flatten()

    # Loop-invariant: tilt the x labels only when there are many categories.
    rotate_labels = combined_df['relabeled'].nunique(dropna=False) > 5

    # The grid always has at least n_metrics axes, so zip never truncates
    # the metric list.
    for ax, metric in zip(axes, metric_columns):
        sns.violinplot(x='relabeled', y=metric, data=combined_df, ax=ax, palette='viridis', inner='box')
        ax.set_title(f'Comparación de {metric} por grasp', fontsize=14)
        ax.set_xlabel('Categoría')
        ax.set_ylabel(metric)
        if rotate_labels:
            ax.tick_params(axis='x', rotation=45)

    # Remove the spare axis left over when the number of metrics is odd.
    for i in range(n_metrics, len(axes)):
        fig.delaxes(axes[i])

    # The figure uses constrained layout, which conflicts with
    # plt.tight_layout(); let constrained layout reserve room for the
    # automatically positioned suptitle instead.
    fig.suptitle('Comparación de métricas EMG entre diferentes categorías', fontsize=16)
    plt.show()

    # Descriptive statistics of every metric, grouped by grasp label.
    print("Stadistic for grasp:")
    for metric in metric_columns:
        print(f"\nMétrica: {metric}")
        display(combined_df.groupby('relabeled')[metric].describe())
else:
    print("No se encontraron variables df_relabeled_X_200 en el espacio global.")


In [None]:
# Inter-subject stability: for every metric and every grasp label, average
# the metric per subject and take the standard deviation of those
# per-subject means. Relies on `combined_df` and `metric_columns` left in the
# kernel by a previous cell.
inter_subject_stability = {
    metric: {
        label: (
            combined_df[combined_df['relabeled'] == label]
            .groupby('subject')[metric]
            .mean()
            .std()
        )
        for label in combined_df['relabeled'].unique()
    }
    for metric in metric_columns
}

# One row per metric, one column per label.
stability_df = pd.DataFrame(inter_subject_stability).T
display(stability_df)


In [None]:
import pandas as pd
import numpy as np
from scipy.stats import f_oneway
import matplotlib.pyplot as plt
import seaborn as sns

# Step 1: gather the input frames. Every global variable named
# df_relabeled_<X>_200 is registered under <X> (third underscore-separated
# token of its name) and a copy of it is queued for concatenation.
relabeled_dfs = {}
all_data = []

# Iterate over a snapshot: the loop itself creates new global names.
for var_name, frame in list(globals().items()):
    is_relabeled_frame = var_name.startswith('df_relabeled_') and var_name.endswith('_200')
    if not is_relabeled_frame:
        continue
    relabeled_dfs[var_name.split('_')[2]] = frame
    all_data.append(frame.copy())

if not all_data:
    print("No se encontraron dataframes.")
else:
    combined_df = pd.concat(all_data, ignore_index=True)
    metric_columns = [col for col in combined_df.columns if col not in ['subject', 'relabeled', 'stimulus']]

    # Rescale every metric column to [0, 1] with MinMaxScaler.
    scaler = MinMaxScaler()
    combined_df[metric_columns] = scaler.fit_transform(combined_df[metric_columns])

    anova_results = {}
    fisher_scores = {}
    inter_subject_stability = {}

    categories = combined_df['relabeled'].unique()
    # Slice the frame once per category instead of re-filtering it three
    # times for every metric.
    class_frames = [(cat, combined_df[combined_df['relabeled'] == cat]) for cat in categories]

    for metric in metric_columns:
        # --- One-way ANOVA across categories (NaN samples removed) ---
        groups = [frame[metric].dropna().values for _, frame in class_frames]
        try:
            f_stat, p_val = f_oneway(*groups)
        except Exception as exc:
            # Best effort: a metric that cannot be tested still gets NaN, but
            # the reason is reported instead of being silently discarded.
            # Catching Exception rather than using a bare except lets
            # KeyboardInterrupt/SystemExit propagate.
            print(f"ANOVA no disponible para {metric}: {exc}")
            f_stat, p_val = np.nan, np.nan
        anova_results[metric] = {'F-statistic': f_stat, 'p-value': p_val}

        # --- Fisher score: size-weighted between-class spread of the class
        # means divided by the size-weighted sum of within-class variances.
        overall_mean = combined_df[metric].mean()
        num = 0
        den = 0
        for _, frame in class_frames:
            class_data = frame[metric]
            ni = len(class_data)  # counts NaN rows too, unlike mean()/var()
            class_mean = class_data.mean()
            class_var = class_data.var()
            num += ni * (class_mean - overall_mean) ** 2
            den += ni * class_var
        # A zero denominator yields a score of 0 instead of a division error.
        fisher_scores[metric] = num / den if den != 0 else 0

        # --- Inter-subject stability: std across subjects of the
        # per-subject mean, computed separately for each category.
        stability_per_class = {}
        for label, frame in class_frames:
            stability_per_class[label] = frame.groupby('subject')[metric].mean().std()
        inter_subject_stability[metric] = stability_per_class

    # Step 2: assemble the summary table (one row per metric).
    results_df = pd.DataFrame.from_dict(anova_results, orient='index')
    results_df['Fisher Score'] = pd.Series(fisher_scores)
    stability_df = pd.DataFrame(inter_subject_stability).T
    # Average the per-category inter-subject std into one value per metric.
    results_df['Inter-Subject STD (mean)'] = stability_df.mean(axis=1)

    # Step 3: rank metrics by Fisher score, best first.
    results_df_sorted = results_df.sort_values(by='Fisher Score', ascending=False)

    # Step 4: attach a qualitative label to every metric.
    def interpretar_fila(row):
        """Map one summary row to a qualitative label.

        Reads 'p-value', 'Fisher Score' and 'Inter-Subject STD (mean)' from
        `row` and returns 'Excelente', 'Buena', 'Pobre' or 'Moderada'
        according to the fixed thresholds below. Comparisons against NaN are
        False, so a row with a NaN p-value and a Fisher score of at least 0.5
        falls through to 'Moderada'.
        """
        if row['p-value'] < 0.05 and row['Fisher Score'] > 1.5 and row['Inter-Subject STD (mean)'] < 0.5:
            return 'Excelente'
        elif row['p-value'] < 0.05 and row['Fisher Score'] > 1.0:
            return 'Buena'
        elif row['Fisher Score'] < 0.5 or row['p-value'] > 0.1:
            return 'Pobre'
        else:
            return 'Moderada'

    results_df_sorted['Interpretación'] = results_df_sorted.apply(interpretar_fila, axis=1)

    # Show the final table.
    print("\nTabla resumen con Fisher, ANOVA y Estabilidad inter-sujeto:\n")
    display(results_df_sorted)


In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler

# Metrics left out of this comparison.
excluded_features = ['ZC', 'ZC_STD', 'Kurt', 'Kurt_STD']
filtered_metric_columns = [col for col in metric_columns if col not in excluded_features]

# Work on a copy holding only the label and the retained metrics.
filtered_df = combined_df[['relabeled'] + filtered_metric_columns].copy()

# Sequential 1.5 * IQR filter: each column's fences are computed on the rows
# that survived the previous columns.
for col in filtered_metric_columns:
    Q1, Q3 = filtered_df[col].quantile(0.25), filtered_df[col].quantile(0.75)
    IQR = Q3 - Q1
    lower_bound, upper_bound = Q1 - 1.5 * IQR, Q3 + 1.5 * IQR
    within_fences = filtered_df[col].between(lower_bound, upper_bound)
    filtered_df = filtered_df[within_fences]

# 1. Median of every metric within each label group.
median_df = filtered_df.groupby('relabeled')[filtered_metric_columns].median()

# 2. Variance of those group medians, one value per metric.
median_variance = median_df.var()

# 3. Long-format table: one row per metric.
median_variance_df = median_variance.rename_axis('métrica').reset_index(name='varianza_entre_medianas')

# 4. Min-max normalise the variances across metrics.
scaler = MinMaxScaler()
median_variance_df['varianza_normalizada'] = scaler.fit_transform(
    median_variance_df[['varianza_entre_medianas']]
)

# 5. Show the table, largest normalised variance first.
print("Varianza entre medianas y su normalización:")
display(median_variance_df.sort_values(by='varianza_normalizada', ascending=False))

# 6. Horizontal bar chart, smallest normalised variance first.
variance_fig, variance_ax = plt.subplots(figsize=(10, 6))
sns.barplot(
    x='varianza_normalizada',
    y='métrica',
    data=median_variance_df.sort_values(by='varianza_normalizada', ascending=True),
    palette='viridis',
    ax=variance_ax,
)
variance_ax.set_title('Varianza normalizada entre medianas por métrica (sin atípicos)', fontsize=14)
variance_ax.set_xlabel('Varianza normalizada')
variance_ax.set_ylabel('Métrica')
variance_ax.grid(axis='x', linestyle='--', alpha=0.6)
variance_fig.tight_layout()
plt.show()


In [None]:
from sklearn.ensemble import RandomForestClassifier
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# ---------- 1. Leave out the metrics excluded from this analysis ----------
excluded_metrics = ['ZC', 'ZC_STD', 'Kurt', 'Kurt_STD']
filtered_metrics = list(filter(lambda name: name not in excluded_metrics, metric_columns))

# ---------- 2. Filtrar outliers usando el método IQR ----------
def remove_outliers_iqr(df, columns):
    """Return a copy of `df` without rows outside the 1.5 * IQR fences.

    The columns are processed one after another: the quartiles of each
    column are computed on the rows that survived the previous columns, so
    the result depends on the order of `columns`. Rows whose value is NaN in
    a processed column fail the range test and are removed. `df` itself is
    not modified.
    """
    kept = df.copy()
    for name in columns:
        first_quartile = kept[name].quantile(0.25)
        third_quartile = kept[name].quantile(0.75)
        spread = third_quartile - first_quartile
        # between() is inclusive on both ends, matching >= lower and <= upper.
        in_range = kept[name].between(first_quartile - 1.5 * spread, third_quartile + 1.5 * spread)
        kept = kept[in_range]
    return kept

# Apply the IQR outlier filter to the retained metric columns.
clean_df = remove_outliers_iqr(combined_df, filtered_metrics)

# ---------- 3. Fit the model on the cleaned data ----------
X, y = clean_df[filtered_metrics], clean_df['relabeled']

# Random forest with a fixed seed; fit() returns the estimator itself.
rf_model = RandomForestClassifier(n_estimators=100, random_state=42).fit(X, y)

# Feature importances, in the same order as `filtered_metrics`.
importances = rf_model.feature_importances_

# ---------- 4. Visualisation ----------
importance_fig, importance_ax = plt.subplots(figsize=(11, 6))
sns.barplot(x=filtered_metrics, y=importances, palette='viridis', ax=importance_ax)
importance_ax.set_title('Importancia de Características para la Clasificación de Agarre (sin outliers y sin ZC/Kurtosis)')
importance_ax.set_xlabel('Métricas')
importance_ax.set_ylabel('Importancia')
importance_ax.tick_params(axis='x', rotation=45)
importance_fig.tight_layout()
plt.show()

In [None]:
import os
import re
import numpy as np
import pandas as pd

# ---------- Función para calcular CV ----------
def calculate_cv(metrics_df):
    """Return the coefficient of variation of every column, in percent.

    The result is a dict mapping each column name of `metrics_df` to
    ``std / mean * 100``. Columns whose mean is exactly 0 map to NaN.
    """
    def _column_cv(series):
        # Guard the division: a zero mean has no defined CV.
        mean_value = series.mean()
        if mean_value == 0:
            return np.nan
        return (series.std() / mean_value) * 100

    return {metric: _column_cv(metrics_df[metric]) for metric in metrics_df.columns}

# ---------- Función para eliminar outliers usando IQR ----------
def remove_outliers_iqr(df, columns):
    """Return a copy of `df` without rows outside the 1.5 * IQR fences.

    The columns are processed one after another: the quartiles of each
    column are computed on the rows that survived the previous columns, so
    the result depends on the order of `columns`. Rows whose value is NaN in
    a processed column fail the range test and are removed. `df` itself is
    not modified.
    """
    kept = df.copy()
    for name in columns:
        first_quartile = kept[name].quantile(0.25)
        third_quartile = kept[name].quantile(0.75)
        spread = third_quartile - first_quartile
        # between() is inclusive on both ends, matching >= lower and <= upper.
        in_range = kept[name].between(first_quartile - 1.5 * spread, third_quartile + 1.5 * spread)
        kept = kept[in_range]
    return kept

# ---------- Parameters ----------
fm = 2000  # passed as `fm` to the envelope filter — presumably the sampling rate in Hz, TODO confirm
window_length = 400  # rows per window
overlap = 0
target_channel = "Channel 8"

# One dict per full-length window: metadata plus the computed metrics.
all_metrics = []

# ---------- Walk the subject folders and their .mat files ----------
for folder in os.listdir(data_path):
    # Only folders named like s<N>, S<N> or Subject<N> are processed.
    if not re.match(r'[sS]\d+|Subject\d+', folder):
        continue
    folder_path = os.path.join(data_path, folder)
    # A stray file whose name matches the pattern would make os.listdir fail.
    if not os.path.isdir(folder_path):
        continue

    for file_name in os.listdir(folder_path):
        if not file_name.endswith('.mat'):
            continue

        try:
            mat_data = src.loadmatNina(database, file_name, subject=folder)
        except Exception as e:
            # Best effort: report the unreadable file and move on.
            print(f"Error loading {file_name}: {str(e)}")
            continue

        test_df, grasps = src.build_dataframe(
            mat_file=mat_data,
            database=database,
            filename=file_name,
            rectify=False,
            normalize=True
        )
        # Keep only the rows whose label is in `filtered_labels`.
        test_df = test_df[test_df['relabeled'].isin(filtered_labels)]

        if target_channel not in test_df.columns:
            print(f"{target_channel} no encontrado en {file_name}, omitiendo.")
            continue

        emg_columns = [target_channel]
        # Use the `fm` parameter declared above instead of repeating the
        # literal, so changing it in one place takes effect here too.
        envelope_df = src.get_envelope_lowpass(
            test_df[emg_columns], fm=fm, cutoff_freq=0.6, envelope_type=1
        )

        meta_columns = ["Time (s)", "subject", "re_repetition", "stimulus", "relabeled"]
        # NOTE(review): axis=1 concat aligns on the index; this assumes
        # get_envelope_lowpass preserves test_df's index — confirm.
        result_df = pd.concat([envelope_df, test_df[meta_columns]], axis=1)

        # Counts the full-length windows stored for this file.
        window_count = 0

        for grasp in grasps:
            try:
                print(f"\nProcessing Grasp {grasp} in file {file_name}:")
                grasp_df = result_df[result_df['stimulus'] == grasp]

                if grasp_df.empty:
                    print(f"No hay datos para el grasp {grasp} en {file_name}.")
                    continue

                ventanas = src.create_windows_with_overlap(grasp_df, window_length, overlap)

                for i, ventana in enumerate(ventanas):
                    # Windows shorter than window_length are skipped.
                    if len(ventana) != window_length:
                        continue

                    signal = ventana[target_channel].values
                    metrics = calculate_emg_metrics_means(signal)

                    metrics_with_meta = {
                        "subject": folder,
                        "relabeled": grasp_df['relabeled'].iloc[0],
                        "stimulus": grasp,
                        "channel": target_channel,
                        "window_id": f"{file_name}_{grasp}_{i}",
                        "file_name": file_name,
                        "window_number": window_count,
                        **metrics
                    }

                    all_metrics.append(metrics_with_meta)
                    window_count += 1
            except Exception as e:
                # Best effort: a failing grasp is reported and skipped.
                print(f"Error processing grasp {grasp}: {str(e)}")

        print(f"Procesadas {window_count} ventanas para el archivo {file_name}")

# ---------- Build the overall DataFrame ----------
# With no windows the column reordering below would fail with an opaque
# KeyError; fail early with an explicit message instead.
if not all_metrics:
    raise ValueError("No se generaron ventanas: 'all_metrics' está vacío.")
metrics_df_200 = pd.DataFrame(all_metrics)

# ---------- Reorder columns: metadata first, metrics alphabetically ----------
meta_cols = ["subject", "relabeled", "stimulus", "channel", "window_id", "file_name", "window_number"]
metric_cols = [col for col in metrics_df_200.columns if col not in meta_cols]
column_order = meta_cols + sorted(metric_cols)
metrics_df_200 = metrics_df_200[column_order]

# ---------- Leave out unwanted metrics ----------
excluded_metrics = ['ZC', 'ZC_STD', 'Kurt', 'Kurt_STD']
filtered_metrics = [col for col in metric_cols if col not in excluded_metrics]

# ---------- Remove outliers (sequential 1.5 * IQR filter) ----------
clean_df_200 = remove_outliers_iqr(metrics_df_200, filtered_metrics)

# ---------- Coefficient of variation of the cleaned metrics ----------
cv_values = calculate_cv(clean_df_200[filtered_metrics])
cv_df = pd.DataFrame.from_dict(cv_values, orient='index', columns=['Coeficiente de Variación'])
cv_df = cv_df.sort_values(by='Coeficiente de Variación', ascending=False)

# ---------- Show results ----------
print("\n📊 Coeficiente de variación de las métricas (sin outliers, sin ZC/Kurtosis):")
display(cv_df)

print("\n📈 Resumen de métricas por tipo de movimiento (limpio):")
grouped_df = clean_df_200.drop(columns=['channel'], errors='ignore')
# Group on the label first and average only the numeric columns. Selecting
# numeric dtypes before grouping would drop a non-numeric 'relabeled' column
# and make the groupby raise KeyError.
numeric_cols = [col for col in grouped_df.select_dtypes(include=['number']).columns if col != 'relabeled']
summary_by_subject_movement_200 = grouped_df.groupby('relabeled')[numeric_cols].mean()
display(summary_by_subject_movement_200)

print(f"\n✅ Total de ventanas limpias procesadas: {len(clean_df_200)}")
print(f"📌 Distribución por sujeto:\n{clean_df_200['subject'].value_counts()}")
print(f"📌 Distribución por movimiento:\n{clean_df_200['relabeled'].value_counts()}")