## Libraries

In [None]:
import os
import re
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib
from matplotlib.patches import Ellipse

import plotly.express as px
import plotly.graph_objects as go

from itertools import combinations
from scipy import stats
from scipy.io import loadmat, whosmat
from scipy.spatial.distance import pdist, squareform, cdist
from scipy.cluster.hierarchy import dendrogram, linkage
import scipy.cluster.hierarchy as sch
from scipy.linalg import inv

from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.decomposition import PCA
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.cluster import KMeans

from statsmodels.multivariate.manova import MANOVA

import pywt

import src
from src import config, loadmatNina
from src.preprocessing_utils import get_envelope
import pycaret.classification as pyc
import xgboost

In [None]:
# Database whose .mat variable layout is inspected
database = 'DB4'
data_path = f'data/{database}'

# First entry of the database directory that looks like a subject folder
# ("s<number>", "S<number>" or "Subject<number>"); None when nothing matches
folder = next(
    (
        entry
        for entry in os.listdir(data_path)
        if re.match(r'[sS]\d+', entry) or re.match(r'Subject\d+', entry)
    ),
    None,
)

if not folder:
    print("No folder found with the convention s + 'number'")
else:
    folder_path = os.path.join(data_path, folder)

    # (file name, whosmat listing) for every .mat file of that subject
    results = [
        (mat_name, whosmat(os.path.join(folder_path, mat_name)))
        for mat_name in os.listdir(folder_path)
        if mat_name.endswith('.mat')
    ]

    # variable name -> {file name -> remaining whosmat fields}
    data = {}
    for mat_name, listing in results:
        for entry in listing:
            data.setdefault(entry[0], {})[mat_name] = entry[1:]

    # One row per variable, one column per file
    df = pd.DataFrame(data).transpose()
    df.columns.name = 'File Name'

    print(df)

In [None]:
def calculate_emg_metrics(signal, fs=2000):
    """
    Calculates various metrics for an EMG signal.

    The callers in this file pass one channel at a time (a 1-D array).

    Parameters:
    - signal: NumPy array containing the EMG signal.
    - fs: Sampling frequency in Hz (default: 2000 Hz).

    Returns:
    - A dictionary with the computed metrics, keyed "MAV", "IAV", "RMS",
      "WL", "ZC", "SSC", "VAR", "CoV", "MNF", "mDWT", "TD" and "MAVS".
    """
    # Shared intermediates, computed once and reused by several metrics
    abs_signal = np.abs(signal)
    diff_signal = np.diff(signal)
    abs_diff = np.abs(diff_signal)

    # Mean Absolute Value (MAV)
    mav = np.mean(abs_signal)

    # Integrated Absolute Value (IAV)
    iav = np.sum(abs_signal)

    # Root Mean Square (RMS)
    rms = np.sqrt(np.mean(signal**2))

    # Waveform Length (WL)
    wl = np.sum(abs_diff)

    # Zero Crossings (ZC): positions where the sign of consecutive samples differs
    zc = np.sum(np.diff(np.sign(signal)) != 0)

    # Slope Sign Changes (SSC): consecutive differences with opposite sign
    ssc = np.sum((diff_signal[1:] * diff_signal[:-1]) < 0)

    # Variance (VAR)
    var = np.var(signal)

    # Coefficient of Variation (CoV); defined as 0 when the mean is exactly 0
    mean_signal = np.mean(signal)
    cov = (np.std(signal) / mean_signal) if mean_signal != 0 else 0

    # Mean Frequency (MNF): magnitude-weighted average of the rFFT bin
    # frequencies. An all-zero spectrum yields 0 instead of a NaN from 0/0,
    # matching the guard used by the other metric functions in this file.
    freqs = np.fft.rfftfreq(len(signal), d=1/fs)
    fft_magnitude = np.abs(np.fft.rfft(signal))
    spectrum_total = np.sum(fft_magnitude)
    mnf = np.sum(freqs * fft_magnitude) / spectrum_total if spectrum_total != 0 else 0

    # Marginal Discrete Wavelet Transform (mDWT): summed absolute
    # coefficients of a 4-level 'db4' decomposition
    coeffs = pywt.wavedec(signal, 'db4', level=4)
    mdwt = np.sum([np.sum(np.abs(c)) for c in coeffs])

    # Temporal Difference (TD) -- same formula as WL in this implementation
    td = np.sum(abs_diff)

    # Mean Absolute Value Slope (MAVS)
    mavs = np.mean(abs_diff)

    # Return the metrics as a dictionary
    metrics = {
        "MAV": mav,
        "IAV": iav,
        "RMS": rms,
        "WL": wl,
        "ZC": zc,
        "SSC": ssc,
        "VAR": var,
        "CoV": cov,
        "MNF": mnf,
        "mDWT": mdwt,
        "TD": td,
        "MAVS": mavs
    }

    return metrics

def calculate_emg_metrics_std(signal, fs=2000):
    """
    Calculates various metrics for an EMG signal, each paired with a
    "<metric>_STD" spread value.

    A 2-D input is processed column by column and every key (metric and
    "_STD" alike) is averaged across the columns, so 1-D and 2-D inputs
    return the same set of keys.

    Parameters:
    - signal: NumPy array containing the EMG signal, 1-D or 2-D
      (for 2-D, each column is handled as one channel).
    - fs: Sampling frequency in Hz (default: 2000 Hz).

    Returns:
    - A dictionary with the computed metrics and their "_STD" companions.
    """
    if signal.ndim == 2:
        # Recurse per column so the "_STD" keys are kept; the previous
        # implementation delegated to calculate_emg_metrics and lost them.
        per_channel = [
            calculate_emg_metrics_std(signal[:, ch], fs)
            for ch in range(signal.shape[1])
        ]
        return {key: np.mean([m[key] for m in per_channel]) for key in per_channel[0]}

    # Shared intermediates
    abs_signal = np.abs(signal)
    diff_signal = np.diff(signal)
    abs_diff = np.abs(diff_signal)
    sign_changes = np.diff(np.sign(signal)) != 0
    slope_changes = (diff_signal[1:] * diff_signal[:-1]) < 0

    # Mean Absolute Value (MAV)
    mav = np.mean(abs_signal)
    mav_std = np.std(abs_signal)

    # Integrated Absolute Value (IAV); its spread is the std of |signal|
    iav = np.sum(abs_signal)
    iav_std = np.std(abs_signal)

    # Root Mean Square (RMS); its spread is the std of the raw samples
    rms = np.sqrt(np.mean(signal**2))
    rms_std = np.std(signal)

    # Waveform Length (WL)
    wl = np.sum(abs_diff)
    wl_std = np.std(abs_diff)

    # Zero Crossings (ZC); spread of the boolean crossing indicator
    zc = np.sum(sign_changes)
    zc_std = np.std(sign_changes)

    # Slope Sign Changes (SSC); spread of the boolean indicator
    ssc = np.sum(slope_changes)
    ssc_std = np.std(slope_changes)

    # Variance (VAR); its spread is the std of the raw samples
    var = np.var(signal)
    var_std = np.std(signal)

    # Coefficient of Variation (CoV); defined as 0 when the mean is exactly 0
    mean_signal = np.mean(signal)
    cov = (np.std(signal) / mean_signal) if mean_signal != 0 else 0
    # std of a single scalar: always 0.0, kept so the key set stays uniform
    cov_std = np.std(cov)

    # Mean Frequency (MNF); an all-zero spectrum yields 0 rather than NaN
    freqs = np.fft.rfftfreq(len(signal), d=1/fs)
    fft_magnitude = np.abs(np.fft.rfft(signal))
    spectrum_total = np.sum(fft_magnitude)
    weighted = freqs * fft_magnitude
    mnf = np.sum(weighted) / spectrum_total if spectrum_total != 0 else 0
    mnf_std = np.std(weighted) / spectrum_total if spectrum_total != 0 else 0

    # Marginal Discrete Wavelet Transform (mDWT): per-level absolute sums
    # of a 4-level 'db4' decomposition; the spread is taken across levels
    coeffs = pywt.wavedec(signal, 'db4', level=4)
    level_sums = [np.sum(np.abs(c)) for c in coeffs]
    mdwt = np.sum(level_sums)
    mdwt_std = np.std(level_sums)

    # Temporal Difference (TD) -- same formula as WL in this implementation
    td = np.sum(abs_diff)
    td_std = np.std(abs_diff)

    # Mean Absolute Value Slope (MAVS)
    mavs = np.mean(abs_diff)
    mavs_std = np.std(abs_diff)

    # Return the metrics as a dictionary
    metrics = {
        "MAV": mav, "MAV_STD": mav_std,
        "IAV": iav, "IAV_STD": iav_std,
        "RMS": rms, "RMS_STD": rms_std,
        "WL": wl, "WL_STD": wl_std,
        "ZC": zc, "ZC_STD": zc_std,
        "SSC": ssc, "SSC_STD": ssc_std,
        "VAR": var, "VAR_STD": var_std,
        "CoV": cov, "CoV_STD": cov_std,
        "MNF": mnf, "MNF_STD": mnf_std,
        "mDWT": mdwt, "mDWT_STD": mdwt_std,
        "TD": td, "TD_STD": td_std,
        "MAVS": mavs, "MAVS_STD": mavs_std
    }

    return metrics

def calculate_emg_metrics_cv(signal, fs=2000):
    """
    Compute EMG metrics plus a coefficient of variation ("<metric>_CV",
    in percent) for each of them.

    For a 2-D input, calculate_emg_metrics is applied to every column; the
    result holds the across-column mean of each metric and the
    across-column CV. For any other array the metrics and CVs are computed
    directly from the samples.

    Parameters:
    - signal: NumPy array containing the EMG signal.
    - fs: Sampling frequency in Hz (default: 2000 Hz).

    Returns:
    - A dictionary with the computed metrics and their CVs, or an empty
      dictionary if any exception is raised (the error is printed).
    """
    try:
        def _percent_cv(samples):
            # std / mean expressed in percent; NaN when the mean is exactly 0
            centre = np.mean(samples)
            spread = np.std(samples)
            if centre != 0:
                return (spread / centre) * 100
            return np.nan

        if signal.ndim == 2:
            channel_results = [
                calculate_emg_metrics(signal[:, idx], fs)
                for idx in range(signal.shape[1])
            ]
            means = {}
            cvs = {}
            for name in channel_results[0]:
                per_channel = np.array([res[name] for res in channel_results])
                means[name] = np.mean(per_channel)
                cvs[name + "_CV"] = _percent_cv(per_channel)
            # All means first, then all CVs
            return {**means, **cvs}

        rectified = np.abs(signal)
        deltas = np.diff(signal)
        abs_deltas = np.abs(deltas)
        zero_cross = np.diff(np.sign(signal)) != 0
        slope_flip = (deltas[1:] * deltas[:-1]) < 0
        power = signal**2

        metrics = {}
        metrics["MAV"] = np.mean(rectified)
        metrics["IAV"] = np.sum(rectified)
        metrics["RMS"] = np.sqrt(np.mean(power))
        metrics["WL"] = np.sum(abs_deltas)
        metrics["ZC"] = np.sum(zero_cross)
        metrics["SSC"] = np.sum(slope_flip)
        metrics["VAR"] = np.var(signal)
        if np.mean(signal) != 0:
            metrics["CoV"] = np.std(signal) / np.mean(signal)
        else:
            metrics["CoV"] = 0
        metrics["TD"] = np.sum(abs_deltas)
        metrics["MAVS"] = np.mean(abs_deltas)
        metrics["MNP"] = np.mean(power)

        # Spectrum
        freqs = np.fft.rfftfreq(len(signal), d=1/fs)
        fft_magnitude = np.abs(np.fft.rfft(signal))
        has_spectrum = np.sum(fft_magnitude) != 0
        if has_spectrum:
            metrics["MNF"] = np.sum(freqs * fft_magnitude) / np.sum(fft_magnitude)
        else:
            metrics["MNF"] = 0

        # Wavelet: absolute coefficient sum per decomposition level
        level_sums = np.array(
            [np.sum(np.abs(level)) for level in pywt.wavedec(signal, 'db4', level=4)]
        )
        metrics["mDWT"] = np.sum(level_sums)

        # Kurtosis (fourth central moment over std**4); 0 for a flat signal
        sigma = np.std(signal)
        if sigma != 0:
            metrics["Kurt"] = np.mean((signal - np.mean(signal)) ** 4) / (sigma ** 4)
        else:
            metrics["Kurt"] = 0

        # Coefficients of variation
        cv_metrics = {
            "MAV_CV": _percent_cv(rectified),
            "IAV_CV": _percent_cv(rectified),
            "RMS_CV": _percent_cv(signal),
            "WL_CV": _percent_cv(abs_deltas),
            "ZC_CV": _percent_cv(zero_cross),
            "SSC_CV": _percent_cv(slope_flip),
            "VAR_CV": _percent_cv(signal),
            "TD_CV": _percent_cv(abs_deltas),
            "MAVS_CV": _percent_cv(abs_deltas),
            "MNP_CV": _percent_cv(power),
            "MNF_CV": _percent_cv(freqs * fft_magnitude) if has_spectrum else np.nan,
            "mDWT_CV": _percent_cv(level_sums),
            "Kurt_CV": 0,  # kurtosis is a single scalar, so no CV is computed
        }

        return {**metrics, **cv_metrics}

    except Exception as e:
        print(f"Error in calculate_emg_metrics_cv: {e}")
        return {}

import numpy as np
import pywt
from scipy.stats import kurtosis

import numpy as np
import pywt
from scipy.stats import kurtosis

def calculate_emg_metrics_means(signal, fs=2000):
    """
    Calculates EMG metrics for a single- or multi-channel signal.

    For a 2-D input the metrics are computed per column and averaged
    across columns. No coefficient of variation is returned by this
    function.

    Parameters:
    - signal: np.ndarray. EMG signal, either 1D (samples) or 2D (samples x channels).
    - fs: int. Sampling frequency in Hz (default: 2000 Hz).

    Returns:
    - dict: Dictionary with the keys "MAV", "IAV", "RMS", "WL", "ZC",
      "SSC", "VAR", "TD", "MAVS", "MNP", "MNF", "mDWT" and "Kurt".
      An empty dict is returned (and the error printed) if any exception
      is raised while computing.
    """
    try:
        if signal.ndim == 2:
            all_metrics = [calculate_emg_metrics_means(signal[:, ch], fs) for ch in range(signal.shape[1])]
            averaged = {k: np.mean([m[k] for m in all_metrics]) for k in all_metrics[0]}
            return averaged

        # Single-channel processing
        abs_signal = np.abs(signal)
        diff_signal = np.diff(signal)
        diff_abs_signal = np.abs(diff_signal)

        # Magnitude spectrum and the frequency of each rFFT bin
        freqs = np.fft.rfftfreq(len(signal), d=1/fs)
        fft_magnitude = np.abs(np.fft.rfft(signal))
        spectrum_total = np.sum(fft_magnitude)

        # Absolute coefficient sum per level of a 4-level 'db4' decomposition
        coeffs = pywt.wavedec(signal, 'db4', level=4)
        mdwt_values = np.array([np.sum(np.abs(c)) for c in coeffs])

        metrics = {
            "MAV": np.mean(abs_signal),
            "IAV": np.sum(abs_signal),
            "RMS": np.sqrt(np.mean(signal**2)),
            "WL": np.sum(diff_abs_signal),
            # Sign changes between consecutive samples
            "ZC": np.sum(np.diff(np.sign(signal)) != 0),
            # Consecutive differences with opposite sign
            "SSC": np.sum((diff_signal[1:] * diff_signal[:-1]) < 0),
            "VAR": np.var(signal),
            # Same formula as WL in this implementation
            "TD": np.sum(diff_abs_signal),
            "MAVS": np.mean(diff_abs_signal),
            "MNP": np.mean(signal**2),
            # Magnitude-weighted mean frequency; 0 for an all-zero spectrum
            "MNF": np.sum(freqs * fft_magnitude) / spectrum_total if spectrum_total != 0 else 0,
            "mDWT": np.sum(mdwt_values),
            # Pearson (non-excess) kurtosis
            "Kurt": kurtosis(signal, fisher=False),
        }

        return metrics

    except Exception as e:
        print(f"Error in calculate_emg_metrics_means: {e}")
        return {}


## Combined dataframe for the whole database

In [None]:
# Name of the database to process
database = 'DB4'

# Absolute path of that database's folder (resolved from ./data/<database>)
_relative_db_dir = os.path.join('data', database)
data_path = os.path.abspath(_relative_db_dir)

# Subject identifiers 's1' through 's10'
subjects = ['s' + str(number) for number in range(1, 11)]

In [None]:
# One DataFrame per successfully processed .mat file
all_dataframes = []

for folder in os.listdir(data_path):
    # Only subject folders ("s<number>", "S<number>" or "Subject<number>")
    if not (re.match(r'[sS]\d+', folder) or re.match(r'Subject\d+', folder)):
        continue
    folder_path = os.path.join(data_path, folder)

    for file_name in os.listdir(folder_path):
        if not file_name.endswith('.mat'):
            continue

        # Step 1: load the .mat file; on failure report it and move on
        try:
            mat_data = src.loadmatNina(database, file_name, subject=folder)
        except Exception as e:
            print(f"Error loading {file_name}: {str(e)}")
            continue

        # Step 2: build the per-file DataFrame and tag it with the subject
        # (the folder name); on failure report it and move on
        try:
            test_df, grasps = src.build_dataframe(
                mat_file=mat_data,
                database=database,
                filename=file_name,
                rectify=False,
                normalize=True,
            )
            test_df['subject'] = folder
        except Exception as e:
            print(f"Error processing {file_name}: {str(e)}")
        else:
            all_dataframes.append(test_df)

# Stack everything into one DataFrame, or warn when nothing was produced
if not all_dataframes:
    print("Warning: No DataFrames were generated. Check the input data.")
else:
    combined_df = pd.concat(all_dataframes, ignore_index=True)

    print("\n Combined DataFrame:")
    display(combined_df)

## Metrics with std for every channel, for every grasp

In [None]:
metrics_data = []

# EMG signal columns, resolved once (every group shares combined_df's columns)
emg_channels = [col for col in combined_df.columns if col.startswith('Channel')]

# Iterate over each subject, movement (relabeled or stimulus) and repetition
for (subject, relabeled, re_repetition), group in combined_df.groupby(['subject', 'relabeled', 're_repetition']):  # Change 'relabeled' to 'stimulus' if needed
    for channel in emg_channels:
        # Get the signal values for the current channel
        channel_signal = group[channel].values

        # Compute EMG signal metrics for the current channel
        metrics = calculate_emg_metrics_means(channel_signal)

        # Append metadata and computed metrics to the list
        metrics_data.append({
            "subject": subject,  # Subject identification
            "relabeled": relabeled,  # Movement identification (relabeled or stimulus)
            "channel": channel,  # EMG channel
            "re_repetition": re_repetition,  # Repetition number (key was misspelled 're_reptition')
            **metrics  # Unpack all computed metrics
        })

# Create a DataFrame containing all the obtained metrics
metrics_df = pd.DataFrame(metrics_data)

# Reorder columns for better visualization. The metric columns are taken from
# the DataFrame itself rather than from the last loop iteration's `metrics`
# dict, which is undefined when no row was produced and empty when the last
# metric computation failed.
# NOTE: as before, the repetition column is not part of the selection, so the
# resulting metrics_df keeps the same columns the downstream cells expect.
id_columns = ["subject", "relabeled", "channel"]
metric_columns = [
    col for col in metrics_df.columns
    if col not in id_columns and col != "re_repetition"
]
column_order = id_columns + metric_columns
# reindex also works on an empty frame, where plain column selection raises
metrics_df = metrics_df.reindex(columns=column_order)

# Display the DataFrame with the computed metrics
print("\nMetrics DataFrame by Channel, Subject, and Relabeled:")
display(metrics_df)

In [None]:
# Work on the metrics without the 'channel' column so rows can be pooled
# per subject and movement
grouped_df = metrics_df.drop(columns=['channel'])
display(grouped_df)

# Mean and standard deviation of every metric per (subject, movement);
# each column is tagged with the statistic it holds
df_mean = grouped_df.groupby(['subject', 'relabeled']).mean().add_suffix(" mean")
df_std = grouped_df.groupby(['subject', 'relabeled']).std().add_suffix(" std")

# Put both statistics side by side, then turn the group keys back into columns
df_result = df_mean.merge(df_std, on=['subject', 'relabeled'])
df_result = df_result.reset_index()

# Show the aggregated table
display(df_result)

### Relabeled Selection

In [None]:
# Movement labels to keep
filtered_labels = [55, 2, 14, 28, 32, 0]

# Rows of grouped_df whose 'relabeled' value is one of those labels
_keep_rows = grouped_df['relabeled'].isin(filtered_labels)
dataframe_windowing = grouped_df.loc[_keep_rows]
dataframe_windowing

In [None]:
metrics_data = []

# EMG columns are the ones whose name starts with 'Channel'
emg_columns = [name for name in combined_df.columns if name.startswith('Channel')]

# One row of metrics per (subject, movement, channel)
for (subject, relabeled), group in combined_df.groupby(['subject', 'relabeled']):
    for channel in emg_columns:
        channel_signal = group[channel].values
        metrics = calculate_emg_metrics_means(channel_signal)

        row = {"subject": subject, "relabeled": relabeled, "channel": channel}
        row.update(metrics)
        metrics_data.append(row)

# Per-channel metrics table
metrics_df = pd.DataFrame(metrics_data)

# Keep only the selected movement classes
keep_rows = metrics_df['relabeled'].isin(filtered_labels)
metrics_df = metrics_df[keep_rows]

# Average the metrics over channels for each subject and class
# (the 'channel' column itself is left out of the average)
without_channel = metrics_df.drop(columns=["channel"])
avg_metrics_df = without_channel.groupby(['subject', 'relabeled']).mean()
avg_metrics_df = avg_metrics_df.reset_index()

# Show the averaged table
print("\nPromedio de métricas por Subject y Relabeled (promedio de canales):")
display(avg_metrics_df)

### Functions

In [None]:
def plots_Windowed(fm, window_length, overlap, target_channels, cutoff_freq, envelope_type, filtered_labels):
    """
    Plot, for every grasp of every .mat file, the first full envelope window
    of several EMG channels in one figure.

    Reads the module-level `data_path` and `database`, and walks every
    subject folder ("s<number>", "S<number>" or "Subject<number>") found
    under `data_path`. Returns nothing; its output is the figures and the
    printed messages.

    Parameters:
    - fm: Sampling frequency in Hz (forwarded to the envelope helper and
      used to build the time axis).
    - window_length: Window length in samples.
    - overlap: Overlap between windows (forwarded to
      src.create_windows_with_overlap).
    - target_channels: List of channel column names to plot.
    - cutoff_freq: Cut-off frequency forwarded to src.get_envelope_lowpass.
    - envelope_type: Envelope type forwarded to src.get_envelope_lowpass.
    - filtered_labels: 'relabeled' values whose rows are kept.
    """
    collected_windows = []
    meta_columns = ["Time (s)", "subject", "re_repetition", "stimulus", "relabeled"]

    for folder in os.listdir(data_path):
        if not (re.match(r'[sS]\d+', folder) or re.match(r'Subject\d+', folder)):
            continue
        subject_dir = os.path.join(data_path, folder)

        for file_name in os.listdir(subject_dir):
            if not file_name.endswith('.mat'):
                continue

            try:
                mat_data = src.loadmatNina(database, file_name, subject=folder)
            except Exception as e:
                print(f"Error loading {file_name}: {str(e)}")
                continue

            df_norm, grasps_etiquetados = src.build_dataframe(
                mat_file=mat_data,
                database=database,
                filename=file_name,
                rectify=False,
                normalize=True,
            )
            df_norm = df_norm[df_norm['relabeled'].isin(filtered_labels)]

            # Skip the file unless every requested channel is present
            missing_channels = [ch for ch in target_channels if ch not in df_norm.columns]
            if missing_channels:
                print(f"Canales faltantes en {file_name}: {missing_channels}, omitiendo.")
                continue

            # Envelope of the requested channels, re-joined with the metadata columns
            envelope_df = src.get_envelope_lowpass(
                df_norm[target_channels],
                fm=fm,
                cutoff_freq=cutoff_freq,
                envelope_type=envelope_type,
            )
            result_df = pd.concat([envelope_df, df_norm[meta_columns]], axis=1)

            for grasp in grasps_etiquetados:
                try:
                    grasp_df = result_df[result_df['stimulus'] == grasp]
                    if grasp_df.empty:
                        continue

                    # Keep only windows that have the full requested length
                    ventanas = src.create_windows_with_overlap(grasp_df, window_length, overlap)
                    full_windows = [
                        pd.DataFrame(window, columns=target_channels)
                        for window in ventanas
                        if len(window) == window_length
                    ]
                    if not full_windows:
                        continue

                    collected_windows.extend(full_windows)

                    # Draw the first window, all channels on the same axes
                    first_window = full_windows[0]
                    plt.figure(figsize=(12, 6))
                    tiempo = np.linspace(0, window_length / fm, window_length)
                    for channel in target_channels:
                        plt.plot(tiempo, first_window[channel], label=channel)

                    plt.xlabel("Tiempo (s)")
                    plt.ylabel("Amplitud EMG (envelope)")
                    plt.title(f"{file_name} - Grasp {grasp} - Todos los canales")
                    plt.legend(loc='upper right')
                    plt.grid(True)
                    plt.tight_layout()
                    plt.show()
                except Exception as e:
                    print(f"Error processing grasp {grasp}: {str(e)}")


def envelope_raw(fm, window_length, overlap, target_channel, cutoff_freq,envelope_type, filtered_labels):
    """
    Plot, for every grasp of every .mat file, the first full window of one
    EMG channel: the raw signal with its envelope drawn on top.

    Reads the module-level `data_path` and `database`, and walks every
    subject folder ("s<number>", "S<number>" or "Subject<number>") found
    under `data_path`. Returns nothing; its output is the figures and the
    printed messages.

    Parameters:
    - fm: Sampling frequency in Hz (forwarded to the envelope helper and
      used to build the time axis).
    - window_length: Window length in samples.
    - overlap: Overlap between windows (forwarded to
      src.create_windows_with_overlap).
    - target_channel: Name of the channel column to plot.
    - cutoff_freq: Cut-off frequency forwarded to src.get_envelope_lowpass.
    - envelope_type: Envelope type forwarded to src.get_envelope_lowpass.
    - filtered_labels: 'relabeled' values whose rows are kept.
    """
    # Every full-length envelope window produced along the way
    collected_windows = []
    meta_columns = ["Time (s)", "subject", "re_repetition", "stimulus", "relabeled"]

    for folder in os.listdir(data_path):
        if not (re.match(r'[sS]\d+', folder) or re.match(r'Subject\d+', folder)):
            continue
        subject_dir = os.path.join(data_path, folder)

        for file_name in os.listdir(subject_dir):
            if not file_name.endswith('.mat'):
                continue

            # Load the .mat file; report and skip it on failure
            try:
                mat_data = src.loadmatNina(database, file_name, subject=folder)
            except Exception as e:
                print(f"Error loading {file_name}: {str(e)}")
                continue

            df_norm, grasps_etiquetados = src.build_dataframe(
                mat_file=mat_data,
                database=database,
                filename=file_name,
                rectify=False,
                normalize=True,
            )
            df_norm = df_norm[df_norm['relabeled'].isin(filtered_labels)]
            print(f"Columnas disponibles en {file_name}: {df_norm.columns.tolist()}")

            # Skip the file when the requested channel is absent
            if target_channel not in df_norm.columns:
                print(f"{target_channel} no encontrado en {file_name}, omitiendo.")
                continue

            # Copy of the un-enveloped channel, taken before the envelope is computed
            raw_signal = df_norm[[target_channel]].copy()

            # Envelope of the target channel only, re-joined with the metadata columns
            envelope_df = src.get_envelope_lowpass(
                df_norm[[target_channel]],
                fm=fm,
                cutoff_freq=cutoff_freq,
                envelope_type=envelope_type,
            )
            result_df = pd.concat([envelope_df, df_norm[meta_columns]], axis=1)

            for grasp in grasps_etiquetados:
                try:
                    print(f"\nProcessing Grasp {grasp}:")
                    grasp_df = result_df[result_df['stimulus'] == grasp]
                    # Raw samples belonging to the same grasp
                    raw_grasp_df = raw_signal[df_norm['stimulus'] == grasp]

                    if grasp_df.empty:
                        print(f"No hay datos para el grasp {grasp} en {file_name}.")
                        continue

                    # Overlapping windows of the envelope and of the raw signal
                    ventanas = src.create_windows_with_overlap(grasp_df, window_length, overlap)
                    ventanas_raw = src.create_windows_with_overlap(raw_grasp_df, window_length, overlap)

                    # Keep only windows that have the full requested length
                    envelope_windows = [
                        pd.DataFrame(window, columns=[target_channel])
                        for window in ventanas
                        if len(window) == window_length
                    ]
                    raw_windows = [
                        pd.DataFrame(window, columns=[target_channel])
                        for window in ventanas_raw
                        if len(window) == window_length
                    ]

                    if not (envelope_windows and raw_windows):
                        print(f"No hay ventanas válidas para el grasp {grasp} en {file_name}.")
                        continue

                    collected_windows.extend(envelope_windows)

                    # First window of this grasp: raw trace, then envelope on top
                    plt.figure(figsize=(12, 5))
                    tiempo = np.linspace(0, window_length / fm, window_length)

                    plt.plot(
                        tiempo,
                        raw_windows[0][target_channel],
                        color='c',
                        alpha=0.7,
                        label=f"{target_channel} - Señal Cruda",
                    )
                    plt.plot(
                        tiempo,
                        envelope_windows[0][target_channel],
                        color='m',
                        linewidth=2,
                        label=f"{target_channel} - Envolvente",
                    )

                    plt.xlabel("Tiempo (s)")
                    plt.ylabel("Amplitud")
                    plt.title(f"{file_name} - Grasp {grasp} - {target_channel}")
                    plt.legend()
                    plt.grid(True)
                    plt.tight_layout()
                    plt.show()
                except Exception as e:
                    print(f"Error processing grasp {grasp}: {str(e)}")

def plots_Windowed_all_channels(fm, window_length, overlap, cutoff_freq, envelope_type, filtered_labels):
    """
    Plot, for every grasp of every .mat file, the first full envelope window
    of all EMG channels (columns starting with "Channel") in a single figure.

    Reads the module-level `data_path` and `database`, and walks every
    subject folder ("s<number>", "S<number>" or "Subject<number>") found
    under `data_path`. Returns nothing; its output is the figures and the
    printed messages.

    Parameters:
    - fm: Sampling frequency in Hz (forwarded to the envelope helper and
      used to build the time axis).
    - window_length: Window length in samples.
    - overlap: Overlap between windows (forwarded to
      src.create_windows_with_overlap).
    - cutoff_freq: Cut-off frequency forwarded to src.get_envelope_lowpass.
    - envelope_type: Envelope type forwarded to src.get_envelope_lowpass
      (the original note lists 1 = RMS, 2 = rectified low-pass; verify
      against that helper).
    - filtered_labels: 'relabeled' values whose rows are kept.
    """
    for folder in os.listdir(data_path):
        if not (re.match(r'[sS]\d+', folder) or re.match(r'Subject\d+', folder)):
            continue
        subject_dir = os.path.join(data_path, folder)

        for file_name in os.listdir(subject_dir):
            if not file_name.endswith('.mat'):
                continue

            try:
                mat_data = src.loadmatNina(database, file_name, subject=folder)
            except Exception as e:
                print(f"Error loading {file_name}: {str(e)}")
                continue

            df_norm, grasps_etiquetados = src.build_dataframe(
                mat_file=mat_data,
                database=database,
                filename=file_name,
                rectify=False,
                normalize=True,
            )
            df_norm = df_norm[df_norm['relabeled'].isin(filtered_labels)]

            # Every column whose name starts with "Channel" is an EMG channel
            emg_channels = [name for name in df_norm.columns if name.startswith("Channel")]

            for grasp in grasps_etiquetados:
                try:
                    print(f"\nProcesando grasp {grasp} en {file_name}...")

                    # Rows belonging to this grasp
                    grasp_df = df_norm[df_norm["stimulus"] == grasp]
                    if grasp_df.empty:
                        continue

                    # Envelope of all channels, computed on this grasp's rows only
                    envelope_df = src.get_envelope_lowpass(
                        grasp_df[emg_channels],
                        fm=fm,
                        cutoff_freq=cutoff_freq,
                        envelope_type=envelope_type,
                    )

                    # Keep only windows that have the full requested length
                    ventanas = src.create_windows_with_overlap(envelope_df, window_length, overlap)
                    full_windows = [
                        pd.DataFrame(window, columns=emg_channels)
                        for window in ventanas
                        if len(window) == window_length
                    ]
                    if not full_windows:
                        continue

                    # Draw the first window, all channels on the same axes
                    first_window = full_windows[0]
                    plt.figure(figsize=(12, 6))
                    tiempo = np.linspace(0, window_length / fm, window_length)
                    for channel in emg_channels:
                        plt.plot(tiempo, first_window[channel], label=channel)

                    plt.title(f"{file_name} - Grasp {grasp} - Todos los canales")
                    plt.xlabel("Tiempo (s)")
                    plt.ylabel("Amplitud EMG (envolvente)")
                    plt.legend(ncol=4, fontsize='small')
                    plt.grid(True)
                    plt.tight_layout()
                    plt.show()
                except Exception as e:
                    print(f"Error procesando grasp {grasp}: {str(e)}")

def envelope_raw_all_channels(fm, window_length, overlap, cutoff_freq, envelope_type, filtered_labels):
    """
    Plot the raw signal and its envelope for every available EMG channel.

    Walks every subject folder under the notebook-level ``data_path``, loads
    each ``.mat`` file, keeps only the rows whose 'relabeled' value is in
    ``filtered_labels`` and, for every grasp reported by
    ``src.build_dataframe``, shows one figure with the first full-length
    window of each channel (raw trace plus envelope).

    Parameters:
    - fm: Sampling frequency in Hz.
    - window_length: Window length in samples.
    - overlap: Overlap percentage between windows (forwarded to
      ``src.create_windows_with_overlap``).
    - cutoff_freq: Cut-off frequency forwarded to ``src.get_envelope_lowpass``.
    - envelope_type: Envelope type forwarded to ``src.get_envelope_lowpass``
      (0 = rectified, 1 = RMS).
    - filtered_labels: List of grasp labels ('relabeled' values) to keep.

    Returns:
    - None. The function only prints progress messages and shows figures.

    Notes:
    - Reads the notebook globals ``data_path`` and ``database``.
    - Files that fail to load and grasps that fail to process are reported
      with a printed message and skipped.
    """
    meta_columns = ["Time (s)", "subject", "re_repetition", "stimulus", "relabeled"]

    # Time axis of one window: sample k sits at k / fm seconds. np.linspace
    # with the endpoint included would stretch the spacing to
    # window_length / (fm * (window_length - 1)) instead of 1 / fm.
    tiempo = np.arange(window_length) / fm

    for folder in os.listdir(data_path):
        # Only subject folders: "s<number>", "S<number>" or "Subject<number>".
        if not (re.match(r'[sS]\d+', folder) or re.match(r'Subject\d+', folder)):
            continue
        folder_path = os.path.join(data_path, folder)

        for file_name in os.listdir(folder_path):
            if not file_name.endswith('.mat'):
                continue

            try:
                mat_data = src.loadmatNina(database, file_name, subject=folder)
            except Exception as e:
                print(f"Error loading {file_name}: {str(e)}")
                continue

            df_norm, grasps_etiquetados = src.build_dataframe(
                mat_file=mat_data,
                database=database,
                filename=file_name,
                rectify=False,
                normalize=True
            )
            df_norm = df_norm[df_norm['relabeled'].isin(filtered_labels)]

            # Detect the EMG columns
            emg_columns = [col for col in df_norm.columns if col.startswith('Channel')]
            if not emg_columns:
                print(f"No se encontraron canales EMG en {file_name}, omitiendo.")
                continue

            print(f"{file_name}: canales EMG detectados: {emg_columns}")

            # Raw signal
            raw_signal = df_norm[emg_columns].copy()

            # Signal with the envelope applied
            envelope_df = src.get_envelope_lowpass(
                df_norm[emg_columns],
                fm=fm,
                cutoff_freq=cutoff_freq,
                envelope_type=envelope_type
            )
            result_df = pd.concat([envelope_df, df_norm[meta_columns]], axis=1)

            for grasp in grasps_etiquetados:
                try:
                    print(f"\nProcesando grasp {grasp} en {file_name}...")
                    grasp_df = result_df[result_df['stimulus'] == grasp]
                    raw_grasp_df = raw_signal[df_norm['stimulus'] == grasp]

                    if grasp_df.empty or raw_grasp_df.empty:
                        print(f"No hay datos válidos para grasp {grasp} en {file_name}.")
                        continue

                    ventanas = src.create_windows_with_overlap(grasp_df, window_length, overlap)
                    ventanas_raw = src.create_windows_with_overlap(raw_grasp_df, window_length, overlap)

                    # Keep only full-length windows, restricted to the EMG columns.
                    ventanas_df = [pd.DataFrame(v, columns=emg_columns) for v in ventanas if len(v) == window_length]
                    ventanas_raw_df = [pd.DataFrame(v, columns=emg_columns) for v in ventanas_raw if len(v) == window_length]

                    if not ventanas_df or not ventanas_raw_df:
                        print(f"No hay ventanas válidas para el grasp {grasp}.")
                        continue

                    # Plot the first window
                    plt.figure(figsize=(14, 6))

                    for ch in emg_columns:
                        plt.plot(tiempo, ventanas_raw_df[0][ch], alpha=0.5, label=f"{ch} Cruda")
                        plt.plot(tiempo, ventanas_df[0][ch], linewidth=2, label=f"{ch} Envolvente")

                    plt.xlabel("Tiempo (s)")
                    plt.ylabel("Amplitud")
                    plt.title(f"{file_name} - Grasp {grasp} - Todos los canales")
                    plt.legend(ncol=2, fontsize='small')
                    plt.grid(True)
                    plt.tight_layout()
                    plt.show()
                except Exception as e:
                    # Best effort: report the failing grasp and move on.
                    print(f"Error al procesar grasp {grasp}: {str(e)}")
                    continue

## Plots

- Raw Plots

In [None]:
# plots_Windowed(
#     fm=2000,
#     window_length=200,
#     overlap=0,
#     target_channels=emg_columns,  
#     cutoff_freq=0.6,
#     envelope_type=1,
#     filtered_labels=filtered_labels
# )

In [None]:
# plots_Windowed(
#     fm=2000,
#     window_length=400,
#     overlap=0,
#     target_channels=emg_columns,  
#     cutoff_freq=0.6,
#     envelope_type=1,
#     filtered_labels=filtered_labels
# )

In [None]:
# plots_Windowed(
#     fm=2000,
#     window_length=600,
#     overlap=0,
#     target_channels=emg_columns, 
#     cutoff_freq=0.6,
#     envelope_type=1,
#     filtered_labels=filtered_labels
# )

- Envelope signal

In [None]:
# Envelope plot settings: 200-sample windows at fm = 2000 Hz, no overlap.
fm, window_length, overlap = 2000, 200, 0
cutoff_freq, envelope_type = 0.6, 1

# Arguments are passed positionally, in the order of the function signature.
envelope_raw_all_channels(
    fm,
    window_length,
    overlap,
    cutoff_freq,
    envelope_type,
    filtered_labels,
)

In [None]:
# fm = 2000
# window_length = 400
# overlap = 0
# cutoff_freq = 0.6
# envelope_type = 1

# envelope_raw_all_channels(
#     fm=fm,
#     window_length=window_length,
#     overlap=overlap,
#     cutoff_freq=cutoff_freq,
#     envelope_type=envelope_type,
#     filtered_labels=filtered_labels
# )

In [None]:
# fm = 2000
# window_length = 600
# overlap = 0
# cutoff_freq = 0.6
# envelope_type = 1


# envelope_raw_all_channels(
#     fm=fm,
#     window_length=window_length,
#     overlap=overlap,
#     cutoff_freq=cutoff_freq,
#     envelope_type=envelope_type,
#     filtered_labels=filtered_labels
# )

## Windowed

- 300 ms

In [None]:
# Shared settings for the windowed-metrics cells below.
fm, overlap = 2000, 0  # sampling frequency (Hz), overlap between windows
cutoff_freq, envelope_type = 0.6, 1  # envelope cut-off frequency and envelope type

In [None]:
# window_length = 600
# # Lista para almacenar las métricas de todas las ventanas
# all_metrics = []

# # Obtener nombres de los canales EMG (suponiendo que empiezan por 'Channel')
# emg_columns = [col for col in combined_df.columns if col.startswith('Channel')]

# for folder in os.listdir(data_path):
#     if re.match(r'[sS]\d+', folder) or re.match(r'Subject\d+', folder):
#         folder_path = os.path.join(data_path, folder)

#         for file_name in os.listdir(folder_path):
#             if file_name.endswith('.mat'):
#                 file_path = os.path.join(folder_path, file_name)

#                 try:
#                     mat_data = src.loadmatNina(database, file_name, subject=folder)
#                 except Exception as e:
#                     print(f"Error loading {file_name}: {str(e)}")
#                     continue

#                 test_df, grasps = src.build_dataframe(
#                     mat_file=mat_data,
#                     database=database,
#                     filename=file_name,
#                     rectify=False,
#                     normalize=True
#                 )

#                 test_df = test_df[test_df['relabeled'].isin(filtered_labels)]

#                 # Extraer la envolvente de todos los canales EMG
#                 envelope_df = src.get_envelope_lowpass(test_df[emg_columns], fm=fm, cutoff_freq=0.6, envelope_type=1)

#                 # Añadir columnas meta
#                 meta_columns = ["Time (s)", "subject", "re_repetition", "stimulus", "relabeled"]
#                 result_df = pd.concat([envelope_df, test_df[meta_columns]], axis=1)

#                 for grasp in grasps:
#                     grasp_df = result_df[result_df['stimulus'] == grasp]

#                     if grasp_df.empty:
#                         print(f"No hay datos para el grasp {grasp} en {file_name}.")
#                         continue

#                     for channel in emg_columns:
#                         ventanas = src.create_windows_with_overlap(grasp_df[[channel, 'relabeled']], window_length, overlap)

#                         for i, ventana in enumerate(ventanas):
#                             if len(ventana) == window_length:
#                                 signal = ventana[channel].values
#                                 metrics = calculate_emg_metrics_means(signal)

#                                 metrics_with_meta = {
#                                     "subject": folder,
#                                     "relabeled": grasp_df['relabeled'].iloc[0],
#                                     "stimulus": grasp,
#                                     "channel": channel,
#                                     "window_id": f"{file_name}_{grasp}_{channel}_{i}",
#                                     "file_name": file_name,
#                                     "window_number": i,
#                                     **metrics
#                                 }
#                                 all_metrics.append(metrics_with_meta)

# # Crear DataFrame con todas las métricas calculadas
# metrics_df = pd.DataFrame(all_metrics)

# # Ordenar columnas
# meta_cols = ["subject", "relabeled", "stimulus", "channel", "window_id", "file_name", "window_number"]
# metric_cols = [col for col in metrics_df.columns if col not in meta_cols]
# metrics_df = metrics_df[meta_cols + sorted(metric_cols)]

# # 1. DataFrame con estructura por ventana (como pediste)
# summary_per_window = metrics_df[['window_id', 'file_name', 'window_number'] + sorted(metric_cols)]
# display(summary_per_window)

# # 2. Promedio por canal y tipo de movimiento
# summary_by_channel = metrics_df.groupby(['relabeled', 'channel']).mean(numeric_only=True).reset_index()
# display(summary_by_channel)

# # 3. Promedio final por tipo de movimiento (acumulado)
# summary_by_movement = summary_by_channel.groupby(['relabeled']).mean(numeric_only=True).reset_index()
# display(summary_by_movement)

- 200 ms

In [None]:
# Per-window EMG metrics for 400-sample windows (200 ms at fm = 2000 Hz).
# Uses the notebook globals fm, overlap, cutoff_freq, envelope_type, data_path,
# database, filtered_labels and combined_df.
window_length = 400
all_metrics = []

# EMG channel columns, taken from the previously built combined_df
emg_columns = [col for col in combined_df.columns if col.startswith('Channel')]
meta_columns = ["Time (s)", "subject", "re_repetition", "stimulus", "relabeled"]

for folder in os.listdir(data_path):
    # Only subject folders: "s<number>", "S<number>" or "Subject<number>".
    if not (re.match(r'[sS]\d+', folder) or re.match(r'Subject\d+', folder)):
        continue
    folder_path = os.path.join(data_path, folder)

    for file_name in os.listdir(folder_path):
        if not file_name.endswith('.mat'):
            continue

        try:
            mat_data = src.loadmatNina(database, file_name, subject=folder)
        except Exception as e:
            print(f"Error loading {file_name}: {str(e)}")
            continue

        test_df, grasps = src.build_dataframe(
            mat_file=mat_data,
            database=database,
            filename=file_name,
            rectify=False,
            normalize=True
        )

        test_df = test_df[test_df['relabeled'].isin(filtered_labels)]

        # Use the envelope settings configured in the parameter cell instead of
        # repeating the literals here, so a change there is honoured by this cell.
        envelope_df = src.get_envelope_lowpass(
            test_df[emg_columns],
            fm=fm,
            cutoff_freq=cutoff_freq,
            envelope_type=envelope_type
        )
        result_df = pd.concat([envelope_df, test_df[meta_columns]], axis=1)

        for grasp in grasps:
            grasp_df = result_df[result_df['stimulus'] == grasp]

            if grasp_df.empty:
                continue

            # Group by repetition so windows are cut within a single repetition
            for repetition in grasp_df['re_repetition'].unique():
                rep_df = grasp_df[grasp_df['re_repetition'] == repetition]
                relabeled_value = rep_df['relabeled'].iloc[0]

                for channel in emg_columns:
                    ventanas = src.create_windows_with_overlap(rep_df[[channel, 'relabeled']], window_length, overlap)

                    for i, ventana in enumerate(ventanas):
                        # Skip incomplete windows.
                        if len(ventana) != window_length:
                            continue

                        signal = ventana[channel].values
                        metrics = calculate_emg_metrics_means(signal)

                        metrics_with_meta = {
                            "subject": folder,
                            "relabeled": relabeled_value,
                            "stimulus": grasp,
                            "channel": channel,
                            "re_repetition": repetition,
                            "window_id": f"{file_name}_{grasp}_{channel}_{repetition}_{i}",
                            "file_name": file_name,
                            "window_number": i,
                            **metrics
                        }
                        all_metrics.append(metrics_with_meta)

# Without any window the column selection below would fail with an opaque
# KeyError, so stop early with an explicit message.
if not all_metrics:
    raise ValueError(
        "No full-length windows were produced; check data_path, "
        "filtered_labels and window_length."
    )

# Build a DataFrame with all computed metrics
metrics_df_200 = pd.DataFrame(all_metrics)

# Order the columns: metadata first, then the metrics in alphabetical order
meta_cols = ["subject", "relabeled", "stimulus", "channel", "re_repetition", "window_id", "file_name", "window_number"]
metric_cols_200 = [col for col in metrics_df_200.columns if col not in meta_cols]
metrics_df_200 = metrics_df_200[meta_cols + sorted(metric_cols_200)]

# 1. Per-window structure
summary_per_window_200 = metrics_df_200[['window_id', 'file_name', 'window_number'] + sorted(metric_cols_200)]
display(summary_per_window_200)

# 2. Mean per subject, movement type, channel and repetition
summary_by_channel_rep_200 = metrics_df_200.groupby(['subject', 'relabeled', 'channel', 're_repetition']).mean(numeric_only=True).reset_index()
display(summary_by_channel_rep_200)

# 3. Final mean per movement type (accumulated)
summary_by_movement_200 = summary_by_channel_rep_200.groupby(['relabeled']).mean(numeric_only=True).reset_index()
display(summary_by_movement_200)


In [None]:
# Drop the rows whose 'channel' value is exactly 'Channel 11' or 'Channel 12'
# (the comparison is an exact, case-sensitive match).
metrics_df_200 = metrics_df_200.loc[
    ~metrics_df_200['channel'].isin(['Channel 11', 'Channel 12'])
]

metrics_df_200

In [None]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

# Reshape the per-window metrics into one row per (subject, relabeled,
# re_repetition) with one column per channel/metric pair, min-max normalize the
# feature columns and save the result as CSV.
output_csv = "metrics_avg_by_repetition_tesis_3.csv"

# 1. Drop unwanted metric columns (names that are not present are ignored)
# NOTE(review): ' SSC' has a leading space, so a column named 'SSC' is NOT
# dropped by this list — confirm whether that is intended.
cols_to_drop = ['Kurt', 'ZC', 'MNF', 'MNP', ' SSC', 'TD', 'VAR', 'WL']
metrics_df_filtered = metrics_df_200.drop(columns=cols_to_drop, errors='ignore')

# 1.1. Drop columns with badly formatted names (e.g. 'ChannelChannel 3_window_number')
metrics_df_filtered = metrics_df_filtered.loc[:, ~metrics_df_filtered.columns.str.contains("ChannelChannel")]

# 2. Group by subject, relabeled, re_repetition and channel, averaging the metrics
grouped = metrics_df_filtered.groupby(['subject', 'relabeled', 're_repetition', 'channel']).mean(numeric_only=True).reset_index()

# 3. Pivot so that every channel contributes its own set of metric columns
pivoted = grouped.pivot_table(
    index=['subject', 'relabeled', 're_repetition'],
    columns='channel'
)

# 4. Flatten the column MultiIndex into "<channel>_<metric>" names
pivoted.columns = [f"{col[1]}_{col[0]}" for col in pivoted.columns]
pivoted = pivoted.reset_index()

# 5. Min-max normalize every column except the identifier columns
non_normalized_cols = ['subject', 'relabeled', 're_repetition']
columns_to_normalize = [col for col in pivoted.columns if col not in non_normalized_cols]

scaler = MinMaxScaler()
normalized_data = scaler.fit_transform(pivoted[columns_to_normalize])

# Recombine the identifier columns with the normalized features
pivoted_normalized = pd.concat(
    [pivoted[non_normalized_cols].reset_index(drop=True),
     pd.DataFrame(normalized_data, columns=columns_to_normalize)],
    axis=1
)

# 6. Save to CSV and report the file name that was actually written
pivoted_normalized.to_csv(output_csv, index=False)

print(f"✅ DataFrame transformado y guardado como '{output_csv}'")


In [None]:
# Show the normalized per-repetition feature table as the cell output.
pivoted_normalized

In [None]:
# List every column name of the normalized feature table.
print(list(pivoted_normalized.columns))

In [None]:
# import pandas as pd

# # Suponiendo que ya tienes cargado el DataFrame llamado summary_by_channel_200

# # 1. Pivotar el DataFrame
# df_pivoted = summary_by_channel_200.pivot_table(
#     index=['relabeled', 'window_number'],
#     columns='channel'
# )

# # 2. Aplanar columnas MultiIndex
# df_pivoted.columns = [f"Channel{col[1]}_{col[0]}" for col in df_pivoted.columns]
# df_pivoted = df_pivoted.reset_index()

# # 3. Eliminar columnas no deseadas
# columns_to_drop = [col for col in df_pivoted.columns if any(x in col for x in ['Kurt', 'ZC'])]
# df_cleaned = df_pivoted.drop(columns=columns_to_drop)
# display(df_cleaned)
# # 4. Guardar como CSV
# df_cleaned.to_csv("summary_by_window.csv", index=False)

# print("✅ DataFrame reestructurado y guardado como 'summary_by_window.csv'")


- 100 ms

In [None]:
# window_length = 200
# # Lista para almacenar las métricas de todas las ventanas
# all_metrics = []

# # Obtener nombres de los canales EMG (suponiendo que empiezan por 'Channel')
# emg_columns = [col for col in combined_df.columns if col.startswith('Channel')]

# for folder in os.listdir(data_path):
#     if re.match(r'[sS]\d+', folder) or re.match(r'Subject\d+', folder):
#         folder_path = os.path.join(data_path, folder)

#         for file_name in os.listdir(folder_path):
#             if file_name.endswith('.mat'):
#                 file_path = os.path.join(folder_path, file_name)

#                 try:
#                     mat_data = src.loadmatNina(database, file_name, subject=folder)
#                 except Exception as e:
#                     print(f"Error loading {file_name}: {str(e)}")
#                     continue

#                 test_df, grasps = src.build_dataframe(
#                     mat_file=mat_data,
#                     database=database,
#                     filename=file_name,
#                     rectify=False,
#                     normalize=True
#                 )

#                 test_df = test_df[test_df['relabeled'].isin(filtered_labels)]

#                 # Extraer la envolvente de todos los canales EMG
#                 envelope_df = src.get_envelope_lowpass(test_df[emg_columns], fm=fm, cutoff_freq=0.6, envelope_type=1)

#                 # Añadir columnas meta
#                 meta_columns = ["Time (s)", "subject", "re_repetition", "stimulus", "relabeled"]
#                 result_df = pd.concat([envelope_df, test_df[meta_columns]], axis=1)

#                 for grasp in grasps:
#                     grasp_df = result_df[result_df['stimulus'] == grasp]

#                     if grasp_df.empty:
#                         print(f"No hay datos para el grasp {grasp} en {file_name}.")
#                         continue

#                     for channel in emg_columns:
#                         ventanas = src.create_windows_with_overlap(grasp_df[[channel, 'relabeled']], window_length, overlap)

#                         for i, ventana in enumerate(ventanas):
#                             if len(ventana) == window_length:
#                                 signal = ventana[channel].values
#                                 metrics = calculate_emg_metrics_means(signal)

#                                 metrics_with_meta = {
#                                     "subject": folder,
#                                     "relabeled": grasp_df['relabeled'].iloc[0],
#                                     "stimulus": grasp,
#                                     "channel": channel,
#                                     "window_id": f"{file_name}_{grasp}_{channel}_{i}",
#                                     "file_name": file_name,
#                                     "window_number": i,
#                                     **metrics
#                                 }
#                                 all_metrics.append(metrics_with_meta)

# # Crear DataFrame con todas las métricas calculadas
# metrics_df_100 = pd.DataFrame(all_metrics)

# # Ordenar columnas
# meta_cols = ["subject", "relabeled", "stimulus", "channel", "window_id", "file_name", "window_number"]
# metric_cols_100 = [col for col in metrics_df_100.columns if col not in meta_cols]
# metrics_df_100 = metrics_df_100[meta_cols + sorted(metric_cols)]

# # 1. DataFrame con estructura por ventana (como pediste)
# summary_per_window_100 = metrics_df_100[['window_id', 'file_name', 'window_number'] + sorted(metric_cols_100)]
# display(summary_per_window_100)

# # 2. Promedio por canal y tipo de movimiento
# summary_by_channel_100 = metrics_df_100.groupby(['relabeled', 'channel']).mean(numeric_only=True).reset_index()
# display(summary_by_channel_100)

# # 3. Promedio final por tipo de movimiento (acumulado)
# summary_by_movement_100 = summary_by_channel_100.groupby(['relabeled']).mean(numeric_only=True).reset_index()
# display(summary_by_movement_100)

## Dataframes for grasp with enveloped

- 300 ms

In [None]:
# # Parámetros para el ventaneado
# fm = 2000  # Frecuencia de muestreo en Hz
# window_length = 600  # Ventana de 300 ms en muestras
# overlap = 0  # Sin superposición

# # Diccionario para almacenar DataFrames por cada valor único de 'relabeled'
# metrics_dfs_by_relabeled = {}

# # Buscar carpetas que coincidan con el patrón "s + número" o "Subject + número"
# for folder in os.listdir(data_path):
#     if re.match(r'[sS]\d+', folder) or re.match(r'Subject\d+', folder):
#         folder_path = os.path.join(data_path, folder)

#         for file_name in os.listdir(folder_path):
#             if file_name.endswith('.mat'):
#                 file_path = os.path.join(folder_path, file_name)

#                 try:
#                     mat_data = src.loadmatNina(database, file_name, subject=folder)
#                 except Exception as e:
#                     print(f"Error loading {file_name}: {str(e)}")
#                     continue

#                 test_df, grasps = src.build_dataframe(
#                     mat_file=mat_data,
#                     database=database,
#                     filename=file_name,
#                     rectify=False,
#                     normalize=True
#                 )
#                 test_df = test_df[test_df['relabeled'].isin(filtered_labels)]

#                 # Identificar todos los canales EMG
#                 emg_columns = [col for col in test_df.columns if col.startswith("Channel")]
#                 if not emg_columns:
#                     print(f"No se encontraron canales EMG en {file_name}.")
#                     continue

#                 # Aplicar extracción del envelope a todos los canales
#                 envelope_df = src.get_envelope_lowpass(test_df[emg_columns], fm=fm, cutoff_freq=0.6, envelope_type=1)

#                 # Conservar columnas meta
#                 meta_columns = ["Time (s)", "subject", "re_repetition", "stimulus", "relabeled"]
#                 result_df = pd.concat([envelope_df, test_df[meta_columns]], axis=1)

#                 for grasp in grasps:
#                     grasp_df = result_df[result_df['stimulus'] == grasp]

#                     if grasp_df.empty:
#                         continue

#                     # Procesar por canal y luego promediar
#                     metrics_by_window = []

#                     for channel in emg_columns:
#                         ventanas = src.create_windows_with_overlap(grasp_df[[channel, 'relabeled']], window_length, overlap)

#                         for ventana in ventanas:
#                             if len(ventana) == window_length:
#                                 signal = ventana[channel].values
#                                 metrics = calculate_emg_metrics_means(signal)
#                                 metrics_by_window.append(metrics)

#                     # Si se calcularon métricas en al menos un canal
#                     if metrics_by_window:
#                         avg_metrics = pd.DataFrame(metrics_by_window).mean().to_dict()

#                         relabeled_value = grasp_df['relabeled'].iloc[0]
#                         metrics_with_meta = {
#                             "subject": folder,
#                             "relabeled": relabeled_value,
#                             "stimulus": grasp,
#                             **avg_metrics
#                         }

#                         if relabeled_value not in metrics_dfs_by_relabeled:
#                             metrics_dfs_by_relabeled[relabeled_value] = []

#                         metrics_dfs_by_relabeled[relabeled_value].append(metrics_with_meta)

# # Convertir a DataFrames y agrupar por sujeto y relabeled
# for relabeled_value, data in metrics_dfs_by_relabeled.items():
#     df = pd.DataFrame(data)
#     df_mean = df.groupby(["subject", "relabeled", "stimulus"]).mean(numeric_only=True).reset_index()
#     var_name = f"df_relabeled_{relabeled_value}_300"
#     globals()[var_name] = df_mean
#     display(globals()[var_name])


- 200 ms

In [None]:
# Windowing and envelope parameters
fm = 2000  # Sampling frequency in Hz
window_length = 400  # 200 ms window, in samples
overlap = 0  # No overlap
cutoff_freq = 0.6  # Cut-off frequency passed to the envelope filter
envelope_type = 1  # Envelope type passed to the envelope filter

# Rows of averaged metrics, collected per unique 'relabeled' value
metrics_dfs_by_relabeled = {}

meta_columns = ["Time (s)", "subject", "re_repetition", "stimulus", "relabeled"]

# Look for folders named "s + number" or "Subject + number"
for folder in os.listdir(data_path):
    if not (re.match(r'[sS]\d+', folder) or re.match(r'Subject\d+', folder)):
        continue
    folder_path = os.path.join(data_path, folder)

    for file_name in os.listdir(folder_path):
        if not file_name.endswith('.mat'):
            continue

        try:
            mat_data = src.loadmatNina(database, file_name, subject=folder)
        except Exception as e:
            print(f"Error loading {file_name}: {str(e)}")
            continue

        test_df, grasps = src.build_dataframe(
            mat_file=mat_data,
            database=database,
            filename=file_name,
            rectify=False,
            normalize=True
        )
        test_df = test_df[test_df['relabeled'].isin(filtered_labels)]

        # Identify all EMG channels
        emg_columns = [col for col in test_df.columns if col.startswith("Channel")]
        if not emg_columns:
            print(f"No se encontraron canales EMG en {file_name}.")
            continue

        # Extract the envelope of every channel
        envelope_df = src.get_envelope_lowpass(
            test_df[emg_columns],
            fm=fm,
            cutoff_freq=cutoff_freq,
            envelope_type=envelope_type
        )

        # Keep the metadata columns next to the envelopes
        result_df = pd.concat([envelope_df, test_df[meta_columns]], axis=1)

        for grasp in grasps:
            grasp_df = result_df[result_df['stimulus'] == grasp]

            if grasp_df.empty:
                continue

            # Collect the metrics of every full window of every channel; they
            # are averaged together below.
            # NOTE(review): windows are cut over all rows of the grasp, so a
            # window may span two repetitions; the per-window metrics cell
            # groups by 're_repetition' first — confirm which is intended.
            metrics_by_window = []

            for channel in emg_columns:
                ventanas = src.create_windows_with_overlap(grasp_df[[channel, 'relabeled']], window_length, overlap)

                for ventana in ventanas:
                    if len(ventana) != window_length:
                        continue
                    signal = ventana[channel].values
                    metrics_by_window.append(calculate_emg_metrics_means(signal))

            # Nothing to record when no channel produced a full window
            if not metrics_by_window:
                continue

            avg_metrics = pd.DataFrame(metrics_by_window).mean().to_dict()

            relabeled_value = grasp_df['relabeled'].iloc[0]
            metrics_with_meta = {
                "subject": folder,
                "relabeled": relabeled_value,
                "stimulus": grasp,
                **avg_metrics
            }

            metrics_dfs_by_relabeled.setdefault(relabeled_value, []).append(metrics_with_meta)

# Convert to DataFrames and average by subject, relabeled and stimulus
for relabeled_value, data in metrics_dfs_by_relabeled.items():
    df = pd.DataFrame(data)
    df_mean = df.groupby(["subject", "relabeled", "stimulus"]).mean(numeric_only=True).reset_index()
    # Publish each table as a notebook global named df_relabeled_<value>_200.
    var_name = f"df_relabeled_{relabeled_value}_200"
    globals()[var_name] = df_mean
    display(df_mean)

- 100 ms

In [None]:
# # Parámetros para el ventaneado
# fm = 2000  # Frecuencia de muestreo en Hz
# window_length = 200  # Ventana de 100 ms en muestras
# overlap = 0  # Sin superposición

# # Diccionario para almacenar DataFrames por cada valor único de 'relabeled'
# metrics_dfs_by_relabeled = {}

# # Buscar carpetas que coincidan con el patrón "s + número" o "Subject + número"
# for folder in os.listdir(data_path):
#     if re.match(r'[sS]\d+', folder) or re.match(r'Subject\d+', folder):
#         folder_path = os.path.join(data_path, folder)

#         for file_name in os.listdir(folder_path):
#             if file_name.endswith('.mat'):
#                 file_path = os.path.join(folder_path, file_name)

#                 try:
#                     mat_data = src.loadmatNina(database, file_name, subject=folder)
#                 except Exception as e:
#                     print(f"Error loading {file_name}: {str(e)}")
#                     continue

#                 test_df, grasps = src.build_dataframe(
#                     mat_file=mat_data,
#                     database=database,
#                     filename=file_name,
#                     rectify=False,
#                     normalize=True
#                 )
#                 test_df = test_df[test_df['relabeled'].isin(filtered_labels)]

#                 # Identificar todos los canales EMG
#                 emg_columns = [col for col in test_df.columns if col.startswith("Channel")]
#                 if not emg_columns:
#                     print(f"No se encontraron canales EMG en {file_name}.")
#                     continue

#                 # Aplicar extracción del envelope a todos los canales
#                 envelope_df = src.get_envelope_lowpass(test_df[emg_columns], fm=fm, cutoff_freq=0.6, envelope_type=1)

#                 # Conservar columnas meta
#                 meta_columns = ["Time (s)", "subject", "re_repetition", "stimulus", "relabeled"]
#                 result_df = pd.concat([envelope_df, test_df[meta_columns]], axis=1)

#                 for grasp in grasps:
#                     grasp_df = result_df[result_df['stimulus'] == grasp]

#                     if grasp_df.empty:
#                         continue

#                     # Procesar por canal y luego promediar
#                     metrics_by_window = []

#                     for channel in emg_columns:
#                         ventanas = src.create_windows_with_overlap(grasp_df[[channel, 'relabeled']], window_length, overlap)

#                         for ventana in ventanas:
#                             if len(ventana) == window_length:
#                                 signal = ventana[channel].values
#                                 metrics = calculate_emg_metrics_means(signal)
#                                 metrics_by_window.append(metrics)

#                     # Si se calcularon métricas en al menos un canal
#                     if metrics_by_window:
#                         avg_metrics = pd.DataFrame(metrics_by_window).mean().to_dict()

#                         relabeled_value = grasp_df['relabeled'].iloc[0]
#                         metrics_with_meta = {
#                             "subject": folder,
#                             "relabeled": relabeled_value,
#                             "stimulus": grasp,
#                             **avg_metrics
#                         }

#                         if relabeled_value not in metrics_dfs_by_relabeled:
#                             metrics_dfs_by_relabeled[relabeled_value] = []

#                         metrics_dfs_by_relabeled[relabeled_value].append(metrics_with_meta)

# # Convertir a DataFrames y agrupar por sujeto y relabeled
# for relabeled_value, data in metrics_dfs_by_relabeled.items():
#     df = pd.DataFrame(data)
#     df_mean = df.groupby(["subject", "relabeled", "stimulus"]).mean(numeric_only=True).reset_index()
#     var_name = f"df_relabeled_{relabeled_value}_100"
#     globals()[var_name] = df_mean
#     display(globals()[var_name])

In [None]:
# Per-label averaged DataFrames, collected so they can be concatenated below
all_metrics_dfs = []

# Turn each label's list of records into a DataFrame and average it per
# (subject, relabeled, stimulus)
for relabeled_value, data in metrics_dfs_by_relabeled.items():
    df = pd.DataFrame(data)

    # numeric_only=True matches the other groupby summaries in this notebook
    # and keeps pandas >= 2.0 from raising if a non-numeric column is present
    df_mean = (
        df.groupby(["subject", "relabeled", "stimulus"])
        .mean(numeric_only=True)
        .reset_index()
    )

    # Later cells locate these frames by scanning globals() for names of the
    # form df_relabeled_<label>_200
    var_name = f"df_relabeled_{relabeled_value}_200"
    globals()[var_name] = df_mean

    all_metrics_dfs.append(df_mean)

# Stack every per-label frame into a single table
df_all = pd.concat(all_metrics_dfs, ignore_index=True)

# ZC and Kurt are left out of the exported table
df_metrics = df_all.drop(columns=['ZC', 'Kurt'])
df_metrics.to_csv("metrics_all_subjects.csv", index=False)

display(df_all)
display(df_metrics)

### Normalized

In [None]:
# Per-label averaged DataFrames, collected for the concatenation below
all_metrics_dfs = []

# Turn each label's list of records into a DataFrame and average it per
# (subject, relabeled, stimulus)
for relabeled_value, data in metrics_dfs_by_relabeled.items():
    df = pd.DataFrame(data)

    # numeric_only=True matches the other groupby summaries in this notebook
    # and keeps pandas >= 2.0 from raising if a non-numeric column is present
    df_mean = (
        df.groupby(["subject", "relabeled", "stimulus"])
        .mean(numeric_only=True)
        .reset_index()
    )

    # Later cells locate these frames by scanning globals() for names of the
    # form df_relabeled_<label>_200
    var_name = f"df_relabeled_{relabeled_value}_200"
    globals()[var_name] = df_mean
    all_metrics_dfs.append(df_mean)

# Stack every per-label frame into a single table
df_all = pd.concat(all_metrics_dfs, ignore_index=True)

# Drop the unwanted metrics before normalizing
df_metrics = df_all.drop(columns=['ZC', 'Kurt'])

# Numeric columns to rescale; the grouping keys are excluded even when numeric
numeric_cols = df_metrics.select_dtypes(include='number').columns.difference(['subject', 'relabeled', 'stimulus'])

# Min-Max scaling, fitted on all rows of the table at once
scaler = MinMaxScaler()
df_metrics[numeric_cols] = scaler.fit_transform(df_metrics[numeric_cols])

df_metrics.to_csv("metrics_all_subjects_normalized.csv", index=False)

# df_all holds the raw averages, df_metrics the normalized ones
display(df_all)
display(df_metrics)


## Summary by movement

- 300 ms

In [None]:
# # Parámetros para el ventaneado
# fm = 2000  # Frecuencia de muestreo en Hz
# window_length = 600  # Ventana de 300 ms en muestras
# overlap = 0  # Sin superposición

# # Lista para almacenar todas las métricas
# all_metrics = []

# # Buscar carpetas que coincidan con el patrón "s + número" o "Subject + número"
# for folder in os.listdir(data_path):
#     if re.match(r'[sS]\d+', folder) or re.match(r'Subject\d+', folder):
#         folder_path = os.path.join(data_path, folder)
        
#         for file_name in os.listdir(folder_path):
#             if file_name.endswith('.mat'):
#                 file_path = os.path.join(folder_path, file_name)

#                 try:
#                     mat_data = src.loadmatNina(database, file_name, subject=folder)
#                 except Exception as e:
#                     print(f"Error loading {file_name}: {str(e)}")
#                     continue

#                 test_df, grasps = src.build_dataframe(
#                     mat_file=mat_data,
#                     database=database,
#                     filename=file_name,
#                     rectify=False,
#                     normalize=True
#                 )

#                 test_df = test_df[test_df['relabeled'].isin(filtered_labels)]

#                 # Detectar canales EMG
#                 emg_columns = [col for col in test_df.columns if col.startswith("Channel")]
#                 if not emg_columns:
#                     print(f"No se encontraron canales EMG en {file_name}")
#                     continue

#                 # Calcular envelope
#                 envelope_df = src.get_envelope_lowpass(test_df[emg_columns], fm=fm, cutoff_freq=0.6, envelope_type=1)

#                 # Agregar metadata
#                 meta_columns = ["Time (s)", "subject", "re_repetition", "stimulus", "relabeled"]
#                 result_df = pd.concat([envelope_df, test_df[meta_columns]], axis=1)

#                 for grasp in grasps:
#                     grasp_df = result_df[result_df['stimulus'] == grasp]
#                     if grasp_df.empty:
#                         continue

#                     ventanas = src.create_windows_with_overlap(grasp_df, window_length, overlap)
#                     for i, ventana in enumerate(ventanas):
#                         if len(ventana) != window_length:
#                             continue
                        
#                         metrics_per_channel = []
#                         for channel in emg_columns:
#                             signal = ventana[channel].values
#                             metrics = calculate_emg_metrics_means(signal)
#                             metrics_per_channel.append(metrics)

#                         # Promediar métricas entre canales
#                         if metrics_per_channel:
#                             avg_metrics = pd.DataFrame(metrics_per_channel).mean().to_dict()

#                             # Agregar metadata
#                             metrics_with_meta = {
#                                 "subject": folder,
#                                 "relabeled": ventana['relabeled'].iloc[0],
#                                 "stimulus": grasp,
#                                 "window_id": f"{file_name}_{grasp}_{i}",
#                                 "file_name": file_name,
#                                 "window_number": i,
#                                 **avg_metrics
#                             }

#                             all_metrics.append(metrics_with_meta)

# # Crear DataFrame con resultados
# metrics_df_all_channels = pd.DataFrame(all_metrics)

# # Reorganizar columnas
# meta_cols = ["subject", "relabeled", "stimulus", "window_id", "file_name", "window_number"]
# metric_cols = [col for col in metrics_df_all_channels.columns if col not in meta_cols]
# column_order = meta_cols + sorted(metric_cols)
# metrics_df_all_channels = metrics_df_all_channels[column_order]
# display(metrics_df_all_channels)

# # Agrupar y promediar por relabeled por sujeto y tipo de movimiento
# summary_by_relabeled_300 = metrics_df_all_channels.drop(columns=["window_id", "file_name", "window_number"])
# summary_by_relabeled_300 = summary_by_relabeled_300.groupby(["subject", "relabeled", "stimulus"]).mean(numeric_only=True).reset_index()

# print("\nResumen de métricas por sujeto, relabeled y tipo de movimiento:")
# display(summary_by_relabeled_300)

# # Agrupar y promediar por relabeled globalmente (todos los sujetos y estímulos)
# global_summary_by_relabeled_300 = (
#     metrics_df_all_channels
#     .drop(columns=["subject", "stimulus", "window_id", "file_name", "window_number"])
#     .groupby("relabeled")
#     .mean(numeric_only=True)
#     .reset_index()
# )

# print("\nResumen global de métricas promediadas por 'relabeled' (todos los sujetos):")
# display(global_summary_by_relabeled_300)

# # Estadísticas generales
# print(f"\nTotal de ventanas procesadas: {len(metrics_df_all_channels)}")
# print(f"Distribución por sujeto:\n{metrics_df_all_channels['subject'].value_counts()}")
# print(f"Distribución por movimiento:\n{metrics_df_all_channels['relabeled'].value_counts()}")

- 200 ms

In [None]:
# Windowing parameters
fm = 2000  # Sampling frequency in Hz
window_length = 400  # Window size in samples: 400 / 2000 Hz = 200 ms
overlap = 0  # No overlap between consecutive windows

# One record per full-length window, with metrics averaged across EMG channels
all_metrics = []

# Walk every subject folder named "s<number>", "S<number>" or "Subject<number>"
for folder in os.listdir(data_path):
    if not (re.match(r'[sS]\d+', folder) or re.match(r'Subject\d+', folder)):
        continue
    folder_path = os.path.join(data_path, folder)

    for file_name in os.listdir(folder_path):
        if not file_name.endswith('.mat'):
            continue
        # NOTE(review): file_path is not read anywhere in this cell; it is
        # kept only so the notebook-level name keeps being defined
        file_path = os.path.join(folder_path, file_name)

        # A file that fails to load is reported and skipped, not fatal
        try:
            mat_data = src.loadmatNina(database, file_name, subject=folder)
        except Exception as e:
            print(f"Error loading {file_name}: {str(e)}")
            continue

        test_df, grasps = src.build_dataframe(
            mat_file=mat_data,
            database=database,
            filename=file_name,
            rectify=False,
            normalize=True
        )

        # Keep only the samples whose label is in filtered_labels
        test_df = test_df[test_df['relabeled'].isin(filtered_labels)]

        # EMG channels are the columns whose name starts with "Channel"
        emg_columns = [col for col in test_df.columns if col.startswith("Channel")]
        if not emg_columns:
            print(f"No se encontraron canales EMG en {file_name}")
            continue

        # Envelope of every EMG channel
        envelope_df = src.get_envelope_lowpass(test_df[emg_columns], fm=fm, cutoff_freq=0.6, envelope_type=1)

        # Put the metadata columns back next to the envelopes
        meta_columns = ["Time (s)", "subject", "re_repetition", "stimulus", "relabeled"]
        result_df = pd.concat([envelope_df, test_df[meta_columns]], axis=1)

        for grasp in grasps:
            grasp_df = result_df[result_df['stimulus'] == grasp]
            if grasp_df.empty:
                continue

            ventanas = src.create_windows_with_overlap(grasp_df, window_length, overlap)
            for i, ventana in enumerate(ventanas):
                # Windows that are not exactly window_length long are discarded
                if len(ventana) != window_length:
                    continue

                metrics_per_channel = []
                for channel in emg_columns:
                    signal = ventana[channel].values
                    metrics = calculate_emg_metrics_means(signal)
                    metrics_per_channel.append(metrics)

                # Average each metric across channels for this window
                if metrics_per_channel:
                    avg_metrics = pd.DataFrame(metrics_per_channel).mean().to_dict()

                    metrics_with_meta = {
                        "subject": folder,
                        "relabeled": ventana['relabeled'].iloc[0],
                        "stimulus": grasp,
                        "window_id": f"{file_name}_{grasp}_{i}",
                        "file_name": file_name,
                        "window_number": i,
                        **avg_metrics
                    }

                    all_metrics.append(metrics_with_meta)

# Without this guard an empty run fails further down with an opaque KeyError
# when the (non-existent) columns are reordered
if not all_metrics:
    raise RuntimeError(
        f"No windows were processed under {data_path!r}; check the subject "
        "folders, filtered_labels and window_length."
    )

# One row per processed window
metrics_df_all_channels = pd.DataFrame(all_metrics)

# Metadata columns first, then the metric columns in alphabetical order
meta_cols = ["subject", "relabeled", "stimulus", "window_id", "file_name", "window_number"]
metric_cols = [col for col in metrics_df_all_channels.columns if col not in meta_cols]
column_order = meta_cols + sorted(metric_cols)
metrics_df_all_channels = metrics_df_all_channels[column_order]
display(metrics_df_all_channels)

# Mean of every metric per (subject, relabeled, stimulus)
summary_by_relabeled_200 = metrics_df_all_channels.drop(columns=["window_id", "file_name", "window_number"])
summary_by_relabeled_200 = summary_by_relabeled_200.groupby(["subject", "relabeled", "stimulus"]).mean(numeric_only=True).reset_index()

print("\nResumen de métricas por sujeto, relabeled y tipo de movimiento:")
display(summary_by_relabeled_200)

# Mean of every metric per relabeled value, pooling all subjects and stimuli
global_summary_by_relabeled_200 = (
    metrics_df_all_channels
    .drop(columns=["subject", "stimulus", "window_id", "file_name", "window_number"])
    .groupby("relabeled")
    .mean(numeric_only=True)
    .reset_index()
)

print("\nResumen global de métricas promediadas por 'relabeled' (todos los sujetos):")
display(global_summary_by_relabeled_200)

# Overall counts
print(f"\nTotal de ventanas procesadas: {len(metrics_df_all_channels)}")
print(f"Distribución por sujeto:\n{metrics_df_all_channels['subject'].value_counts()}")
print(f"Distribución por movimiento:\n{metrics_df_all_channels['relabeled'].value_counts()}")

In [None]:
# Metric columns that must not reach the exported summary
columns_to_exclude = ['ZC', 'ZC_STD', 'Kurt', 'Kurt_STD']

# errors='ignore' skips any of those names that the summary does not contain
filtered_summary = summary_by_relabeled_200.drop(columns=columns_to_exclude, errors='ignore')

# Write the filtered summary to CSV
filtered_summary.to_csv('summary_by_relabeled_200_filtered.csv', index=False)


- 100 ms

In [None]:
# # Parámetros para el ventaneado
# fm = 2000  # Frecuencia de muestreo en Hz
# window_length = 200  # Ventana de 100 ms en muestras (200 / 2000 Hz)
# overlap = 0  # Sin superposición

# # Lista para almacenar todas las métricas
# all_metrics = []

# # Buscar carpetas que coincidan con el patrón "s + número" o "Subject + número"
# for folder in os.listdir(data_path):
#     if re.match(r'[sS]\d+', folder) or re.match(r'Subject\d+', folder):
#         folder_path = os.path.join(data_path, folder)
        
#         for file_name in os.listdir(folder_path):
#             if file_name.endswith('.mat'):
#                 file_path = os.path.join(folder_path, file_name)

#                 try:
#                     mat_data = src.loadmatNina(database, file_name, subject=folder)
#                 except Exception as e:
#                     print(f"Error loading {file_name}: {str(e)}")
#                     continue

#                 test_df, grasps = src.build_dataframe(
#                     mat_file=mat_data,
#                     database=database,
#                     filename=file_name,
#                     rectify=False,
#                     normalize=True
#                 )

#                 test_df = test_df[test_df['relabeled'].isin(filtered_labels)]

#                 # Detectar canales EMG
#                 emg_columns = [col for col in test_df.columns if col.startswith("Channel")]
#                 if not emg_columns:
#                     print(f"No se encontraron canales EMG en {file_name}")
#                     continue

#                 # Calcular envelope
#                 envelope_df = src.get_envelope_lowpass(test_df[emg_columns], fm=fm, cutoff_freq=0.6, envelope_type=1)

#                 # Agregar metadata
#                 meta_columns = ["Time (s)", "subject", "re_repetition", "stimulus", "relabeled"]
#                 result_df = pd.concat([envelope_df, test_df[meta_columns]], axis=1)

#                 for grasp in grasps:
#                     grasp_df = result_df[result_df['stimulus'] == grasp]
#                     if grasp_df.empty:
#                         continue

#                     ventanas = src.create_windows_with_overlap(grasp_df, window_length, overlap)
#                     for i, ventana in enumerate(ventanas):
#                         if len(ventana) != window_length:
#                             continue
                        
#                         metrics_per_channel = []
#                         for channel in emg_columns:
#                             signal = ventana[channel].values
#                             metrics = calculate_emg_metrics_means(signal)
#                             metrics_per_channel.append(metrics)

#                         # Promediar métricas entre canales
#                         if metrics_per_channel:
#                             avg_metrics = pd.DataFrame(metrics_per_channel).mean().to_dict()

#                             # Agregar metadata
#                             metrics_with_meta = {
#                                 "subject": folder,
#                                 "relabeled": ventana['relabeled'].iloc[0],
#                                 "stimulus": grasp,
#                                 "window_id": f"{file_name}_{grasp}_{i}",
#                                 "file_name": file_name,
#                                 "window_number": i,
#                                 **avg_metrics
#                             }

#                             all_metrics.append(metrics_with_meta)

# # Crear DataFrame con resultados
# metrics_df_all_channels = pd.DataFrame(all_metrics)

# # Reorganizar columnas
# meta_cols = ["subject", "relabeled", "stimulus", "window_id", "file_name", "window_number"]
# metric_cols = [col for col in metrics_df_all_channels.columns if col not in meta_cols]
# column_order = meta_cols + sorted(metric_cols)
# metrics_df_all_channels = metrics_df_all_channels[column_order]
# display(metrics_df_all_channels)

# # Agrupar y promediar por relabeled por sujeto y tipo de movimiento
# summary_by_relabeled_100 = metrics_df_all_channels.drop(columns=["window_id", "file_name", "window_number"])
# summary_by_relabeled_100 = summary_by_relabeled_100.groupby(["subject", "relabeled", "stimulus"]).mean(numeric_only=True).reset_index()

# print("\nResumen de métricas por sujeto, relabeled y tipo de movimiento:")
# display(summary_by_relabeled_100)

# # Agrupar y promediar por relabeled globalmente (todos los sujetos y estímulos)
# global_summary_by_relabeled_100 = (
#     metrics_df_all_channels
#     .drop(columns=["subject", "stimulus", "window_id", "file_name", "window_number"])
#     .groupby("relabeled")
#     .mean(numeric_only=True)
#     .reset_index()
# )

# print("\nResumen global de métricas promediadas por 'relabeled' (todos los sujetos):")
# display(global_summary_by_relabeled_100)

# # Estadísticas generales
# print(f"\nTotal de ventanas procesadas: {len(metrics_df_all_channels)}")
# print(f"Distribución por sujeto:\n{metrics_df_all_channels['subject'].value_counts()}")
# print(f"Distribución por movimiento:\n{metrics_df_all_channels['relabeled'].value_counts()}")

## Boxplot analysis for windowed

In [None]:
# # Suponiendo que los DataFrames ya están cargados
# dataframes = {
#     '300': global_summary_by_relabeled_300,
#     '200': global_summary_by_relabeled_200,
#     '100': global_summary_by_relabeled_100
# }

# # Definir las métricas excluyendo 'window_number'
# metrics = [col for col in global_summary_by_relabeled_100.columns if col != 'window_number']

# # Normalizar los datos
# scaler = MinMaxScaler()
# normalized_dataframes = {}
# for label, df in dataframes.items():
#     df_normalized = df.copy()
#     df_normalized[metrics] = scaler.fit_transform(df[metrics])
#     normalized_dataframes[label] = df_normalized

# # Crear un solo DataFrame para facilitar el boxplot
# merged_data = []
# for label, df in normalized_dataframes.items():
#     df_melted = df[metrics].melt(var_name='Métrica', value_name='Valor')
#     df_melted['Fuente'] = label
#     merged_data.append(df_melted)

# df_final = pd.concat(merged_data, ignore_index=True)
# display(df_final)
# # Configurar el gráfico
# plt.figure(figsize=(15, 6))
# sns.boxplot(data=df_final, x='Métrica', y='Valor', hue='Fuente')
# plt.xticks(rotation=90)
# plt.title('Distribución de métricas normalizadas en los diferentes DataFrames')
# plt.xlabel('Métrica')
# plt.ylabel('Valor normalizado')
# plt.legend(title='Fuente')
# plt.show()

In [None]:
# # 1. Organizar los datos por relabeled y tamaño de ventana
# ventanas = {
#     '100': {},  # DataFrames para ventana de 100ms
#     '200': {},  # DataFrames para ventana de 200ms
#     '300': {}   # DataFrames para ventana de 300ms
# }

# # Buscar todos los DataFrames globales que siguen el patrón df_relabeled_*_100
# for var_name in globals():
#     # Revisamos para las ventanas de 100ms
#     if var_name.startswith('df_relabeled_') and var_name.endswith('_100'):
#         relabeled_value = var_name.replace('df_relabeled_', '').replace('_100', '')
#         ventanas['100'][relabeled_value] = globals()[var_name]
    
#     # Para ventanas de 200ms (_200) y 300ms (_300)
#     elif var_name.startswith('df_relabeled_') and var_name.endswith('_200'):
#         relabeled_value = var_name.replace('df_relabeled_', '').replace('_200', '')
#         ventanas['200'][relabeled_value] = globals()[var_name]
    
#     elif var_name.startswith('df_relabeled_') and var_name.endswith('_300'):
#         relabeled_value = var_name.replace('df_relabeled_', '').replace('_300', '')
#         ventanas['300'][relabeled_value] = globals()[var_name]

# # 2. Identificar todas las métricas (columnas comunes excluyendo metadatos)
# metadata_cols = ['subject', 'relabeled', 'stimulus', 'channel', 'window_id', 'file_name']
# first_df = next(iter(next(iter(ventanas.values())).values()), None)

# if first_df is not None:
#     metrics = [col for col in first_df.columns if col not in metadata_cols]
# else:
#     metrics = []  

# # 3. Crear un DataFrame combinado para cada tamaño de ventana
# combined_data = []

# for window_size, relabeled_dict in ventanas.items():
#     for relabeled_value, df in relabeled_dict.items():
#         # Normalizar los datos de métricas para este relabeled
#         df_norm = df.copy()
#         scaler = MinMaxScaler()
        
#         # Solo normalizar columnas numéricas de métricas si hay datos
#         if len(df) > 0 and not df[metrics].empty:
#             df_norm[metrics] = scaler.fit_transform(df[metrics])
        
#         # Convertir a formato largo para seaborn
#         df_melted = df_norm[metrics].melt(var_name='Métrica', value_name='Valor')
#         df_melted['Relabeled'] = relabeled_value
#         df_melted['Ventana'] = f'{window_size}'  # Mantener solo el número para la leyenda
        
#         combined_data.append(df_melted)

# # Combinar todos los DataFrames
# df_final = pd.concat(combined_data, ignore_index=True)

# # 4. Crear un gráfico separado para cada relabeled
# relabeled_values = df_final['Relabeled'].unique()

# for relabeled in relabeled_values:
#     # Filtrar datos para este relabeled
#     relabeled_data = df_final[df_final['Relabeled'] == relabeled]
    
#     # Crear figura
#     plt.figure(figsize=(20, 10))
    
#     # Crear boxplot con métricas en el eje x y comparando ventanas
#     ax = sns.boxplot(
#         data=relabeled_data, 
#         x='Métrica', 
#         y='Valor', 
#         hue='Ventana',
#         palette=['#3274A1', '#E1812C', '#3A923A']  # Colores similares al ejemplo (azul, naranja, verde)
#     )
    
#     # Configurar el gráfico
#     plt.title(f'Distribución de métricas normalizadas para {relabeled}', fontsize=16)
#     plt.xlabel('Métrica', fontsize=14)
#     plt.ylabel('Valor normalizado', fontsize=14)
#     plt.xticks(rotation=90)  # Rotar etiquetas para mejorar legibilidad
    
#     # Personalizar la leyenda para que coincida con el formato del ejemplo
#     plt.legend(title='Fuente')
    
#     # Ajustar límites del eje y para que sea de 0 a 1 como en el ejemplo
#     plt.ylim(0, 1.05)
    
#     # Añadir cuadrícula para mejor lectura
#     plt.grid(axis='y', linestyle='--', alpha=0.7)
    
#     plt.tight_layout()
    
#     # Guardar el gráfico (opcional)
#     # plt.savefig(f'metricas_relabeled_{relabeled}.png', dpi=300, bbox_inches='tight')
    
#     plt.show()

# # 5. Opcionalmente, gráfico combinado para todos los relabeled
# plt.figure(figsize=(20, 10))

# # Crear boxplot con todas las métricas y ventanas
# sns.boxplot(
#     data=df_final, 
#     x='Métrica', 
#     y='Valor', 
#     hue='Ventana',
#     palette=['#3274A1', '#E1812C', '#3A923A'] 
# )

# # Configurar el gráfico
# plt.title('Distribución de métricas normalizadas en los diferentes DataFrames', fontsize=16)
# plt.xlabel('Métrica', fontsize=14)
# plt.ylabel('Valor normalizado', fontsize=14)
# plt.xticks(rotation=90)  # Rotar etiquetas para mejorar legibilidad
# plt.legend(title='Fuente')
# plt.ylim(0, 1.05)  # Límites del eje y para que sea de 0 a 1 como en el ejemplo
# plt.grid(axis='y', linestyle='--', alpha=0.7)  # Añadir cuadrícula

# plt.tight_layout()
# plt.show()

## With outliers

In [None]:
# Gather every per-label frame (df_relabeled_<label>_200) found in the
# notebook's global namespace
all_data = []
relabeled_dfs = {}

label_prefix = 'df_relabeled_'
label_suffix = '_200'

for var_name in list(globals().keys()):
    if var_name.startswith(label_prefix) and var_name.endswith(label_suffix):
        # Slice the label out instead of split('_')[2], which would truncate
        # any label that itself contains an underscore
        relabeled_value = var_name[len(label_prefix):-len(label_suffix)]
        relabeled_dfs[relabeled_value] = globals()[var_name]

        # Work on a copy so the original frame is left untouched
        df_copy = globals()[var_name].copy()
        all_data.append(df_copy)

if all_data:
    combined_df = pd.concat(all_data, ignore_index=True)

    # Every column that is not metadata is treated as a metric
    metric_columns = [col for col in combined_df.columns if col not in ['subject', 'relabeled', 'stimulus']]

    # Min-Max scale the metric columns over the combined table
    scaler = MinMaxScaler()
    combined_df[metric_columns] = scaler.fit_transform(combined_df[metric_columns])

    # Two-column grid with one subplot per metric. The layout is handled by
    # plt.tight_layout() below, so constrained_layout is not requested here
    # (asking for both makes matplotlib warn about the layout engine change).
    n_metrics = len(metric_columns)
    n_rows = (n_metrics + 1) // 2
    fig, axes = plt.subplots(nrows=n_rows, ncols=2, figsize=(14, 3 * n_rows))
    axes = axes.flatten()

    # Tick labels are rotated only when there are many categories
    rotate_labels = len(combined_df['relabeled'].unique()) > 5

    for i, metric in enumerate(metric_columns):
        sns.boxplot(x='relabeled', y=metric, data=combined_df, ax=axes[i], palette='viridis')

        axes[i].set_title(f'Comparación de {metric} por grasp', fontsize=14)
        axes[i].set_xlabel('Categoría')
        axes[i].set_ylabel(metric)

        if rotate_labels:
            axes[i].tick_params(axis='x', rotation=45)

    # Remove the unused subplot left over when the metric count is odd
    for i in range(n_metrics, len(axes)):
        fig.delaxes(axes[i])

    plt.suptitle('Comparación de métricas EMG entre diferentes categorías', fontsize=16, y=1.02)

    plt.tight_layout()
    plt.show()

    # Descriptive statistics of every metric per label
    print("Stadistic for grasp:")
    for metric in metric_columns:
        print(f"\nMétrica: {metric}")
        display(combined_df.groupby('relabeled')[metric].describe())
else:
    print("No se encontraron variables df_relabeled_X_200 en el espacio global.")

In [None]:
import plotly.express as px

# combined_df and metric_columns only exist when the previous cell found data
if all_data:
    # Layout settings shared by every figure
    shared_layout = dict(
        xaxis_title='Grasp (relabeled)',
        boxmode='group',
        showlegend=False,
    )

    # One interactive boxplot per metric
    for metric in metric_columns:
        fig = px.box(
            combined_df,
            x='relabeled',
            y=metric,
            color='relabeled',
            points='all',  # draw the individual observations as well
            hover_data=['subject'],  # show the subject on hover
            title=f'Boxplot interactivo para {metric} por grasp',
        )
        fig.update_layout(yaxis_title=metric, **shared_layout)
        fig.show()
else:
    print("No se encontraron variables df_relabeled_X_200 en el espacio global.")


- Fisher Score analysis 

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import f_oneway
import numpy as np

# Gather every per-label frame (df_relabeled_<label>_200) found in the
# notebook's global namespace
all_data = []
relabeled_dfs = {}

label_prefix = 'df_relabeled_'
label_suffix = '_200'

for var_name in list(globals().keys()):
    if var_name.startswith(label_prefix) and var_name.endswith(label_suffix):
        # Slice the label out instead of split('_')[2], which would truncate
        # any label that itself contains an underscore
        relabeled_value = var_name[len(label_prefix):-len(label_suffix)]
        relabeled_dfs[relabeled_value] = globals()[var_name]

        # Work on a copy so the original frame is left untouched
        df_copy = globals()[var_name].copy()
        all_data.append(df_copy)

if all_data:
    combined_df = pd.concat(all_data, ignore_index=True)

    # Every column that is not metadata is treated as a metric
    metric_columns = [col for col in combined_df.columns if col not in ['subject', 'relabeled', 'stimulus']]

    # Min-Max scale the metric columns over the combined table
    scaler = MinMaxScaler()
    combined_df[metric_columns] = scaler.fit_transform(combined_df[metric_columns])

    # Two-column grid with one subplot per metric. The layout is handled by
    # plt.tight_layout() below, so constrained_layout is not requested here
    # (asking for both makes matplotlib warn about the layout engine change).
    n_metrics = len(metric_columns)
    n_rows = (n_metrics + 1) // 2
    fig, axes = plt.subplots(nrows=n_rows, ncols=2, figsize=(14, 3 * n_rows))
    axes = axes.flatten()

    # Tick labels are rotated only when there are many categories
    rotate_labels = len(combined_df['relabeled'].unique()) > 5

    for i, metric in enumerate(metric_columns):
        sns.boxplot(x='relabeled', y=metric, data=combined_df, ax=axes[i], palette='viridis')

        axes[i].set_title(f'Comparación de {metric} por grasp', fontsize=14)
        axes[i].set_xlabel('Categoría')
        axes[i].set_ylabel(metric)

        if rotate_labels:
            axes[i].tick_params(axis='x', rotation=45)

    # Remove the unused subplot left over when the metric count is odd
    for i in range(n_metrics, len(axes)):
        fig.delaxes(axes[i])

    plt.suptitle('Comparación de métricas EMG entre diferentes categorías', fontsize=16, y=1.02)

    plt.tight_layout()
    plt.show()

    # One-way ANOVA and Fisher score of every metric across the labels
    anova_results = {}
    fisher_scores = {}

    categories = combined_df['relabeled'].unique()

    for metric in metric_columns:
        # Non-missing values of this metric, one array per label
        groups = [combined_df[combined_df['relabeled'] == cat][metric].dropna().values for cat in categories]

        # Best effort: a metric ANOVA cannot handle is reported as NaN.
        # `except Exception` (not a bare except) lets KeyboardInterrupt through.
        try:
            f_stat, p_val = f_oneway(*groups)
        except Exception:
            f_stat, p_val = np.nan, np.nan

        anova_results[metric] = {'F-statistic': f_stat, 'p-value': p_val}

        # Fisher score = sum_i n_i * (mean_i - mean)^2 / sum_i n_i * var_i
        overall_mean = combined_df[metric].mean()
        num = 0
        den = 0

        for cat in categories:
            # Drop missing values so n_i agrees with what mean()/var() use
            class_data = combined_df[combined_df['relabeled'] == cat][metric].dropna()
            ni = len(class_data)
            if ni == 0:
                # Nothing to contribute; 0 * NaN would poison the sums
                continue
            class_mean = class_data.mean()
            # The sample variance of a single value is NaN; count it as zero
            # spread so one singleton class does not turn the score into NaN
            class_var = class_data.var() if ni > 1 else 0.0

            num += ni * (class_mean - overall_mean) ** 2
            den += ni * class_var

        fisher = num / den if den != 0 else 0
        fisher_scores[metric] = fisher

    # Results table, highest Fisher score first
    results_df = pd.DataFrame.from_dict(anova_results, orient='index')
    results_df['Fisher Score'] = pd.Series(fisher_scores)
    results_df_sorted = results_df.sort_values(by='Fisher Score', ascending=False)

    print("\n📊 Resultados ANOVA y Fisher Score ordenados:\n")
    display(results_df_sorted)

else:
    print("No se encontraron variables df_relabeled_X_200 en el espacio global.")

In [None]:
# Keep only the metrics that clear all three thresholds at once
passes_thresholds = (
    (results_df_sorted['F-statistic'] > 5)
    & (results_df_sorted['p-value'] < 0.05)
    & (results_df_sorted['Fisher Score'] > 0.5)
)
df = results_df_sorted[passes_thresholds]
df

In [None]:
# For every metric and label: average the metric per subject, then take the
# standard deviation of those per-subject means
class_labels = combined_df['relabeled'].unique()

inter_subject_stability = {
    metric: {
        label: (
            combined_df[combined_df['relabeled'] == label]
            .groupby('subject')[metric]
            .mean()
            .std()
        )
        for label in class_labels
    }
    for metric in metric_columns
}

# Transpose so that rows are metrics and columns are labels
stability_df = pd.DataFrame(inter_subject_stability).T
display(stability_df)

In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler

# Metrics left out of this analysis
excluded_features = ['ZC', 'ZC_STD', 'Kurt', 'Kurt_STD']
filtered_metric_columns = [col for col in metric_columns if col not in excluded_features]

# Copy of the class label plus the retained metrics (outliers are not removed)
filtered_df = combined_df[['relabeled'] + filtered_metric_columns].copy()

# 1. Median of every metric within each class (relabeled)
median_df = filtered_df.groupby('relabeled')[filtered_metric_columns].median()

# 2. Variance of those class medians, one value per metric
median_variance = median_df.var()

# 3. Two-column table: metric name and variance between medians
median_variance_df = median_variance.reset_index()
median_variance_df.columns = ['métrica', 'varianza_entre_medianas']

# 4. Min-Max scaling of the variances
scaler = MinMaxScaler()
raw_variances = median_variance_df[['varianza_entre_medianas']]
median_variance_df['varianza_normalizada'] = scaler.fit_transform(raw_variances)

# 5. Show the table, highest normalised variance first
ranked_descending = median_variance_df.sort_values(by='varianza_normalizada', ascending=False)
print("Varianza entre medianas y su normalización:")
display(ranked_descending)

# 6. Horizontal barplot, lowest normalised variance first
ranked_ascending = median_variance_df.sort_values(by='varianza_normalizada', ascending=True)
plt.figure(figsize=(10, 6))
variance_ax = sns.barplot(
    data=ranked_ascending,
    x='varianza_normalizada',
    y='métrica',
    palette='viridis',
)
variance_ax.set_title('Varianza normalizada entre medianas por métrica (con atípicos)', fontsize=14)
variance_ax.set_xlabel('Varianza normalizada')
variance_ax.set_ylabel('Métrica')
variance_ax.grid(axis='x', linestyle='--', alpha=0.6)
plt.tight_layout()
plt.show()

In [None]:
# Pairwise correlation between all metric columns (DataFrame.corr defaults)
metric_values = combined_df[metric_columns]
corr_matrix = metric_values.corr()

# Draw the correlation matrix as a heatmap without per-cell annotations
heatmap_options = dict(annot=False, cmap='coolwarm', fmt='.2f', linewidths=0.5)
plt.figure(figsize=(10, 8))
heatmap_ax = sns.heatmap(corr_matrix, **heatmap_options)
heatmap_ax.set_title('Mapa de Correlación de Métricas')
plt.show()

In [None]:
# Show the full correlation matrix as a table.
display(corr_matrix)

In [None]:
# Flatten the correlation matrix into a Series indexed by (metric, metric) pairs.
# DataFrame.unstack() walks the matrix column by column.
corr_pairs = corr_matrix.unstack()

# Drop the diagonal (a metric paired with itself) and the mirrored duplicates.
# The matrix is symmetric, so keeping only the positions strictly on one side
# of the diagonal leaves every unordered pair exactly once. A positional mask
# is used instead of drop_duplicates(), which would also discard distinct
# pairs that happen to share the same correlation value.
n_metrics = len(corr_matrix)
one_side_of_diagonal = np.triu(np.ones((n_metrics, n_metrics), dtype=bool), k=1).ravel()
corr_pairs = corr_pairs[one_side_of_diagonal]

# Keep the strongly correlated pairs (|r| > 0.89), highest correlation first
high_corr = corr_pairs[corr_pairs.abs() > 0.89].sort_values(ascending=False)

# Build the table of highly correlated pairs
high_corr_df = high_corr.reset_index()
high_corr_df.columns = ['Métrica 1', 'Métrica 2', 'Correlación']

# Show the table
display(high_corr_df)

In [None]:
from sklearn.ensemble import RandomForestClassifier
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# ---------- 1. Drop the metrics excluded from this analysis ----------
excluded_metrics = ['ZC', 'ZC_STD', 'Kurt', 'Kurt_STD']
filtered_metrics = [col for col in metric_columns if col not in excluded_metrics]

# ---------- 2. Fit the model on all rows (outliers are not removed) ----------
X = combined_df[filtered_metrics]
y = combined_df['relabeled']

# Random Forest with a fixed seed; fit() returns the fitted estimator itself
rf_model = RandomForestClassifier(n_estimators=100, random_state=42).fit(X, y)

# One importance value per column of X, in the same order as filtered_metrics
importances = rf_model.feature_importances_

# ---------- 3. Plot ----------
plt.figure(figsize=(11, 6))
importance_ax = sns.barplot(x=filtered_metrics, y=importances, palette='viridis')
importance_ax.set_title('Importancia de Características para la Clasificación de Agarre (con atípicos y sin ZC/Kurtosis)')
importance_ax.set_xlabel('Métricas')
importance_ax.set_ylabel('Importancia')
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()


- Random Forest

In [None]:
from sklearn.ensemble import RandomForestClassifier
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# ---------- 1. Metrics to use ----------
# 'filtered_metrics' must already be defined before this cell runs, for example:
# filtered_metrics = [col for col in combined_df.columns if col not in ['subject', 'relabeled', 'stimulus']]

# ---------- 2. Features (X) and target (y) ----------
X = combined_df[filtered_metrics]
y = combined_df['relabeled']

# ---------- 3. Train the model ----------
# fit() returns the fitted estimator itself
rf_model = RandomForestClassifier(n_estimators=100, random_state=42).fit(X, y)

# ---------- 4. Feature importances, highest first ----------
importances = rf_model.feature_importances_
unsorted_importances = pd.DataFrame({
    'Métrica': filtered_metrics,
    'Importancia': importances
})
importance_df = unsorted_importances.sort_values(by='Importancia', ascending=False)
importance_df = importance_df.reset_index(drop=True)

# ---------- 5. Show the table ----------
print("Importancia de las características sin eliminar atípicos:")
display(importance_df)

# ---------- 6. Plot ----------
plt.figure(figsize=(11, 6))
importance_ax = sns.barplot(data=importance_df, x='Métrica', y='Importancia', palette='viridis')
importance_ax.set_title('Importancia de Características para la Clasificación de Agarre (con atípicos)')
importance_ax.set_xlabel('Métricas')
importance_ax.set_ylabel('Importancia')
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()


- Decision Tree

In [None]:
from sklearn.tree import DecisionTreeClassifier
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

# ---------- 1. Metrics to use ----------
# This list must already be defined before this cell runs, for example:
# filtered_metrics = [col for col in combined_df.columns if col not in ['subject', 'relabeled', 'stimulus']]

# ---------- 2. Features (X) and target (y) ----------
X = combined_df[filtered_metrics]
y = combined_df['relabeled']

# ---------- 3. Train the model ----------
# fit() returns the fitted estimator itself
tree_model = DecisionTreeClassifier(random_state=42).fit(X, y)

# ---------- 4. Feature importances as percentages of their total ----------
importances = tree_model.feature_importances_
importances_percentage = 100 * importances / importances.sum()

unsorted_importances = pd.DataFrame({
    'Métrica': filtered_metrics,
    'Importancia (%)': importances_percentage
})
importances_df = unsorted_importances.sort_values(by='Importancia (%)', ascending=False)
importances_df = importances_df.reset_index(drop=True)

# Show the table
print("\n📊 Importancia de las características (%):")
display(importances_df)

# Sanity check: the percentages should add up to roughly 100%
total_percentage = importances_percentage.sum()
print(f"\n✅ Suma total de importancias: {total_percentage:.2f}%")

# ---------- 5. Plot ----------
plt.figure(figsize=(11, 6))
tree_ax = sns.barplot(x='Métrica', y='Importancia (%)', data=importances_df, palette='viridis')
tree_ax.set_title('Importancia de Características para la Clasificación de Agarre (Árbol de Decisión con atípicos)')
tree_ax.set_xlabel('Métricas')
tree_ax.set_ylabel('Importancia (%)')
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()


- Coefficient of Variation

In [None]:
import os
import re
import numpy as np
import pandas as pd

# ---------- Coefficient of variation (CV) helper ----------
def calculate_cv(metrics_df):
    """Return the coefficient of variation, in percent, of every column.

    Args:
        metrics_df: DataFrame whose columns are the metrics to summarise.

    Returns:
        dict mapping each column name to ``(std / mean) * 100``. A column
        whose mean is exactly zero maps to ``np.nan``. The mean is used as
        is (not its absolute value), so the result carries the mean's sign.
    """
    def _cv_percent(column):
        # Guard the division: a zero mean has no defined CV.
        mean_value = column.mean()
        if mean_value == 0:
            return np.nan
        return (column.std() / mean_value) * 100

    return {metric: _cv_percent(metrics_df[metric]) for metric in metrics_df.columns}

# ---------- Parameters ----------
fm = 2000  # forwarded as `fm` to src.get_envelope_lowpass (presumably the sampling rate in Hz — confirm)
window_length = 400  # rows per window; windows of any other length are discarded
overlap = 0  # forwarded to src.create_windows_with_overlap
target_channel = "Channel 8"  # the only EMG column analysed in this cell

all_metrics = []  # one dict (metadata + metrics) per processed window

# ---------- Iterate over the subject folders and their .mat files ----------
for folder in os.listdir(data_path):
    # Only folders named like "s1" / "S1" / "Subject1" are processed.
    if not (re.match(r'[sS]\d+', folder) or re.match(r'Subject\d+', folder)):
        continue
    folder_path = os.path.join(data_path, folder)

    for file_name in os.listdir(folder_path):
        if not file_name.endswith('.mat'):
            continue

        # Best effort: a file that cannot be loaded is reported and skipped.
        try:
            mat_data = src.loadmatNina(database, file_name, subject=folder)
        except Exception as e:
            print(f"Error loading {file_name}: {str(e)}")
            continue

        test_df, grasps = src.build_dataframe(
            mat_file=mat_data,
            database=database,
            filename=file_name,
            rectify=False,
            normalize=True
        )
        # Keep only the rows whose class is in filtered_labels.
        test_df = test_df[test_df['relabeled'].isin(filtered_labels)]

        if target_channel not in test_df.columns:
            print(f"{target_channel} no encontrado en {file_name}, omitiendo.")
            continue

        # Envelope of the target channel. The `fm` parameter declared above is
        # used here instead of a second hard-coded 2000, so the two cannot drift.
        emg_columns = [target_channel]
        envelope_df = src.get_envelope_lowpass(
            test_df[emg_columns], fm=fm, cutoff_freq=0.6, envelope_type=1
        )

        # Re-attach the metadata columns next to the envelope.
        meta_columns = ["Time (s)", "subject", "re_repetition", "stimulus", "relabeled"]
        result_df = pd.concat([envelope_df, test_df[meta_columns]], axis=1)

        window_count = 0  # windows accepted for this file

        for grasp in grasps:
            # Best effort: a failing grasp is reported and the loop moves on.
            try:
                print(f"\nProcessing Grasp {grasp} in file {file_name}:")
                grasp_df = result_df[result_df['stimulus'] == grasp]

                if grasp_df.empty:
                    print(f"No hay datos para el grasp {grasp} en {file_name}.")
                    continue

                ventanas = src.create_windows_with_overlap(grasp_df, window_length, overlap)

                for i, ventana in enumerate(ventanas):
                    # Skip windows that do not have exactly window_length rows.
                    if len(ventana) != window_length:
                        continue

                    signal = ventana[target_channel].values
                    metrics = calculate_emg_metrics_means(signal)

                    metrics_with_meta = {
                        "subject": folder,
                        "relabeled": grasp_df['relabeled'].iloc[0],
                        "stimulus": grasp,
                        "channel": target_channel,
                        "window_id": f"{file_name}_{grasp}_{i}",
                        "file_name": file_name,
                        "window_number": window_count,
                        **metrics
                    }

                    all_metrics.append(metrics_with_meta)
                    window_count += 1
            except Exception as e:
                print(f"Error processing grasp {grasp}: {str(e)}")
                continue

        print(f"Procesadas {window_count} ventanas para el archivo {file_name}")

# ---------- Build the overall DataFrame (one row per window) ----------
metrics_df_200 = pd.DataFrame(all_metrics)

# ---------- Reorder columns: metadata first, then metrics alphabetically ----------
meta_cols = ["subject", "relabeled", "stimulus", "channel", "window_id", "file_name", "window_number"]
metric_cols = [col for col in metrics_df_200.columns if col not in meta_cols]
column_order = meta_cols + sorted(metric_cols)
metrics_df_200 = metrics_df_200[column_order]

# ---------- Drop the unwanted metrics ----------
excluded_metrics = ['ZC', 'ZC_STD', 'Kurt', 'Kurt_STD']
filtered_metrics = [col for col in metric_cols if col not in excluded_metrics]

# ---------- Coefficient of variation per metric, highest first ----------
cv_column = 'Coeficiente de Variación'
cv_values = calculate_cv(metrics_df_200[filtered_metrics])
cv_df = pd.DataFrame.from_dict(cv_values, orient='index', columns=[cv_column])
cv_df = cv_df.sort_values(by=cv_column, ascending=False)

# ---------- Show the results ----------
print("\n📊 Coeficiente de variación de las métricas (con atípicos, sin ZC/Kurtosis):")
display(cv_df)

# Mean of every numeric column within each class (relabeled)
print("\n📈 Resumen de métricas por tipo de movimiento (completo):")
grouped_df = metrics_df_200.drop(columns=['channel'], errors='ignore')
numeric_only_df = grouped_df.select_dtypes(include=['number'])
summary_by_subject_movement_200 = numeric_only_df.groupby(['relabeled']).mean()
display(summary_by_subject_movement_200)

# Window counts: total, per subject and per class
total_windows = len(metrics_df_200)
windows_per_subject = metrics_df_200['subject'].value_counts()
windows_per_class = metrics_df_200['relabeled'].value_counts()
print(f"\n✅ Total de ventanas procesadas: {total_windows}")
print(f"📌 Distribución por sujeto:\n{windows_per_subject}")
print(f"📌 Distribución por movimiento:\n{windows_per_class}")


In [None]:
# Min-Max normalisation of the coefficient of variation

cv_raw = cv_df['Coeficiente de Variación']
cv_min = cv_raw.min()
cv_max = cv_raw.max()
cv_df['Coeficiente de Variación Normalizado'] = (cv_raw - cv_min) / (cv_max - cv_min)
# Show the table including the normalised values

display(cv_df)

In [None]:
# Export the summary table to a CSV file in the current working directory.
# NOTE(review): `summary_by_relabeled_200` is not assigned in the cells visible
# here — confirm it is defined by an earlier cell before running this one.
# NOTE(review): index=False leaves the index out of the file; if the class
# labels are stored in the index they will not be written — confirm.
summary_by_relabeled_200.to_csv("summary_by_relabeled_200.csv", index=False)