## Libraries

In [None]:
import os
import re
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib
from matplotlib.patches import Ellipse

import plotly.express as px
import plotly.graph_objects as go

from itertools import combinations
from scipy import stats
from scipy.io import loadmat, whosmat
from scipy.spatial.distance import pdist, squareform, cdist
from scipy.cluster.hierarchy import dendrogram, linkage
import scipy.cluster.hierarchy as sch
from scipy.linalg import inv

from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.decomposition import PCA
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.cluster import KMeans

from statsmodels.multivariate.manova import MANOVA

import pywt

import src
from src import config, loadmatNina
from src.preprocessing_utils import get_envelope

In [None]:
# Database whose .mat contents are inspected
database = 'DB4'

data_path = f'data/{database}'

# Take the first directory entry whose name starts like a subject folder
# ("s<number>", "S<number>" or "Subject<number>"); None when there is none.
folder = next(
    (
        item
        for item in os.listdir(data_path)
        if re.match(r'[sS]\d+', item) or re.match(r'Subject\d+', item)
    ),
    None,
)

if not folder:
    print("No folder found with the convention s + 'number'")
else:
    folder_path = os.path.join(data_path, folder)

    # Pair every .mat file name with the variable listing whosmat reports
    results = []
    for file_name in os.listdir(folder_path):
        if not file_name.endswith('.mat'):
            continue
        file_path = os.path.join(folder_path, file_name)
        results.append((file_name, whosmat(file_path)))

    # Nested mapping: variable name -> {file name -> remaining whosmat fields}
    data = {}
    for file_name, info in results:
        for item in info:
            data.setdefault(item[0], {})[file_name] = item[1:]

    # Transposed so that the files run along the columns
    df = pd.DataFrame(data).transpose()
    df.columns.name = 'File Name'

    print(df)

## Functions

In [None]:
def calculate_emg_metrics(signal, fs=2000):
    """
    Calculates various metrics for an EMG signal.

    Parameters:
    - signal: NumPy array containing the EMG signal
      (presumably one-dimensional; callers in this file pass a single channel).
    - fs: Sampling frequency in Hz (default: 2000 Hz).

    Returns:
    - A dictionary with the computed metrics, keyed by
      "MAV", "IAV", "RMS", "WL", "ZC", "SSC", "VAR", "CoV", "MNF",
      "mDWT", "TD" and "MAVS".
    """
    # Intermediate arrays shared by several metrics
    abs_signal = np.abs(signal)
    diff_signal = np.diff(signal)
    abs_diff = np.abs(diff_signal)

    # Mean Absolute Value (MAV)
    mav = np.mean(abs_signal)

    # Integrated Absolute Value (IAV)
    iav = np.sum(abs_signal)

    # Root Mean Square (RMS)
    rms = np.sqrt(np.mean(signal**2))

    # Waveform Length (WL)
    wl = np.sum(abs_diff)

    # Zero Crossings (ZC): number of positions where the sign changes
    zc = np.sum(np.diff(np.sign(signal)) != 0)

    # Slope Sign Changes (SSC): consecutive differences with opposite signs
    ssc = np.sum((diff_signal[1:] * diff_signal[:-1]) < 0)

    # Variance (VAR)
    var = np.var(signal)

    # Coefficient of Variation (CoV); defined as 0 when the mean is exactly 0
    mean_signal = np.mean(signal)
    cov = (np.std(signal) / mean_signal) if mean_signal != 0 else 0

    # Mean Frequency (MNF): magnitude-weighted mean of the rFFT frequencies.
    # An all-zero spectrum would give 0/0, so it is reported as 0 instead,
    # matching calculate_emg_metrics_means.
    freqs = np.fft.rfftfreq(len(signal), d=1/fs)
    fft_magnitude = np.abs(np.fft.rfft(signal))
    spectrum_sum = np.sum(fft_magnitude)
    mnf = np.sum(freqs * fft_magnitude) / spectrum_sum if spectrum_sum != 0 else 0

    # Marginal Discrete Wavelet Transform (mDWT): summed absolute coefficients
    # over all levels of a 4-level 'db4' decomposition
    coeffs = pywt.wavedec(signal, 'db4', level=4)
    mdwt = np.sum([np.sum(np.abs(c)) for c in coeffs])

    # Temporal Difference (TD): same formula as WL in this implementation
    td = np.sum(abs_diff)

    # Mean Absolute Value Slope (MAVS)
    mavs = np.mean(abs_diff)

    # Return the metrics as a dictionary
    metrics = {
        "MAV": mav,
        "IAV": iav,
        "RMS": rms,
        "WL": wl,
        "ZC": zc,
        "SSC": ssc,
        "VAR": var,
        "CoV": cov,
        "MNF": mnf,
        "mDWT": mdwt,
        "TD": td,
        "MAVS": mavs
    }

    return metrics

In [None]:
def calculate_emg_metrics_std(signal, fs=2000):
    """
    Calculates various metrics for an EMG signal, each paired with a
    "<NAME>_STD" spread value.

    Parameters:
    - signal: NumPy array containing the EMG signal (1-D or 2-D).
    - fs: Sampling frequency in Hz (default: 2000 Hz).

    Returns:
    - A dictionary with the computed metrics.
      For 1-D input the keys are "MAV", "IAV", "RMS", "WL", "ZC", "SSC",
      "VAR", "CoV", "MNF", "mDWT", "TD", "MAVS", each followed by its
      "<NAME>_STD" companion.
      For 2-D input every column is passed to calculate_emg_metrics and the
      per-column results are averaged; that result has NO "_STD" keys.
    """
    if signal.ndim == 2:
        metrics_per_channel = [calculate_emg_metrics(signal[:, ch], fs) for ch in range(signal.shape[1])]
        averaged_metrics = {key: np.mean([m[key] for m in metrics_per_channel]) for key in metrics_per_channel[0]}
        return averaged_metrics

    # Intermediate arrays shared by several metrics
    abs_signal = np.abs(signal)
    diff_signal = np.diff(signal)
    abs_diff = np.abs(diff_signal)
    signal_std = np.std(signal)
    sign_changes = np.diff(np.sign(signal)) != 0
    slope_changes = (diff_signal[1:] * diff_signal[:-1]) < 0

    # Mean Absolute Value (MAV)
    mav = np.mean(abs_signal)
    mav_std = np.std(abs_signal)

    # Integrated Absolute Value (IAV); its spread is the same value as MAV_STD
    iav = np.sum(abs_signal)
    iav_std = np.std(abs_signal)

    # Root Mean Square (RMS)
    rms = np.sqrt(np.mean(signal**2))
    rms_std = signal_std

    # Waveform Length (WL)
    wl = np.sum(abs_diff)
    wl_std = np.std(abs_diff)

    # Zero Crossings (ZC): count and spread of the sign-change indicator
    zc = np.sum(sign_changes)
    zc_std = np.std(sign_changes)

    # Slope Sign Changes (SSC): count and spread of the indicator
    ssc = np.sum(slope_changes)
    ssc_std = np.std(slope_changes)

    # Variance (VAR); its spread is reported as the signal's standard deviation
    var = np.var(signal)
    var_std = signal_std

    # Coefficient of Variation (CoV); defined as 0 when the mean is exactly 0.
    # NOTE(review): cov is a scalar, so np.std(cov) is always 0.
    mean_signal = np.mean(signal)
    cov = (signal_std / mean_signal) if mean_signal != 0 else 0
    cov_std = np.std(cov)

    # Mean Frequency (MNF). An all-zero spectrum would give 0/0, so both
    # values are reported as 0 instead, matching calculate_emg_metrics_means.
    freqs = np.fft.rfftfreq(len(signal), d=1/fs)
    fft_magnitude = np.abs(np.fft.rfft(signal))
    spectrum_sum = np.sum(fft_magnitude)
    weighted_freqs = freqs * fft_magnitude
    mnf = np.sum(weighted_freqs) / spectrum_sum if spectrum_sum != 0 else 0
    mnf_std = np.std(weighted_freqs) / spectrum_sum if spectrum_sum != 0 else 0

    # Marginal Discrete Wavelet Transform (mDWT): per-level sums of absolute
    # coefficients from a 4-level 'db4' decomposition
    coeffs = pywt.wavedec(signal, 'db4', level=4)
    level_sums = [np.sum(np.abs(c)) for c in coeffs]
    mdwt = np.sum(level_sums)
    mdwt_std = np.std(level_sums)

    # Temporal Difference (TD): same formula as WL in this implementation
    td = np.sum(abs_diff)
    td_std = np.std(abs_diff)

    # Mean Absolute Value Slope (MAVS)
    mavs = np.mean(abs_diff)
    mavs_std = np.std(abs_diff)

    # Return the metrics as a dictionary
    metrics = {
        "MAV": mav, "MAV_STD": mav_std,
        "IAV": iav, "IAV_STD": iav_std,
        "RMS": rms, "RMS_STD": rms_std,
        "WL": wl, "WL_STD": wl_std,
        "ZC": zc, "ZC_STD": zc_std,
        "SSC": ssc, "SSC_STD": ssc_std,
        "VAR": var, "VAR_STD": var_std,
        "CoV": cov, "CoV_STD": cov_std,
        "MNF": mnf, "MNF_STD": mnf_std,
        "mDWT": mdwt, "mDWT_STD": mdwt_std,
        "TD": td, "TD_STD": td_std,
        "MAVS": mavs, "MAVS_STD": mavs_std
    }

    return metrics


In [None]:
def calculate_emg_metrics_means(signal, fs=2000):
    """
    Calculates various metrics for an EMG signal, most of them paired with a
    "<NAME>_STD" spread value.

    Any exception raised while computing is caught, reported with print, and
    an empty dictionary is returned instead.

    Parameters:
    - signal: NumPy array containing the EMG signal (1-D or 2-D).
    - fs: Sampling frequency in Hz (default: 2000 Hz).

    Returns:
    - A dictionary with the computed metrics, or {} if an error occurred.
      For 2-D input every column is passed to calculate_emg_metrics and the
      per-column results are averaged; that result has no "_STD" keys.
    """
    try:
        if signal.ndim == 2:
            metrics_per_channel = [calculate_emg_metrics(signal[:, ch], fs) for ch in range(signal.shape[1])]
            averaged_metrics = {key: np.mean([m[key] for m in metrics_per_channel]) for key in metrics_per_channel[0]}
            return averaged_metrics

        # Intermediate values shared by several metrics
        abs_signal = np.abs(signal)
        diff_signal = np.diff(signal)
        diff_abs_signal = np.abs(diff_signal)
        sign_changes = np.diff(np.sign(signal)) != 0
        slope_changes = (diff_signal[1:] * diff_signal[:-1]) < 0
        mean_signal = np.mean(signal)
        std_signal = np.std(signal)
        power = signal**2

        # Time-domain metrics. Key insertion order matters: callers use
        # list(metrics.keys()) to order DataFrame columns.
        metrics = {
            "MAV": np.mean(abs_signal), "MAV_STD": np.std(abs_signal),
            "IAV": np.sum(abs_signal), "IAV_STD": np.std(abs_signal),
            "RMS": np.sqrt(np.mean(power)), "RMS_STD": std_signal,
            "WL": np.sum(diff_abs_signal), "WL_STD": np.std(diff_abs_signal),
            "ZC": np.sum(sign_changes), "ZC_STD": np.std(sign_changes),
            "SSC": np.sum(slope_changes), "SSC_STD": np.std(slope_changes),
            "VAR": np.var(signal), "VAR_STD": std_signal,
            "CoV": (std_signal / mean_signal) if mean_signal != 0 else 0,
            "TD": np.sum(diff_abs_signal), "TD_STD": np.std(diff_abs_signal),
            "MAVS": np.mean(diff_abs_signal), "MAVS_STD": np.std(diff_abs_signal),
            "MNP": np.mean(power), "MNP_STD": np.std(power),
        }

        # Spectral metrics; an all-zero spectrum is reported as 0, not 0/0
        freqs = np.fft.rfftfreq(len(signal), d=1/fs)
        fft_magnitude = np.abs(np.fft.rfft(signal))
        spectrum_sum = np.sum(fft_magnitude)
        weighted_freqs = freqs * fft_magnitude
        metrics["MNF"] = np.sum(weighted_freqs) / spectrum_sum if spectrum_sum != 0 else 0
        metrics["MNF_STD"] = np.std(weighted_freqs) / spectrum_sum if spectrum_sum != 0 else 0

        # Wavelet transform: per-level sums of absolute 'db4' coefficients
        coeffs = pywt.wavedec(signal, 'db4', level=4)
        mdwt_values = np.array([np.sum(np.abs(c)) for c in coeffs])
        metrics["mDWT"] = np.sum(mdwt_values)
        metrics["mDWT_STD"] = np.std(mdwt_values)

        # Kurtosis (fourth central moment over std**4); 0 for a constant signal.
        # NOTE(review): Kurt is a scalar, so Kurt_STD is always 0.
        metrics["Kurt"] = np.mean((signal - mean_signal) ** 4) / (std_signal ** 4) if std_signal != 0 else 0
        metrics["Kurt_STD"] = np.std(metrics["Kurt"])

        return metrics

    except Exception as e:
        # Best-effort by design: report the failure under this function's
        # own name and hand back an empty result.
        print(f"Error in calculate_emg_metrics_means: {e}")
        return {}



## Combined DataFrame for the whole database

In [None]:
# Name of the database under analysis
database = 'DB4'

# Absolute path of the folder that holds this database
data_path = os.path.abspath(os.path.join('data', database))

# Subject folder names, 's1' through 's10'
subjects = ['s' + str(number) for number in range(1, 11)]

In [None]:
# Accumulates one DataFrame per successfully processed .mat file
all_dataframes = []

for folder in os.listdir(data_path):
    # Only entries named like a subject folder are visited
    # ("s<number>", "S<number>" or "Subject<number>").
    if not (re.match(r'[sS]\d+', folder) or re.match(r'Subject\d+', folder)):
        continue
    folder_path = os.path.join(data_path, folder)

    for file_name in os.listdir(folder_path):
        if not file_name.endswith('.mat'):
            continue
        file_path = os.path.join(folder_path, file_name)

        # Load the .mat file; on failure report it and move to the next file
        try:
            mat_data = src.loadmatNina(database, file_name, subject=folder)
        except Exception as e:
            print(f"Error loading {file_name}: {str(e)}")
            continue

        # Build the per-file DataFrame, tag it with its subject and keep it;
        # on failure report it and move to the next file
        try:
            test_df, grasps = src.build_dataframe(
                mat_file=mat_data,
                database=database,
                filename=file_name,
                rectify=False,
                normalize=True
            )
            test_df['subject'] = folder
            all_dataframes.append(test_df)
        except Exception as e:
            print(f"Error processing {file_name}: {str(e)}")
            continue

# Stack everything into a single DataFrame, provided at least one file worked
if not all_dataframes:
    print("Warning: No DataFrames were generated. Check the input data.")
else:
    combined_df = pd.concat(all_dataframes, ignore_index=True)

    print("\n Combined DataFrame:")
    display(combined_df)

## Metrics with std for every channel, for every grasp

In [None]:
metrics_data = []

# Identification columns that lead every row of the metrics table
id_columns = ["subject", "relabeled", "channel"]

# One group per (subject, movement) pair; change 'relabeled' to 'stimulus' if needed
for (subject, relabeled), group in combined_df.groupby(['subject', 'relabeled']):
    # EMG signal columns are the ones whose name starts with 'Channel'
    emg_channels = [col for col in group.columns if col.startswith('Channel')]

    for channel in emg_channels:
        # Samples of the current channel within this group
        channel_signal = group[channel].values

        # Compute EMG signal metrics for the current channel
        metrics = calculate_emg_metrics_means(channel_signal)

        # Store the identification fields together with the computed metrics
        metrics_data.append({
            "subject": subject,
            "relabeled": relabeled,
            "channel": channel,
            **metrics
        })

# Create a DataFrame containing all the obtained metrics
metrics_df = pd.DataFrame(metrics_data)

# Put the identification columns first. The metric columns are read from the
# table itself rather than from the last loop iteration's `metrics`, so an
# empty loop or a failed final computation cannot break or truncate the table.
metric_columns = [col for col in metrics_df.columns if col not in id_columns]
metrics_df = metrics_df.reindex(columns=id_columns + metric_columns)

# Display the DataFrame with the computed metrics
print("\nMetrics DataFrame by Channel, Subject, and Relabeled:")
display(metrics_df)

In [None]:
# Channel identity is not needed once values are pooled per subject/movement
grouped_df = metrics_df.drop(columns=['channel'])

# Both aggregations share the same (subject, movement) grouping
group_keys = ['subject', 'relabeled']
per_grasp = grouped_df.groupby(group_keys)

# Per-group mean of every metric, columns tagged with " mean"
df_mean = per_grasp.mean().add_suffix(" mean")

# Per-group standard deviation of every metric, columns tagged with " std"
df_std = per_grasp.std().add_suffix(" std")

# Join both tables on the grouping keys, then turn the keys into columns
merged = df_mean.merge(df_std, on=group_keys)
df_result = merged.reset_index()

# Display the final DataFrame with aggregated metrics
display(df_result)

## Filtered labels from Mahalanobis analysis

In [None]:
# The specific 'relabeled' values we want to keep
filtered_labels = [55, 2, 14, 19, 32, 0]

# Keep the rows whose 'relabeled' value is one of the selected labels.
# A boolean mask is required here: grouped_df.loc[filtered_labels] would
# look the numbers up in the row index instead of the 'relabeled' column.
dataframe_windowing = grouped_df[grouped_df['relabeled'].isin(filtered_labels)]

dataframe_windowing

Metrics filtered for Channel 10

In [None]:
metrics_data = []

# Identification columns that lead every row of the metrics table
id_columns = ["subject", "relabeled", "channel"]

# Only this EMG channel is analysed in this cell
channel = 'Channel 10'

# One group per (subject, movement) pair; change 'relabeled' to 'stimulus' if needed
for (subject, relabeled), group in combined_df.groupby(['subject', 'relabeled']):
    # Skip groups that do not contain the channel of interest
    if channel not in group.columns:
        continue

    # Samples of the channel within this group
    channel_signal = group[channel].values

    # Compute EMG signal metrics for the channel
    metrics = calculate_emg_metrics_means(channel_signal)

    # Store the identification fields together with the computed metrics
    metrics_data.append({
        "subject": subject,
        "relabeled": relabeled,
        "channel": channel,
        **metrics
    })

# Create a DataFrame containing all the obtained metrics
metrics_df = pd.DataFrame(metrics_data)

# Put the identification columns first. The metric columns are read from the
# table itself rather than from the last loop iteration's `metrics`, so an
# empty loop or a failed final computation cannot break or truncate the table.
metric_columns = [col for col in metrics_df.columns if col not in id_columns]
metrics_df = metrics_df.reindex(columns=id_columns + metric_columns)

# Display the DataFrame with the computed metrics
print("\nMetrics DataFrame by Channel, Subject, and Relabeled:")
display(metrics_df)


In [None]:
metrics_data = []

# Identification columns that lead every row of the metrics table
id_columns = ["subject", "relabeled", "channel"]

# Only this EMG channel is analysed in this cell
channel = 'Channel 10'

# One group per (subject, movement) pair; change 'relabeled' to 'stimulus' if needed
for (subject, relabeled), group in combined_df.groupby(['subject', 'relabeled']):
    # Skip groups that do not contain the channel of interest
    if channel not in group.columns:
        continue

    # Samples of the channel within this group
    channel_signal = group[channel].values

    # Compute EMG signal metrics for the channel
    metrics = calculate_emg_metrics_means(channel_signal)

    # Store the identification fields together with the computed metrics
    metrics_data.append({
        "subject": subject,
        "relabeled": relabeled,
        "channel": channel,
        **metrics
    })

# Create a DataFrame containing all the obtained metrics and keep only the
# movements listed in filtered_labels (row labels are left as they were)
metrics_df = pd.DataFrame(metrics_data)
metrics_df = metrics_df[metrics_df['relabeled'].isin(filtered_labels)]

# Put the identification columns first. The metric columns are read from the
# table itself rather than from the last loop iteration's `metrics`, so an
# empty loop or a failed final computation cannot break or truncate the table.
metric_columns = [col for col in metrics_df.columns if col not in id_columns]
metrics_df = metrics_df.reindex(columns=id_columns + metric_columns)

# Display the DataFrame with the computed metrics
print("\nMetrics DataFrame by Channel, Subject, and Relabeled:")
display(metrics_df)


In [None]:
import traceback

target_channel = "Channel_10"  # Target channel to plot

# Envelope extraction settings
fm = 2000  # Sampling frequency in Hz handed to the envelope filter
normalize_envelope = False
envelope_types = [1]  # Envelope types to compute and overlay

# Set to True to also write every figure to disk as a PNG
save_plots = False

# Non-EMG columns carried over next to the envelope (when present)
meta_columns = ["Time (s)", "subject", "re_repetition", "stimulus", "relabeled"]

for subject in subjects:
    subject_dir = os.path.join(data_path, subject)

    # Iterate over exercise files E1, E2, and E3 for the current subject
    for exercise in ["E1", "E2", "E3"]:
        filename = f"{subject.upper()}_{exercise}_A1.mat"
        file_path = os.path.join(subject_dir, filename)

        # Check if the file exists
        if not os.path.exists(file_path):
            print(f"File not found: {file_path}")
            continue

        print(f"\nProcessing: {filename}")

        # Load data from the .mat file
        try:
            mat_data = src.loadmatNina(database, filename, subject=subject)

            # Verify the structure of the loaded dictionary
            print(f"Keys in mat_data: {mat_data.keys()}")

            # Retrieve re-labeled data and the list of labeled grasps
            df_norm, grasps_etiquetados = src.build_dataframe(
                mat_file=mat_data,
                database=database,
                filename=filename,
                rectify=False,
                normalize=True
            )
            # Keep only the movements selected in filtered_labels
            df_norm = df_norm[df_norm['relabeled'].isin(filtered_labels)]

            # Iterate over each labeled grasp
            for grasp in grasps_etiquetados:
                try:
                    # Restrict the DataFrame to the current grasp
                    grasp_df = df_norm[df_norm['stimulus'] == grasp].copy()

                    if grasp_df.empty:
                        continue

                    # Select EMG columns
                    emg_columns = [col for col in grasp_df.columns if "Channel" in col]

                    if not emg_columns:
                        print(f"No EMG channels found. Available columns: {grasp_df.columns.tolist()}")
                        continue

                    # Check that the target channel exists
                    if target_channel not in emg_columns:
                        print(f"Target channel {target_channel} not found. Available channels: {emg_columns}")
                        # Fall back to any EMG column whose name ends in "10";
                        # the replacement name is kept for later iterations.
                        if "Channel_10" not in emg_columns and any(col.endswith("10") for col in emg_columns):
                            target_channel = next(col for col in emg_columns if col.endswith("10"))
                            print(f"Using alternative channel: {target_channel}")
                        else:
                            print("Cannot find any channel equivalent to Channel_10, skipping this grasp")
                            continue

                    # One transformed DataFrame per envelope type
                    transformed_dfs = []

                    for envelope_type in envelope_types:
                        print(f"  Applying envelope type {envelope_type}")

                        # Extract the EMG channels and compute their envelope
                        emg_channels_df = src.extract_emg_channels(grasp_df)
                        envelope_df = src.get_envelope_lowpass(
                            emg_channels_df,
                            fm,
                            envelope_type=envelope_type,
                            cutoff_freq=0.6
                        )

                        # Optionally normalize by the largest absolute value across all channels
                        if normalize_envelope:
                            global_max = envelope_df.abs().values.max()
                            if global_max != 0:
                                envelope_df = envelope_df / global_max

                        # Preserve the non-EMG columns that are available
                        available_meta_columns = [col for col in meta_columns if col in grasp_df.columns]

                        result_df = pd.concat([envelope_df, grasp_df[available_meta_columns]], axis=1)
                        transformed_dfs.append(result_df)

                    try:
                        print(f"Creating combined plot for {target_channel}")

                        # Create the figure
                        plt.figure(figsize=(12, 6))

                        # Original signal. Colour is given only through the
                        # keyword: combining it with a format string such as
                        # 'b-' makes matplotlib warn about a redundant colour.
                        plt.plot(grasp_df["Time (s)"], grasp_df[target_channel],
                                 color='c', linewidth=1, label='Original Signal')

                        # Overlay every computed envelope on the same axes
                        for envelope_plot_df, envelope_type in zip(transformed_dfs, envelope_types):
                            plt.plot(envelope_plot_df["Time (s)"], envelope_plot_df[target_channel],
                                     color='m', linewidth=3,
                                     label=f'Envelope Type {envelope_type}')

                        # Configure the plot
                        plt.title(f"{filename} - Grasp {grasp} - {target_channel}")
                        plt.xlabel("Time (s)")
                        plt.ylabel("Amplitude")
                        plt.legend()
                        plt.grid(True)

                        # Adjust the layout
                        plt.tight_layout()

                        # Save (only when requested) and show; the "saved"
                        # message is printed only if a file was written.
                        output_filename = f"{subject}_{exercise}_grasp{grasp}_{target_channel}_combined.png"
                        if save_plots:
                            plt.savefig(output_filename)
                            print(f"Plot saved as {output_filename}")
                        plt.show()

                    except Exception as plot_error:
                        print(f"Error creating combined plot: {str(plot_error)}")
                        traceback.print_exc()

                except KeyError as e:
                    print(f"    Error: {str(e)}")
                except Exception as e:
                    print(f"    Error processing grasp {grasp}: {str(e)}")
                    traceback.print_exc()
                    continue

        except Exception as e:
            print(f"Error processing file {filename}: {str(e)}")
            traceback.print_exc()
            continue
                


## Plots

- 100 ms


In [None]:
# Windowing parameters
fm = 2000  # Sampling frequency in Hz
window_length = 200  # 100 ms expressed in samples (at fm = 2000 Hz)
overlap = 0  # No overlap
target_channel = "Channel 10"

# Non-EMG columns kept next to the envelope
meta_columns = ["Time (s)", "subject", "re_repetition", "stimulus", "relabeled"]

# Time axis of one window: sample k sits at k / fm seconds. (linspace with
# its default endpoint would stretch the spacing to T / (N - 1).)
window_time = np.arange(window_length) / fm

# Collects every full-length window produced below
all_dataframes = []

# Look for folders matching the pattern "s + number" or "Subject + number"
for folder in os.listdir(data_path):
    if re.match(r'[sS]\d+', folder) or re.match(r'Subject\d+', folder):
        folder_path = os.path.join(data_path, folder)

        # Iterate over all .mat files in the folder
        for file_name in os.listdir(folder_path):
            if file_name.endswith('.mat'):
                file_path = os.path.join(folder_path, file_name)

                # Attempt to load the .mat file
                try:
                    mat_data = src.loadmatNina(database, file_name, subject=folder)
                except Exception as e:
                    print(f"Error loading {file_name}: {str(e)}")
                    continue

                # Build the DataFrame; a file that cannot be processed is
                # reported and skipped so it does not abort the whole run
                try:
                    df_norm, grasps_etiquetados = src.build_dataframe(
                        mat_file=mat_data,
                        database=database,
                        filename=file_name,
                        rectify=False,
                        normalize=True
                    )
                except Exception as e:
                    print(f"Error processing {file_name}: {str(e)}")
                    continue

                # Keep only the movements selected in filtered_labels
                df_norm = df_norm[df_norm['relabeled'].isin(filtered_labels)]
                print(f"Columnas disponibles en {file_name}: {df_norm.columns.tolist()}")

                # Skip files that do not contain the target channel
                if target_channel not in df_norm.columns:
                    print(f"{target_channel} no encontrado en {file_name}, omitiendo.")
                    continue

                # Compute the envelope of the target channel only
                envelope_df = src.get_envelope_lowpass(df_norm[[target_channel]], fm=fm, cutoff_freq=0.6, envelope_type=1)

                # Put the meta columns back next to the envelope
                result_df = pd.concat([envelope_df, df_norm[meta_columns]], axis=1)

                # Process each grasp
                for grasp in grasps_etiquetados:
                    try:
                        print(f"\nProcessing Grasp {grasp}:")
                        grasp_df = result_df[result_df['stimulus'] == grasp]

                        if grasp_df.empty:
                            print(f"No hay datos para el grasp {grasp} en {file_name}.")
                            continue

                        # Split the grasp data into windows
                        ventanas = src.create_windows_with_overlap(grasp_df, window_length, overlap)

                        # Keep each full-length window as its own DataFrame
                        ventanas_df = [pd.DataFrame(ventana, columns=[target_channel]) for ventana in ventanas if len(ventana) == window_length]

                        if not ventanas_df:
                            print(f"No hay ventanas válidas para el grasp {grasp} en {file_name}.")
                            continue

                        # Add them to the overall list of DataFrames
                        all_dataframes.extend(ventanas_df)

                        # Plot the first window of this grasp
                        plt.figure(figsize=(10, 4))
                        plt.plot(window_time, ventanas_df[0][target_channel], label=target_channel)
                        plt.xlabel("Tiempo (s)")
                        plt.ylabel("Señal EMG")
                        plt.title(f"{file_name} - Grasp {grasp} - {target_channel}")
                        plt.legend()
                        plt.grid()
                        plt.show()
                    except Exception as e:
                        print(f"Error processing grasp {grasp}: {str(e)}")
                        continue



In [None]:
# Parámetros para la ventaneo
fm = 2000  # Frecuencia de muestreo en Hz
window_length = 200  # 100 ms en muestras
overlap = 0  # Sin superposición
target_channel = "Channel 10"

# Lista para almacenar todos los DataFrames generados
all_dataframes = []
# Buscar carpetas que coincidan con el patrón "s + número" o "Subject + número"
for folder in os.listdir(data_path):
    if re.match(r'[sS]\d+', folder) or re.match(r'Subject\d+', folder):
        folder_path = os.path.join(data_path, folder)
        
        # Iterar sobre todos los archivos .mat en la carpeta
        for file_name in os.listdir(folder_path):
            if file_name.endswith('.mat'):
                file_path = os.path.join(folder_path, file_name)
                
                # Intentar cargar el archivo .mat
                try:
                    mat_data = src.loadmatNina(database, file_name, subject=folder)
                except Exception as e:
                    print(f"Error loading {file_name}: {str(e)}")
                    continue
                
                # Procesar el archivo con src.build_dataframe
                df_norm, grasps_etiquetados = src.build_dataframe(
                    mat_file=mat_data,
                    database=database,
                    filename=file_name,
                    rectify=False,
                    normalize=True
                )
                df_norm = df_norm[df_norm['relabeled'].isin(filtered_labels)]
                print(f"Columnas disponibles en {file_name}: {df_norm.columns.tolist()}")

                # Verificar si el canal objetivo está presente en el DataFrame
                if target_channel not in df_norm.columns:
                    print(f"{target_channel} no encontrado en {file_name}, omitiendo.")
                    continue
                
                # Guardar la señal cruda antes de aplicar el envelope
                raw_signal = df_norm[[target_channel]].copy()
                
                # Aplicar extracción del envelope solo a Channel_10
                envelope_df = src.get_envelope_lowpass(df_norm[[target_channel]], fm=2000, cutoff_freq=0.6, envelope_type=1)  
                
                # Conservar columnas meta
                meta_columns = ["Time (s)", "subject", "re_repetition", "stimulus", "relabeled"]
                # Combinar señal envolvente con metadatos
                result_df = pd.concat([envelope_df, df_norm[meta_columns]], axis=1)
                
                # Procesar cada grasp
                for grasp in grasps_etiquetados:
                    try:
                        print(f"\nProcessing Grasp {grasp}:")
                        grasp_df = result_df[result_df['stimulus'] == grasp]
                        raw_grasp_df = raw_signal[df_norm['stimulus'] == grasp]  # Datos crudos correspondientes
                        
                        if grasp_df.empty:
                            print(f"No hay datos para el grasp {grasp} en {file_name}.")
                            continue
                        
                        # Crear ventanas con overlap a partir del DataFrame filtrado
                        ventanas = src.create_windows_with_overlap(grasp_df, window_length, overlap)
                        ventanas_raw = src.create_windows_with_overlap(raw_grasp_df, window_length, overlap)
                        
                        # Guardar cada ventana como un DataFrame individual, solo si tiene el tamaño completo
                        ventanas_df = [pd.DataFrame(ventana, columns=[target_channel]) for ventana in ventanas if len(ventana) == window_length]
                        ventanas_raw_df = [pd.DataFrame(ventana, columns=[target_channel]) for ventana in ventanas_raw if len(ventana) == window_length]
                        
                        if not ventanas_df or not ventanas_raw_df:
                            print(f"No hay ventanas válidas para el grasp {grasp} en {file_name}.")
                            continue
                        
                        # Agregar a la lista general de DataFrames
                        all_dataframes.extend(ventanas_df)
                        
                        # Graficar la primera ventana de este grasp con ambas señales
                        plt.figure(figsize=(12, 5))
                        tiempo = np.linspace(0, window_length / fm, window_length)
                        
                        # Graficar señal cruda
                        plt.plot(tiempo, ventanas_raw_df[0][target_channel], color='c', alpha=0.7, 
                                label=f"{target_channel} - Señal Cruda")
                        
                        # Graficar envolvente
                        plt.plot(tiempo, ventanas_df[0][target_channel], color='m', linewidth=2, 
                                label=f"{target_channel} - Envolvente")
                        
                        plt.xlabel("Tiempo (s)")
                        plt.ylabel("Amplitud")
                        plt.title(f"{file_name} - Grasp {grasp} - {target_channel}")
                        plt.legend()
                        plt.grid(True)
                        plt.tight_layout()
                        plt.show()
                    except Exception as e:
                        print(f"Error processing grasp {grasp}: {str(e)}")
                        continue

- 200 ms

In [None]:
# Windowing parameters
fm = 2000  # Sampling frequency in Hz
window_length = 400  # 200 ms expressed in samples (400 samples / 2000 Hz)
overlap = 0  # No overlap between consecutive windows
target_channel = "Channel 10"

# Metadata columns kept next to the envelope signal
meta_columns = ["Time (s)", "subject", "re_repetition", "stimulus", "relabeled"]

# Time axis shared by every plotted window: one sample every 1/fm seconds.
# (np.linspace with the endpoint included would stretch the sample spacing.)
tiempo = np.arange(window_length) / fm

# Collects every full-length envelope window generated by this cell
all_dataframes = []

for folder in os.listdir(data_path):
    # Only folders named "s<number>", "S<number>" or "Subject<number>"
    if not (re.match(r'[sS]\d+', folder) or re.match(r'Subject\d+', folder)):
        continue
    folder_path = os.path.join(data_path, folder)

    for file_name in os.listdir(folder_path):
        if not file_name.endswith('.mat'):
            continue

        # A file that cannot be loaded is reported and skipped
        try:
            mat_data = src.loadmatNina(database, file_name, subject=folder)
        except Exception as e:
            print(f"Error loading {file_name}: {str(e)}")
            continue

        # Build the per-file DataFrame and keep only the selected labels
        df_norm, grasps_etiquetados = src.build_dataframe(
            mat_file=mat_data,
            database=database,
            filename=file_name,
            rectify=False,
            normalize=True,
        )
        df_norm = df_norm[df_norm['relabeled'].isin(filtered_labels)]
        print(f"Columnas disponibles en {file_name}: {df_norm.columns.tolist()}")

        if target_channel not in df_norm.columns:
            print(f"{target_channel} no encontrado en {file_name}, omitiendo.")
            continue

        # Keep the signal as it was before the envelope extraction
        raw_signal = df_norm[[target_channel]].copy()

        # Envelope extraction on the target channel only
        envelope_df = src.get_envelope_lowpass(
            df_norm[[target_channel]], fm=fm, cutoff_freq=0.6, envelope_type=1
        )

        # Envelope plus metadata, aligned on the DataFrame index
        result_df = pd.concat([envelope_df, df_norm[meta_columns]], axis=1)

        for grasp in grasps_etiquetados:
            # Best effort per grasp: a failure is reported and the loop goes on
            try:
                print(f"\nProcessing Grasp {grasp}:")
                grasp_df = result_df[result_df['stimulus'] == grasp]
                raw_grasp_df = raw_signal[df_norm['stimulus'] == grasp]

                # Grasps without samples are skipped silently in this cell
                if grasp_df.empty:
                    continue

                ventanas = src.create_windows_with_overlap(grasp_df, window_length, overlap)
                ventanas_raw = src.create_windows_with_overlap(raw_grasp_df, window_length, overlap)

                # Keep only full-length windows, reduced to the target channel
                ventanas_df = [
                    pd.DataFrame(ventana, columns=[target_channel])
                    for ventana in ventanas
                    if len(ventana) == window_length
                ]
                ventanas_raw_df = [
                    pd.DataFrame(ventana, columns=[target_channel])
                    for ventana in ventanas_raw
                    if len(ventana) == window_length
                ]

                if not ventanas_df or not ventanas_raw_df:
                    print(f"No hay ventanas válidas para el grasp {grasp} en {file_name}.")
                    continue

                all_dataframes.extend(ventanas_df)

                # Plot the first window of this grasp: pre-envelope signal vs. envelope
                plt.figure(figsize=(12, 5))
                plt.plot(tiempo, ventanas_raw_df[0][target_channel], color='c', alpha=0.7,
                         label=f"{target_channel} - Señal Cruda")
                plt.plot(tiempo, ventanas_df[0][target_channel], color='m', linewidth=2,
                         label=f"{target_channel} - Envolvente")

                plt.xlabel("Tiempo (s)")
                plt.ylabel("Amplitud")
                plt.title(f"{file_name} - Grasp {grasp} - {target_channel}")
                plt.legend()
                plt.grid(True)
                plt.tight_layout()
                plt.show()
            except Exception as e:
                print(f"Error processing grasp {grasp}: {str(e)}")

- 300 ms

In [None]:
# Windowing parameters
fm = 2000  # Sampling frequency in Hz
window_length = 600  # 300 ms expressed in samples (600 samples / 2000 Hz)
overlap = 0  # No overlap between consecutive windows
target_channel = "Channel 10"

# Metadata columns kept next to the envelope signal
meta_columns = ["Time (s)", "subject", "re_repetition", "stimulus", "relabeled"]

# Time axis shared by every plotted window: one sample every 1/fm seconds.
# (np.linspace with the endpoint included would stretch the sample spacing.)
tiempo = np.arange(window_length) / fm

# Collects every full-length envelope window generated by this cell
all_dataframes = []

for folder in os.listdir(data_path):
    # Only folders named "s<number>", "S<number>" or "Subject<number>"
    if not (re.match(r'[sS]\d+', folder) or re.match(r'Subject\d+', folder)):
        continue
    folder_path = os.path.join(data_path, folder)

    for file_name in os.listdir(folder_path):
        if not file_name.endswith('.mat'):
            continue

        # A file that cannot be loaded is reported and skipped
        try:
            mat_data = src.loadmatNina(database, file_name, subject=folder)
        except Exception as e:
            print(f"Error loading {file_name}: {str(e)}")
            continue

        # Build the per-file DataFrame and keep only the selected labels
        df_norm, grasps_etiquetados = src.build_dataframe(
            mat_file=mat_data,
            database=database,
            filename=file_name,
            rectify=False,
            normalize=True,
        )
        df_norm = df_norm[df_norm['relabeled'].isin(filtered_labels)]
        print(f"Columnas disponibles en {file_name}: {df_norm.columns.tolist()}")

        if target_channel not in df_norm.columns:
            print(f"{target_channel} no encontrado en {file_name}, omitiendo.")
            continue

        # Keep the signal as it was before the envelope extraction
        raw_signal = df_norm[[target_channel]].copy()

        # Envelope extraction on the target channel only
        envelope_df = src.get_envelope_lowpass(
            df_norm[[target_channel]], fm=fm, cutoff_freq=0.6, envelope_type=1
        )

        # Envelope plus metadata, aligned on the DataFrame index
        result_df = pd.concat([envelope_df, df_norm[meta_columns]], axis=1)

        for grasp in grasps_etiquetados:
            # Best effort per grasp: a failure is reported and the loop goes on
            try:
                print(f"\nProcessing Grasp {grasp}:")
                grasp_df = result_df[result_df['stimulus'] == grasp]
                raw_grasp_df = raw_signal[df_norm['stimulus'] == grasp]

                if grasp_df.empty:
                    print(f"No hay datos para el grasp {grasp} en {file_name}.")
                    continue

                ventanas = src.create_windows_with_overlap(grasp_df, window_length, overlap)
                ventanas_raw = src.create_windows_with_overlap(raw_grasp_df, window_length, overlap)

                # Keep only full-length windows, reduced to the target channel
                ventanas_df = [
                    pd.DataFrame(ventana, columns=[target_channel])
                    for ventana in ventanas
                    if len(ventana) == window_length
                ]
                ventanas_raw_df = [
                    pd.DataFrame(ventana, columns=[target_channel])
                    for ventana in ventanas_raw
                    if len(ventana) == window_length
                ]

                if not ventanas_df or not ventanas_raw_df:
                    print(f"No hay ventanas válidas para el grasp {grasp} en {file_name}.")
                    continue

                all_dataframes.extend(ventanas_df)

                # Plot the first window of this grasp: pre-envelope signal vs. envelope
                plt.figure(figsize=(12, 5))
                plt.plot(tiempo, ventanas_raw_df[0][target_channel], color='c', alpha=0.7,
                         label=f"{target_channel} - Señal Cruda")
                plt.plot(tiempo, ventanas_df[0][target_channel], color='m', linewidth=2,
                         label=f"{target_channel} - Envolvente")

                plt.xlabel("Tiempo (s)")
                plt.ylabel("Amplitud")
                plt.title(f"{file_name} - Grasp {grasp} - {target_channel}")
                plt.legend()
                plt.grid(True)
                plt.tight_layout()
                plt.show()
            except Exception as e:
                print(f"Error processing grasp {grasp}: {str(e)}")

## Dataframes

- 100 ms

In [None]:
# Windowing parameters
fm = 2000  # Sampling frequency in Hz
window_length = 200  # 100 ms expressed in samples (200 samples / 2000 Hz)
overlap = 0  # No overlap between consecutive windows
target_channel = "Channel 10"  # Channel the metrics are extracted from

# Metadata columns kept next to the envelope signal
meta_columns = ["Time (s)", "subject", "re_repetition", "stimulus", "relabeled"]

# One dict (metadata + metrics) per full-length window
all_metrics = []

for folder in os.listdir(data_path):
    # Only folders named "s<number>", "S<number>" or "Subject<number>"
    if not (re.match(r'[sS]\d+', folder) or re.match(r'Subject\d+', folder)):
        continue
    folder_path = os.path.join(data_path, folder)

    for file_name in os.listdir(folder_path):
        if not file_name.endswith('.mat'):
            continue

        # A file that cannot be loaded is reported and skipped
        try:
            mat_data = src.loadmatNina(database, file_name, subject=folder)
        except Exception as e:
            print(f"Error loading {file_name}: {str(e)}")
            continue

        # Build the per-file DataFrame and keep only the selected labels
        test_df, grasps = src.build_dataframe(
            mat_file=mat_data,
            database=database,
            filename=file_name,
            rectify=False,
            normalize=True,
        )
        test_df = test_df[test_df['relabeled'].isin(filtered_labels)]

        if target_channel not in test_df.columns:
            print(f"{target_channel} no encontrado en {file_name}, omitiendo.")
            continue

        # Envelope extraction on the target channel only
        emg_columns = [target_channel]
        envelope_df = src.get_envelope_lowpass(
            test_df[emg_columns], fm=fm, cutoff_freq=0.6, envelope_type=1
        )

        # Envelope plus metadata, aligned on the DataFrame index
        result_df = pd.concat([envelope_df, test_df[meta_columns]], axis=1)

        window_count = 0  # Full-length windows processed for this file

        for grasp in grasps:
            # Best effort per grasp: a failure is reported and the loop goes on
            try:
                print(f"\nProcessing Grasp {grasp} in file {file_name}:")
                grasp_df = result_df[result_df['stimulus'] == grasp]

                if grasp_df.empty:
                    print(f"No hay datos para el grasp {grasp} en {file_name}.")
                    continue

                # The first 'relabeled' value of the grasp labels all its windows
                relabeled_value = grasp_df['relabeled'].iloc[0]

                ventanas = src.create_windows_with_overlap(grasp_df, window_length, overlap)

                for i, ventana in enumerate(ventanas):
                    # Only full-length windows are processed
                    if len(ventana) != window_length:
                        continue

                    signal = ventana[target_channel].values
                    metrics = calculate_emg_metrics_means(signal)

                    # Metadata first; the computed metrics are unpacked last
                    metrics_with_meta = {
                        "subject": folder,
                        "relabeled": relabeled_value,
                        "stimulus": grasp,
                        "channel": target_channel,
                        "window_id": f"{file_name}_{grasp}_{i}",
                        "file_name": file_name,
                        "window_number": window_count,
                        **metrics,
                    }

                    all_metrics.append(metrics_with_meta)
                    window_count += 1
            except Exception as e:
                print(f"Error processing grasp {grasp}: {str(e)}")

        print(f"Procesadas {window_count} ventanas para el archivo {file_name}")

# DataFrame with the metrics of every processed window
metrics_df = pd.DataFrame(all_metrics)

# Metadata columns first, then the metric columns in alphabetical order
meta_cols = ["subject", "relabeled", "stimulus", "channel", "window_id", "file_name", "window_number"]
metric_cols = [col for col in metrics_df.columns if col not in meta_cols]
column_order = meta_cols + sorted(metric_cols)
metrics_df = metrics_df[column_order]
display(metrics_df)

# Summary statistics.
# NOTE(review): the message mentions subject and movement, but the grouping
# key is 'relabeled' only — confirm which one is intended.
print("\nResumen de métricas por sujeto y tipo de movimiento:")
grouped_df = metrics_df.drop(columns=['channel'])
# Group first and let mean() discard non-numeric columns: selecting numeric
# columns before grouping would drop the 'relabeled' key (KeyError) whenever
# the labels are not numeric.
summary_by_subject_movement = grouped_df.groupby('relabeled').mean(numeric_only=True)
display(summary_by_subject_movement)

# Optional: persist with metrics_df.to_csv("emg_metrics_windowed.csv", index=False)

# Overview of the collected data
print(f"\nTotal de ventanas procesadas: {len(metrics_df)}")
print(f"Distribución por sujeto:\n{metrics_df['subject'].value_counts()}")
print(f"Distribución por movimiento:\n{metrics_df['relabeled'].value_counts()}")


- 200 ms

In [None]:
# Windowing parameters
fm = 2000  # Sampling frequency in Hz
window_length = 400  # 200 ms expressed in samples (400 samples / 2000 Hz)
overlap = 0  # No overlap between consecutive windows
target_channel = "Channel 10"  # Channel the metrics are extracted from

# Metadata columns kept next to the envelope signal
meta_columns = ["Time (s)", "subject", "re_repetition", "stimulus", "relabeled"]

# One dict (metadata + metrics) per full-length window
all_metrics = []

for folder in os.listdir(data_path):
    # Only folders named "s<number>", "S<number>" or "Subject<number>"
    if not (re.match(r'[sS]\d+', folder) or re.match(r'Subject\d+', folder)):
        continue
    folder_path = os.path.join(data_path, folder)

    for file_name in os.listdir(folder_path):
        if not file_name.endswith('.mat'):
            continue

        # A file that cannot be loaded is reported and skipped
        try:
            mat_data = src.loadmatNina(database, file_name, subject=folder)
        except Exception as e:
            print(f"Error loading {file_name}: {str(e)}")
            continue

        # Build the per-file DataFrame and keep only the selected labels
        test_df, grasps = src.build_dataframe(
            mat_file=mat_data,
            database=database,
            filename=file_name,
            rectify=False,
            normalize=True,
        )
        test_df = test_df[test_df['relabeled'].isin(filtered_labels)]

        if target_channel not in test_df.columns:
            print(f"{target_channel} no encontrado en {file_name}, omitiendo.")
            continue

        # Envelope extraction on the target channel only
        emg_columns = [target_channel]
        envelope_df = src.get_envelope_lowpass(
            test_df[emg_columns], fm=fm, cutoff_freq=0.6, envelope_type=1
        )

        # Envelope plus metadata, aligned on the DataFrame index
        result_df = pd.concat([envelope_df, test_df[meta_columns]], axis=1)

        window_count = 0  # Full-length windows processed for this file

        for grasp in grasps:
            # Best effort per grasp: a failure is reported and the loop goes on
            try:
                print(f"\nProcessing Grasp {grasp} in file {file_name}:")
                grasp_df = result_df[result_df['stimulus'] == grasp]

                if grasp_df.empty:
                    print(f"No hay datos para el grasp {grasp} en {file_name}.")
                    continue

                # The first 'relabeled' value of the grasp labels all its windows
                relabeled_value = grasp_df['relabeled'].iloc[0]

                ventanas = src.create_windows_with_overlap(grasp_df, window_length, overlap)

                for i, ventana in enumerate(ventanas):
                    # Only full-length windows are processed
                    if len(ventana) != window_length:
                        continue

                    signal = ventana[target_channel].values
                    metrics = calculate_emg_metrics_means(signal)

                    # Metadata first; the computed metrics are unpacked last
                    metrics_with_meta = {
                        "subject": folder,
                        "relabeled": relabeled_value,
                        "stimulus": grasp,
                        "channel": target_channel,
                        "window_id": f"{file_name}_{grasp}_{i}",
                        "file_name": file_name,
                        "window_number": window_count,
                        **metrics,
                    }

                    all_metrics.append(metrics_with_meta)
                    window_count += 1
            except Exception as e:
                print(f"Error processing grasp {grasp}: {str(e)}")

        print(f"Procesadas {window_count} ventanas para el archivo {file_name}")

# DataFrame with the metrics of every processed 200 ms window
metrics_df_200 = pd.DataFrame(all_metrics)

# Metadata columns first, then the metric columns in alphabetical order
meta_cols = ["subject", "relabeled", "stimulus", "channel", "window_id", "file_name", "window_number"]
metric_cols = [col for col in metrics_df_200.columns if col not in meta_cols]
column_order = meta_cols + sorted(metric_cols)
metrics_df_200 = metrics_df_200[column_order]
display(metrics_df_200)

# Summary statistics.
# NOTE(review): the message mentions subject and movement, but the grouping
# key is 'relabeled' only — confirm which one is intended.
print("\nResumen de métricas por sujeto y tipo de movimiento:")
grouped_df = metrics_df_200.drop(columns=['channel'])
# Group first and let mean() discard non-numeric columns: selecting numeric
# columns before grouping would drop the 'relabeled' key (KeyError) whenever
# the labels are not numeric.
summary_by_subject_movement_200 = grouped_df.groupby('relabeled').mean(numeric_only=True)
display(summary_by_subject_movement_200)

# Optional: persist with metrics_df_200.to_csv("emg_metrics_windowed.csv", index=False)

# Overview of the collected data. This must report the 200 ms frame built in
# this cell, not the 100 ms `metrics_df` left over from another cell.
print(f"\nTotal de ventanas procesadas: {len(metrics_df_200)}")
print(f"Distribución por sujeto:\n{metrics_df_200['subject'].value_counts()}")
print(f"Distribución por movimiento:\n{metrics_df_200['relabeled'].value_counts()}")


- 300 ms

In [None]:
# Windowing parameters
fm = 2000  # Sampling frequency in Hz
window_length = 600  # 300 ms expressed in samples (600 samples / 2000 Hz)
overlap = 0  # No overlap between consecutive windows
target_channel = "Channel 10"  # Channel the metrics are extracted from

# Metadata columns kept next to the envelope signal
meta_columns = ["Time (s)", "subject", "re_repetition", "stimulus", "relabeled"]

# One dict (metadata + metrics) per full-length window
all_metrics = []

for folder in os.listdir(data_path):
    # Only folders named "s<number>", "S<number>" or "Subject<number>"
    if not (re.match(r'[sS]\d+', folder) or re.match(r'Subject\d+', folder)):
        continue
    folder_path = os.path.join(data_path, folder)

    for file_name in os.listdir(folder_path):
        if not file_name.endswith('.mat'):
            continue

        # A file that cannot be loaded is reported and skipped
        try:
            mat_data = src.loadmatNina(database, file_name, subject=folder)
        except Exception as e:
            print(f"Error loading {file_name}: {str(e)}")
            continue

        # Build the per-file DataFrame and keep only the selected labels
        test_df, grasps = src.build_dataframe(
            mat_file=mat_data,
            database=database,
            filename=file_name,
            rectify=False,
            normalize=True,
        )
        test_df = test_df[test_df['relabeled'].isin(filtered_labels)]

        if target_channel not in test_df.columns:
            print(f"{target_channel} no encontrado en {file_name}, omitiendo.")
            continue

        # Envelope extraction on the target channel only
        emg_columns = [target_channel]
        envelope_df = src.get_envelope_lowpass(
            test_df[emg_columns], fm=fm, cutoff_freq=0.6, envelope_type=1
        )

        # Envelope plus metadata, aligned on the DataFrame index
        result_df = pd.concat([envelope_df, test_df[meta_columns]], axis=1)

        window_count = 0  # Full-length windows processed for this file

        for grasp in grasps:
            # Best effort per grasp: a failure is reported and the loop goes on
            try:
                print(f"\nProcessing Grasp {grasp} in file {file_name}:")
                grasp_df = result_df[result_df['stimulus'] == grasp]

                if grasp_df.empty:
                    print(f"No hay datos para el grasp {grasp} en {file_name}.")
                    continue

                # The first 'relabeled' value of the grasp labels all its windows
                relabeled_value = grasp_df['relabeled'].iloc[0]

                ventanas = src.create_windows_with_overlap(grasp_df, window_length, overlap)

                for i, ventana in enumerate(ventanas):
                    # Only full-length windows are processed
                    if len(ventana) != window_length:
                        continue

                    signal = ventana[target_channel].values
                    metrics = calculate_emg_metrics_means(signal)

                    # Metadata first; the computed metrics are unpacked last
                    metrics_with_meta = {
                        "subject": folder,
                        "relabeled": relabeled_value,
                        "stimulus": grasp,
                        "channel": target_channel,
                        "window_id": f"{file_name}_{grasp}_{i}",
                        "file_name": file_name,
                        "window_number": window_count,
                        **metrics,
                    }

                    all_metrics.append(metrics_with_meta)
                    window_count += 1
            except Exception as e:
                print(f"Error processing grasp {grasp}: {str(e)}")

        print(f"Procesadas {window_count} ventanas para el archivo {file_name}")

# DataFrame with the metrics of every processed 300 ms window
metrics_df_300 = pd.DataFrame(all_metrics)

# Metadata columns first, then the metric columns in alphabetical order.
# The reordered frame is stored back into metrics_df_300; assigning it to
# metrics_df_200 would overwrite the 200 ms results with 300 ms data.
meta_cols = ["subject", "relabeled", "stimulus", "channel", "window_id", "file_name", "window_number"]
metric_cols = [col for col in metrics_df_300.columns if col not in meta_cols]
column_order = meta_cols + sorted(metric_cols)
metrics_df_300 = metrics_df_300[column_order]
display(metrics_df_300)

# Summary statistics.
# NOTE(review): the message mentions subject and movement, but the grouping
# key is 'relabeled' only — confirm which one is intended.
print("\nResumen de métricas por sujeto y tipo de movimiento:")
grouped_df = metrics_df_300.drop(columns=['channel'])
# Group first and let mean() discard non-numeric columns: selecting numeric
# columns before grouping would drop the 'relabeled' key (KeyError) whenever
# the labels are not numeric.
summary_by_subject_movement_300 = grouped_df.groupby('relabeled').mean(numeric_only=True)
display(summary_by_subject_movement_300)

# Optional: persist with metrics_df_300.to_csv("emg_metrics_windowed.csv", index=False)

# Overview of the collected data. This must report the 300 ms frame built in
# this cell, not the 100 ms `metrics_df` left over from another cell.
print(f"\nTotal de ventanas procesadas: {len(metrics_df_300)}")
print(f"Distribución por sujeto:\n{metrics_df_300['subject'].value_counts()}")
print(f"Distribución por movimiento:\n{metrics_df_300['relabeled'].value_counts()}")


## DataFrames per grasp (with envelope)

- 300 ms

In [None]:
# Windowing parameters
fm = 2000  # Sampling frequency in Hz
window_length = 600  # 300 ms window expressed in samples (600 samples at 2000 Hz)
overlap = 0  # No overlap between consecutive windows
target_channel = "Channel 10"  # Only this channel is used for metric extraction

# Per-window metric records, grouped by each distinct 'relabeled' value
metrics_dfs_by_relabeled = {}

# Visit every folder whose name starts with "s<number>", "S<number>" or "Subject<number>"
for folder in os.listdir(data_path):
    if not (re.match(r'[sS]\d+', folder) or re.match(r'Subject\d+', folder)):
        continue
    folder_path = os.path.join(data_path, folder)

    # Process every .mat file of this subject
    for file_name in os.listdir(folder_path):
        if not file_name.endswith('.mat'):
            continue

        # A file that fails to load is reported and skipped
        try:
            mat_data = src.loadmatNina(database, file_name, subject=folder)
        except Exception as e:
            print(f"Error loading {file_name}: {str(e)}")
            continue

        # Build the per-sample DataFrame and keep only the selected labels
        test_df, grasps = src.build_dataframe(
            mat_file=mat_data,
            database=database,
            filename=file_name,
            rectify=False,
            normalize=True
        )
        test_df = test_df[test_df['relabeled'].isin(filtered_labels)]

        # Skip files that do not contain the target channel
        if target_channel not in test_df.columns:
            print(f"{target_channel} no encontrado en {file_name}, omitiendo.")
            continue

        # Envelope extraction on the target channel only; the sampling rate
        # comes from `fm` instead of repeating the literal 2000
        emg_columns = [target_channel]
        envelope_df = src.get_envelope_lowpass(test_df[emg_columns], fm=fm, cutoff_freq=0.6, envelope_type=1)

        # Re-attach the metadata columns next to the envelope.
        # NOTE(review): concat(axis=1) aligns rows on the index; this assumes
        # get_envelope_lowpass preserves test_df's index — confirm.
        meta_columns = ["Time (s)", "subject", "re_repetition", "stimulus", "relabeled"]
        result_df = pd.concat([envelope_df, test_df[meta_columns]], axis=1)

        # Process each grasp; a failure in one grasp does not stop the others
        for grasp in grasps:
            try:
                grasp_df = result_df[result_df['stimulus'] == grasp]
                if grasp_df.empty:
                    continue

                # The label is taken from the first row of the grasp, so it is
                # looked up once here rather than once per window
                relabeled_value = grasp_df['relabeled'].iloc[0]

                # Split the grasp rows into windows
                ventanas = src.create_windows_with_overlap(grasp_df, window_length, overlap)

                for ventana in ventanas:
                    # Only complete windows are used
                    if len(ventana) != window_length:
                        continue

                    signal = ventana[target_channel].values
                    metrics = calculate_emg_metrics_means(signal)

                    # Metric values plus identifying metadata
                    metrics_with_meta = {
                        "subject": folder,
                        "relabeled": relabeled_value,
                        "stimulus": grasp,
                        **metrics
                    }
                    metrics_dfs_by_relabeled.setdefault(relabeled_value, []).append(metrics_with_meta)
            except Exception as e:
                print(f"Error processing grasp {grasp}: {str(e)}")
                continue

# Turn each record list into a DataFrame, average the windows per
# (subject, relabeled, stimulus) and publish it as df_relabeled_<label>_300,
# the global-name pattern that later cells search for
for relabeled_value, data in metrics_dfs_by_relabeled.items():
    df = pd.DataFrame(data)
    df_mean = df.groupby(["subject", "relabeled", "stimulus"]).mean().reset_index()
    var_name = f"df_relabeled_{relabeled_value}_300"
    globals()[var_name] = df_mean
    display(df_mean)



- 200 ms

In [None]:
# Windowing parameters
fm = 2000  # Sampling frequency in Hz
window_length = 400  # 200 ms window expressed in samples (400 samples at 2000 Hz)
overlap = 0  # No overlap between consecutive windows
target_channel = "Channel 10"  # Only this channel is used for metric extraction

# Per-window metric records, grouped by each distinct 'relabeled' value
metrics_dfs_by_relabeled = {}

# Visit every folder whose name starts with "s<number>", "S<number>" or "Subject<number>"
for folder in os.listdir(data_path):
    if not (re.match(r'[sS]\d+', folder) or re.match(r'Subject\d+', folder)):
        continue
    folder_path = os.path.join(data_path, folder)

    # Process every .mat file of this subject
    for file_name in os.listdir(folder_path):
        if not file_name.endswith('.mat'):
            continue

        # A file that fails to load is reported and skipped
        try:
            mat_data = src.loadmatNina(database, file_name, subject=folder)
        except Exception as e:
            print(f"Error loading {file_name}: {str(e)}")
            continue

        # Build the per-sample DataFrame and keep only the selected labels
        test_df, grasps = src.build_dataframe(
            mat_file=mat_data,
            database=database,
            filename=file_name,
            rectify=False,
            normalize=True
        )
        test_df = test_df[test_df['relabeled'].isin(filtered_labels)]

        # Skip files that do not contain the target channel
        if target_channel not in test_df.columns:
            print(f"{target_channel} no encontrado en {file_name}, omitiendo.")
            continue

        # Envelope extraction on the target channel only; the sampling rate
        # comes from `fm` instead of repeating the literal 2000
        emg_columns = [target_channel]
        envelope_df = src.get_envelope_lowpass(test_df[emg_columns], fm=fm, cutoff_freq=0.6, envelope_type=1)

        # Re-attach the metadata columns next to the envelope.
        # NOTE(review): concat(axis=1) aligns rows on the index; this assumes
        # get_envelope_lowpass preserves test_df's index — confirm.
        meta_columns = ["Time (s)", "subject", "re_repetition", "stimulus", "relabeled"]
        result_df = pd.concat([envelope_df, test_df[meta_columns]], axis=1)

        # Process each grasp; a failure in one grasp does not stop the others
        for grasp in grasps:
            try:
                grasp_df = result_df[result_df['stimulus'] == grasp]
                if grasp_df.empty:
                    continue

                # The label is taken from the first row of the grasp, so it is
                # looked up once here rather than once per window
                relabeled_value = grasp_df['relabeled'].iloc[0]

                # Split the grasp rows into windows
                ventanas = src.create_windows_with_overlap(grasp_df, window_length, overlap)

                for ventana in ventanas:
                    # Only complete windows are used
                    if len(ventana) != window_length:
                        continue

                    signal = ventana[target_channel].values
                    metrics = calculate_emg_metrics_means(signal)

                    # Metric values plus identifying metadata
                    metrics_with_meta = {
                        "subject": folder,
                        "relabeled": relabeled_value,
                        "stimulus": grasp,
                        **metrics
                    }
                    metrics_dfs_by_relabeled.setdefault(relabeled_value, []).append(metrics_with_meta)
            except Exception as e:
                print(f"Error processing grasp {grasp}: {str(e)}")
                continue

# Turn each record list into a DataFrame, average the windows per
# (subject, relabeled, stimulus) and publish it as df_relabeled_<label>_200,
# the global-name pattern that later cells search for
for relabeled_value, data in metrics_dfs_by_relabeled.items():
    df = pd.DataFrame(data)
    df_mean = df.groupby(["subject", "relabeled", "stimulus"]).mean().reset_index()
    var_name = f"df_relabeled_{relabeled_value}_200"
    globals()[var_name] = df_mean
    display(df_mean)


- 100 ms

In [None]:
# Windowing parameters
fm = 2000  # Sampling frequency in Hz
window_length = 200  # 100 ms window expressed in samples (200 samples at 2000 Hz)
overlap = 0  # No overlap between consecutive windows
target_channel = "Channel 10"  # Only this channel is used for metric extraction

# Per-window metric records, grouped by each distinct 'relabeled' value
metrics_dfs_by_relabeled = {}

# Visit every folder whose name starts with "s<number>", "S<number>" or "Subject<number>"
for folder in os.listdir(data_path):
    if not (re.match(r'[sS]\d+', folder) or re.match(r'Subject\d+', folder)):
        continue
    folder_path = os.path.join(data_path, folder)

    # Process every .mat file of this subject
    for file_name in os.listdir(folder_path):
        if not file_name.endswith('.mat'):
            continue

        # A file that fails to load is reported and skipped
        try:
            mat_data = src.loadmatNina(database, file_name, subject=folder)
        except Exception as e:
            print(f"Error loading {file_name}: {str(e)}")
            continue

        # Build the per-sample DataFrame and keep only the selected labels
        test_df, grasps = src.build_dataframe(
            mat_file=mat_data,
            database=database,
            filename=file_name,
            rectify=False,
            normalize=True
        )
        test_df = test_df[test_df['relabeled'].isin(filtered_labels)]

        # Skip files that do not contain the target channel
        if target_channel not in test_df.columns:
            print(f"{target_channel} no encontrado en {file_name}, omitiendo.")
            continue

        # Envelope extraction on the target channel only; the sampling rate
        # comes from `fm` instead of repeating the literal 2000
        emg_columns = [target_channel]
        envelope_df = src.get_envelope_lowpass(test_df[emg_columns], fm=fm, cutoff_freq=0.6, envelope_type=1)

        # Re-attach the metadata columns next to the envelope.
        # NOTE(review): concat(axis=1) aligns rows on the index; this assumes
        # get_envelope_lowpass preserves test_df's index — confirm.
        meta_columns = ["Time (s)", "subject", "re_repetition", "stimulus", "relabeled"]
        result_df = pd.concat([envelope_df, test_df[meta_columns]], axis=1)

        # Process each grasp; a failure in one grasp does not stop the others
        for grasp in grasps:
            try:
                grasp_df = result_df[result_df['stimulus'] == grasp]
                if grasp_df.empty:
                    continue

                # The label is taken from the first row of the grasp, so it is
                # looked up once here rather than once per window
                relabeled_value = grasp_df['relabeled'].iloc[0]

                # Split the grasp rows into windows
                ventanas = src.create_windows_with_overlap(grasp_df, window_length, overlap)

                for ventana in ventanas:
                    # Only complete windows are used
                    if len(ventana) != window_length:
                        continue

                    signal = ventana[target_channel].values
                    metrics = calculate_emg_metrics_means(signal)

                    # Metric values plus identifying metadata
                    metrics_with_meta = {
                        "subject": folder,
                        "relabeled": relabeled_value,
                        "stimulus": grasp,
                        **metrics
                    }
                    metrics_dfs_by_relabeled.setdefault(relabeled_value, []).append(metrics_with_meta)
            except Exception as e:
                print(f"Error processing grasp {grasp}: {str(e)}")
                continue

# Turn each record list into a DataFrame, average the windows per
# (subject, relabeled, stimulus) and publish it as df_relabeled_<label>_100,
# the global-name pattern that later cells search for
for relabeled_value, data in metrics_dfs_by_relabeled.items():
    df = pd.DataFrame(data)
    df_mean = df.groupby(["subject", "relabeled", "stimulus"]).mean().reset_index()
    var_name = f"df_relabeled_{relabeled_value}_100"
    globals()[var_name] = df_mean
    display(df_mean)


## BOXPLOTS 

- Analysis for average of grasps

In [None]:
# Summary tables to compare, keyed by window length in ms.
# The un-suffixed summary table is the one used for the 100 ms entry.
dataframes = {
    '300': summary_by_subject_movement_300,
    '200': summary_by_subject_movement_200,
    '100': summary_by_subject_movement,
}

# Every column of the 100 ms summary except 'window_number' counts as a metric
metrics = [name for name in summary_by_subject_movement.columns if name != 'window_number']

# Min-max scale each table on its own (the scaler is refit for every table),
# then reshape it to long format, tagged with its source label, for seaborn
scaler = MinMaxScaler()
normalized_dataframes = {}
merged_data = []
for label, df in dataframes.items():
    df_normalized = df.copy()
    df_normalized[metrics] = scaler.fit_transform(df[metrics])
    normalized_dataframes[label] = df_normalized

    df_melted = df_normalized[metrics].melt(var_name='Métrica', value_name='Valor')
    df_melted['Fuente'] = label
    merged_data.append(df_melted)

# One long table with the rows of all three sources
df_final = pd.concat(merged_data, ignore_index=True)

# Boxplot of the normalized values per metric, one box per source
fig, ax = plt.subplots(figsize=(15, 6))
sns.boxplot(data=df_final, x='Métrica', y='Valor', hue='Fuente', ax=ax)
plt.xticks(rotation=90)
ax.set_title('Distribución de métricas normalizadas en los diferentes DataFrames')
ax.set_xlabel('Métrica')
ax.set_ylabel('Valor normalizado')
ax.legend(title='Fuente')
plt.show()

- Analysis for each grasp with all metrics for each window size

In [None]:
# 1. Collect the per-grasp DataFrames by window size (ms) and 'relabeled' value
ventanas = {
    '100': {},  # DataFrames for the 100 ms window
    '200': {},  # DataFrames for the 200 ms window
    '300': {}   # DataFrames for the 300 ms window
}

# Look for globals named df_relabeled_<label>_<window size>.
# globals() is snapshotted with list(...) because this loop binds new
# module-level names, and adding a key to a dict while iterating over it
# raises RuntimeError.
prefix = 'df_relabeled_'
for var_name, var_value in list(globals().items()):
    if not var_name.startswith(prefix):
        continue
    for window_size in ventanas:
        suffix = f'_{window_size}'
        if var_name.endswith(suffix):
            # Strip only the leading prefix and the trailing suffix, so a
            # label that itself contains e.g. "_100" is kept intact
            relabeled_value = var_name[len(prefix):-len(suffix)]
            ventanas[window_size][relabeled_value] = var_value
            break

# 2. Identify the metric columns (everything that is not metadata), using the
# first DataFrame found for any window size rather than only the 100 ms bucket
metadata_cols = ['subject', 'relabeled', 'stimulus', 'channel', 'window_id', 'file_name']
first_df = next(
    (frame for frames_by_label in ventanas.values() for frame in frames_by_label.values()),
    None,
)

if first_df is not None:
    metrics = [col for col in first_df.columns if col not in metadata_cols]
else:
    metrics = []

# 3. Build one long-format table per (window size, relabeled) pair
combined_data = []

for window_size, relabeled_dict in ventanas.items():
    for relabeled_value, df in relabeled_dict.items():
        # Min-max scale the metric columns of this DataFrame on its own
        df_norm = df.copy()
        scaler = MinMaxScaler()

        # Only scale when there is data to scale
        if len(df) > 0 and not df[metrics].empty:
            df_norm[metrics] = scaler.fit_transform(df[metrics])

        # Long format for seaborn
        df_melted = df_norm[metrics].melt(var_name='Métrica', value_name='Valor')
        df_melted['Relabeled'] = relabeled_value
        df_melted['Ventana'] = f'{window_size}'  # Keep only the number for the legend

        combined_data.append(df_melted)

if not combined_data:
    # pd.concat raises ValueError on an empty list, so report and stop here
    print("No se encontraron variables df_relabeled_X_100, _200 o _300 en el espacio global.")
else:
    # Merge everything into a single table
    df_final = pd.concat(combined_data, ignore_index=True)

    # One color per window size (blue, orange, green)
    window_palette = ['#3274A1', '#E1812C', '#3A923A']

    # 4. One figure per 'relabeled' value
    relabeled_values = df_final['Relabeled'].unique()

    for relabeled in relabeled_values:
        # Rows belonging to this 'relabeled' value
        relabeled_data = df_final[df_final['Relabeled'] == relabeled]

        plt.figure(figsize=(20, 10))

        # Metrics on the x axis, one box per window size
        sns.boxplot(
            data=relabeled_data,
            x='Métrica',
            y='Valor',
            hue='Ventana',
            palette=window_palette
        )

        plt.title(f'Distribución de métricas normalizadas para {relabeled}', fontsize=16)
        plt.xlabel('Métrica', fontsize=14)
        plt.ylabel('Valor normalizado', fontsize=14)
        plt.xticks(rotation=90)  # Rotate labels for readability

        plt.legend(title='Fuente')

        # Values are min-max scaled, so the y axis spans 0 to 1
        plt.ylim(0, 1.05)

        # Horizontal grid lines for easier reading
        plt.grid(axis='y', linestyle='--', alpha=0.7)

        plt.tight_layout()

        # Save the figure (optional)
        # plt.savefig(f'metricas_relabeled_{relabeled}.png', dpi=300, bbox_inches='tight')

        plt.show()

    # 5. Combined figure with the rows of every 'relabeled' value
    plt.figure(figsize=(20, 10))

    sns.boxplot(
        data=df_final,
        x='Métrica',
        y='Valor',
        hue='Ventana',
        palette=window_palette
    )

    plt.title('Distribución de métricas normalizadas en los diferentes DataFrames', fontsize=16)
    plt.xlabel('Métrica', fontsize=14)
    plt.ylabel('Valor normalizado', fontsize=14)
    plt.xticks(rotation=90)  # Rotate labels for readability
    plt.legend(title='Fuente')
    plt.ylim(0, 1.05)  # Values are min-max scaled, so the y axis spans 0 to 1
    plt.grid(axis='y', linestyle='--', alpha=0.7)

    plt.tight_layout()
    plt.show()

## Window selection: 200 ms

In [None]:
# Gather every df_relabeled_<label>_200 frame into one table for plotting
all_data = []
relabeled_dfs = {}

# Scan a snapshot of the global namespace for the 200 ms frames
prefix, suffix = 'df_relabeled_', '_200'
for var_name, var_value in list(globals().items()):
    if var_name.startswith(prefix) and var_name.endswith(suffix):
        # Strip only the prefix and suffix so labels containing '_' stay whole
        # (split('_')[2] kept just the first piece of such labels)
        relabeled_value = var_name[len(prefix):-len(suffix)]
        relabeled_dfs[relabeled_value] = var_value

        # Work on a copy so the global frame is left untouched
        all_data.append(var_value.copy())

if all_data:
    combined_df = pd.concat(all_data, ignore_index=True)

    # Metric columns are everything except the metadata columns
    metric_columns = [col for col in combined_df.columns
                    if col not in ['subject', 'relabeled', 'stimulus']]

    # Two-column grid with one subplot per metric.
    # constrained_layout=True is no longer requested: plt.tight_layout() below
    # replaces it anyway, and Matplotlib warns when both are combined.
    n_metrics = len(metric_columns)
    n_rows = (n_metrics + 1) // 2
    fig, axes = plt.subplots(nrows=n_rows, ncols=2, figsize=(14, 3 * n_rows))
    axes = axes.flatten()

    # Tick labels are rotated only when there are more than five categories
    rotate_labels = len(combined_df['relabeled'].unique()) > 5

    # The grid always has at least n_metrics axes, so zip covers every metric
    for ax, metric in zip(axes, metric_columns):
        sns.boxplot(x='relabeled', y=metric, data=combined_df, ax=ax, palette='viridis')

        ax.set_title(f'Comparación de {metric} por categoría "relabeled"')
        ax.set_xlabel('Categoría')
        ax.set_ylabel(metric)

        if rotate_labels:
            ax.tick_params(axis='x', rotation=45)

    # Remove the unused subplot left over when the metric count is odd
    for ax in axes[n_metrics:]:
        fig.delaxes(ax)

    plt.suptitle('Comparación de métricas EMG entre diferentes categorías', fontsize=16, y=1.02)

    plt.tight_layout()
    plt.show()

    # Descriptive statistics of each metric per category
    print("Stadistic for grasp:")
    for metric in metric_columns:
        print(f"\nMétrica: {metric}")
        display(combined_df.groupby('relabeled')[metric].describe())
else:
    print("No se encontraron variables df_relabeled_X_200 en el espacio global.")

In [None]:
# Gather every df_relabeled_<label>_200 frame into one table for plotting
all_data = []
relabeled_dfs = {}

# Scan a snapshot of the global namespace for the 200 ms frames
prefix, suffix = 'df_relabeled_', '_200'
for var_name, var_value in list(globals().items()):
    if var_name.startswith(prefix) and var_name.endswith(suffix):
        # Strip only the prefix and suffix so labels containing '_' stay whole
        # (split('_')[2] kept just the first piece of such labels)
        relabeled_value = var_name[len(prefix):-len(suffix)]
        relabeled_dfs[relabeled_value] = var_value

        # Work on a copy so the global frame is left untouched
        all_data.append(var_value.copy())

if all_data:
    combined_df = pd.concat(all_data, ignore_index=True)

    # Metric columns are everything except the metadata columns
    metric_columns = [col for col in combined_df.columns
                    if col not in ['subject', 'relabeled', 'stimulus']]

    # Two-column grid with one subplot per metric.
    # constrained_layout=True is no longer requested: plt.tight_layout() below
    # replaces it anyway, and Matplotlib warns when both are combined.
    n_metrics = len(metric_columns)
    n_rows = (n_metrics + 1) // 2
    fig, axes = plt.subplots(nrows=n_rows, ncols=2, figsize=(14, 3 * n_rows))
    axes = axes.flatten()

    # Tick labels are rotated only when there are more than five categories
    rotate_labels = len(combined_df['relabeled'].unique()) > 5

    # The grid always has at least n_metrics axes, so zip covers every metric
    for ax, metric in zip(axes, metric_columns):
        # inner='box' draws a small box plot inside each violin
        sns.violinplot(x='relabeled', y=metric, data=combined_df, ax=ax,
                        palette='viridis', inner='box')

        ax.set_title(f'Distribución de {metric} por categoría "relabeled"')
        ax.set_xlabel('Categoría')
        ax.set_ylabel(metric)

        if rotate_labels:
            ax.tick_params(axis='x', rotation=45)

    # Remove the unused subplot left over when the metric count is odd
    for ax in axes[n_metrics:]:
        fig.delaxes(ax)

    plt.suptitle('Distribución de métricas EMG entre diferentes categorías', fontsize=16, y=1.02)
    plt.tight_layout()
    plt.show()

    # Descriptive statistics of each metric per category
    print("Stadistic for grasp:")
    for metric in metric_columns:
        print(f"\nMétrica: {metric}")
        display(combined_df.groupby('relabeled')[metric].describe())
else:
    print("No se encontraron variables df_relabeled_X_200 en el espacio global.")


In [None]:
# Pairwise correlation between the metric columns of the combined table
corr_matrix = combined_df.loc[:, metric_columns].corr()

# Draw the correlation matrix as a heatmap without cell annotations
fig, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(corr_matrix, annot=False, cmap='coolwarm', fmt='.2f', linewidths=0.5, ax=ax)
ax.set_title('Mapa de Correlación de Métricas')
plt.show()



In [None]:
from sklearn.ensemble import RandomForestClassifier

# Predictors are the metric columns; the target is the 'relabeled' category
X = combined_df[metric_columns]
y = combined_df['relabeled']

# Fit a 100-tree random forest with a fixed seed on the full table
rf_model = RandomForestClassifier(n_estimators=100, random_state=42).fit(X, y)

# One importance score per metric column, in the same order as metric_columns
importances = rf_model.feature_importances_

# Bar chart of the importance of each metric
fig, ax = plt.subplots(figsize=(11, 6))
sns.barplot(x=metric_columns, y=importances, palette='viridis', ax=ax)
ax.set_title('Importancia de Características para la Clasificación de Agarre')
ax.set_xlabel('Métricas')
ax.set_ylabel('Importancia')
plt.xticks(rotation=45)
plt.show()
