Libraries

In [None]:
import os
import re
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px

from itertools import combinations
from scipy import stats
from scipy.io import loadmat, whosmat
from scipy.spatial.distance import pdist, squareform, cdist
from scipy.cluster.hierarchy import dendrogram, linkage
import scipy.cluster.hierarchy as sch

from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.decomposition import PCA
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.cluster import KMeans

from statsmodels.multivariate.manova import MANOVA

import src
from src import config, loadmatNina
import pywt

from src.preprocessing_utils import get_envelope

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import scipy.cluster.hierarchy as sch
from matplotlib.patches import Ellipse

import numpy as np
import pandas as pd
import scipy.cluster.hierarchy as sch
import scipy.spatial.distance as ssd
import matplotlib.pyplot as plt
import plotly.express as px
from sklearn.decomposition import PCA
from scipy.linalg import inv


In [None]:
# Database whose raw .mat files are inspected
database = 'DB4'
data_path = f'data/{database}'

# Take the first directory entry whose name starts like a subject folder
# ("s<number>", "S<number>" or "Subject<number>"); None when nothing matches
folder = next(
    (
        entry
        for entry in os.listdir(data_path)
        if re.match(r'[sS]\d+', entry) or re.match(r'Subject\d+', entry)
    ),
    None,
)

if not folder:
    print("No folder found with the convention s + 'number'")
else:
    folder_path = os.path.join(data_path, folder)

    # One (file name, whosmat listing) pair per .mat file in the subject folder
    results = [
        (file_name, whosmat(os.path.join(folder_path, file_name)))
        for file_name in os.listdir(folder_path)
        if file_name.endswith('.mat')
    ]

    # Nested mapping: variable name -> {file name -> remaining whosmat fields}
    data = {}
    for file_name, info in results:
        for var_name, *var_details in info:
            data.setdefault(var_name, {})[file_name] = tuple(var_details)

    # Rows are variables, columns are files
    df = pd.DataFrame(data).transpose()
    df.columns.name = 'File Name'

    print(df)

Functions

- For complete signal

In [None]:
def calculate_emg_metrics(signal, fs=1000):
    """
    Calculates time-domain, spectral and wavelet metrics for a single EMG signal.

    Parameters:
    - signal: NumPy array (or array-like) containing the EMG signal. The spectral
      part uses len(signal) as the number of samples, so a one-dimensional
      signal is expected.
    - fs: Sampling frequency in Hz (default: 1000 Hz). Only used for MNF.

    Returns:
    - A dictionary with the keys MAV, IAV, RMS, WL, ZC, SSC, VAR, CoV, MNF,
      mDWT, TD and MAVS (in that order).

    Notes:
    - CoV is 0 when the signal mean is 0, and MNF is 0 when the spectrum is
      all zeros, instead of dividing by zero.
    - TD is computed with the same expression as WL, so both hold the same value.
    """
    # Accept lists/tuples as well as arrays (the power operator needs an ndarray)
    signal = np.asarray(signal)

    abs_signal = np.abs(signal)
    diff_signal = np.diff(signal)
    abs_diff = np.abs(diff_signal)

    # Mean Absolute Value (MAV) and Integrated Absolute Value (IAV)
    mav = np.mean(abs_signal)
    iav = np.sum(abs_signal)

    # Root Mean Square (RMS)
    rms = np.sqrt(np.mean(signal**2))

    # Waveform Length (WL): cumulative absolute sample-to-sample change
    wl = np.sum(abs_diff)

    # Zero Crossings (ZC): positions where the sign of consecutive samples differs
    zc = np.sum(np.diff(np.sign(signal)) != 0)

    # Slope Sign Changes (SSC): consecutive differences with opposite sign
    ssc = np.sum((diff_signal[1:] * diff_signal[:-1]) < 0)

    # Variance (VAR)
    var = np.var(signal)

    # Coefficient of Variation (CoV), defined as 0 for a zero-mean signal
    mean_signal = np.mean(signal)
    cov = (np.std(signal) / mean_signal) if mean_signal != 0 else 0

    # Mean Frequency (MNF): magnitude-weighted average of the FFT bin frequencies
    freqs = np.fft.rfftfreq(len(signal), d=1/fs)
    fft_magnitude = np.abs(np.fft.rfft(signal))
    spectrum_sum = np.sum(fft_magnitude)
    # A flat (all-zero) signal has an empty spectrum; report 0 instead of NaN
    mnf = np.sum(freqs * fft_magnitude) / spectrum_sum if spectrum_sum != 0 else 0

    # Marginal Discrete Wavelet Transform (mDWT): total absolute coefficient mass
    coeffs = pywt.wavedec(signal, 'db4', level=4)
    mdwt = np.sum([np.sum(np.abs(c)) for c in coeffs])

    # Temporal Difference (TD) -- same expression as WL
    td = np.sum(abs_diff)

    # Mean Absolute Value Slope (MAVS)
    mavs = np.mean(abs_diff)

    # Return the metrics as a dictionary
    metrics = {
        "MAV": mav,
        "IAV": iav,
        "RMS": rms,
        "WL": wl,
        "ZC": zc,
        "SSC": ssc,
        "VAR": var,
        "CoV": cov,
        "MNF": mnf,
        "mDWT": mdwt,
        "TD": td,
        "MAVS": mavs
    }

    return metrics


- For signal with means and standard deviation

In [None]:
def calculate_emg_metrics_std(signal, fs=1000):
    """
    Calculates EMG metrics plus a companion "<NAME>_STD" value for each of them.

    Parameters:
    - signal: NumPy array containing the EMG signal. A 2-D array is processed
      column by column (signal[:, ch]) and the per-column dictionaries are
      averaged key by key.
    - fs: Sampling frequency in Hz (default: 1000 Hz). Only used for MNF.

    Returns:
    - A dictionary with MAV, IAV, RMS, WL, ZC, SSC, VAR, CoV, MNF, mDWT, TD,
      MAVS and the matching "*_STD" keys, for 1-D and 2-D input alike.

    Notes:
    - The "*_STD" values are standard deviations of the per-sample quantity
      the metric is built from (e.g. MAV_STD and IAV_STD are both std(|signal|),
      RMS_STD and VAR_STD are both std(signal)).
    - CoV_STD is the standard deviation of a single scalar and is therefore 0.
    - MNF and MNF_STD are 0 when the spectrum is all zeros.
    """
    if signal.ndim == 2:
        # Recurse into this function (not the plain-metrics variant) so the
        # "*_STD" keys are kept for multi-channel input as well
        metrics_per_channel = [calculate_emg_metrics_std(signal[:, ch], fs) for ch in range(signal.shape[1])]
        averaged_metrics = {key: np.mean([m[key] for m in metrics_per_channel]) for key in metrics_per_channel[0]}
        return averaged_metrics

    abs_signal = np.abs(signal)
    diff_signal = np.diff(signal)
    abs_diff = np.abs(diff_signal)

    # Mean Absolute Value (MAV)
    mav = np.mean(abs_signal)
    mav_std = np.std(abs_signal)

    # Integrated Absolute Value (IAV)
    iav = np.sum(abs_signal)
    iav_std = np.std(abs_signal)

    # Root Mean Square (RMS)
    rms = np.sqrt(np.mean(signal**2))
    rms_std = np.std(signal)

    # Waveform Length (WL)
    wl = np.sum(abs_diff)
    wl_std = np.std(abs_diff)

    # Zero Crossings (ZC): boolean mask of sign changes between consecutive samples
    zero_crossings = np.diff(np.sign(signal)) != 0
    zc = np.sum(zero_crossings)
    zc_std = np.std(zero_crossings)

    # Slope Sign Changes (SSC): consecutive differences with opposite sign
    slope_changes = (diff_signal[1:] * diff_signal[:-1]) < 0
    ssc = np.sum(slope_changes)
    ssc_std = np.std(slope_changes)

    # Variance (VAR)
    var = np.var(signal)
    var_std = np.std(signal)

    # Coefficient of Variation (CoV), defined as 0 for a zero-mean signal
    mean_signal = np.mean(signal)
    cov = (np.std(signal) / mean_signal) if mean_signal != 0 else 0
    cov_std = np.std(cov)  # std of one scalar: always 0, kept for a uniform key set

    # Mean Frequency (MNF)
    freqs = np.fft.rfftfreq(len(signal), d=1/fs)
    fft_magnitude = np.abs(np.fft.rfft(signal))
    spectrum_sum = np.sum(fft_magnitude)
    # A flat (all-zero) signal has an empty spectrum; report 0 instead of NaN
    mnf = np.sum(freqs * fft_magnitude) / spectrum_sum if spectrum_sum != 0 else 0
    mnf_std = np.std(freqs * fft_magnitude) / spectrum_sum if spectrum_sum != 0 else 0

    # Marginal Discrete Wavelet Transform (mDWT): absolute coefficient mass per level
    coeffs = pywt.wavedec(signal, 'db4', level=4)
    level_sums = [np.sum(np.abs(c)) for c in coeffs]
    mdwt = np.sum(level_sums)
    mdwt_std = np.std(level_sums)

    # Temporal Difference (TD) -- same expression as WL
    td = np.sum(abs_diff)
    td_std = np.std(abs_diff)

    # Mean Absolute Value Slope (MAVS)
    mavs = np.mean(abs_diff)
    mavs_std = np.std(abs_diff)

    # Return the metrics as a dictionary
    metrics = {
        "MAV": mav, "MAV_STD": mav_std,
        "IAV": iav, "IAV_STD": iav_std,
        "RMS": rms, "RMS_STD": rms_std,
        "WL": wl, "WL_STD": wl_std,
        "ZC": zc, "ZC_STD": zc_std,
        "SSC": ssc, "SSC_STD": ssc_std,
        "VAR": var, "VAR_STD": var_std,
        "CoV": cov, "CoV_STD": cov_std,
        "MNF": mnf, "MNF_STD": mnf_std,
        "mDWT": mdwt, "mDWT_STD": mdwt_std,
        "TD": td, "TD_STD": td_std,
        "MAVS": mavs, "MAVS_STD": mavs_std
    }

    return metrics


- This function calculates the metrics for each channel and averages the values into a single result

In [None]:
def calculate_emg_metrics_means(signal, fs=1000):
    """
    Calculates the metrics of an EMG signal. If there are multiple channels, it computes
    the metrics for each channel and then averages the results.

    Parameters:
    - signal: NumPy array containing the EMG signal. A 2-D array is processed
      column by column (signal[:, ch]) and the per-column dictionaries are
      averaged key by key.
    - fs: Sampling frequency in Hz (default: 1000 Hz). Only used for MNF.
      Previously the body read an undefined name `fs`; it is now an explicit,
      optional parameter so existing one-argument calls keep working.

    Returns:
    - A dictionary with the keys MAV, IAV, RMS, WL, ZC, SSC, VAR, CoV, MNF,
      mDWT, TD and MAVS.
    """
    if signal.ndim == 2:  # If the signal has multiple channels
        metrics_per_channel = [calculate_emg_metrics_means(signal[:, ch], fs) for ch in range(signal.shape[1])]
        averaged_metrics = {key: np.mean([m[key] for m in metrics_per_channel]) for key in metrics_per_channel[0]}
        return averaged_metrics

    abs_signal = np.abs(signal)
    diff_signal = np.diff(signal)
    abs_diff = np.abs(diff_signal)

    # Mean Absolute Value (MAV)
    mav = np.mean(abs_signal)

    # Integrated Absolute Value (IAV)
    iav = np.sum(abs_signal)

    # Root Mean Square (RMS)
    rms = np.sqrt(np.mean(signal**2))

    # Waveform Length (WL)
    wl = np.sum(abs_diff)

    # Zero Crossings (ZC)
    zc = np.sum(np.diff(np.sign(signal)) != 0)

    # Slope Sign Changes (SSC)
    ssc = np.sum((diff_signal[1:] * diff_signal[:-1]) < 0)

    # Variance (VAR)
    var = np.var(signal)

    # Coefficient of Variation (CoV), defined as 0 for a zero-mean signal
    mean_signal = np.mean(signal)
    cov = (np.std(signal) / mean_signal) if mean_signal != 0 else 0

    # Mean Frequency (MNF)
    freqs = np.fft.rfftfreq(len(signal), d=1/fs)
    fft_magnitude = np.abs(np.fft.rfft(signal))
    spectrum_sum = np.sum(fft_magnitude)
    # A flat (all-zero) signal has an empty spectrum; report 0 instead of NaN
    mnf = np.sum(freqs * fft_magnitude) / spectrum_sum if spectrum_sum != 0 else 0

    # Marginal Discrete Wavelet Transform (mDWT)
    coeffs = pywt.wavedec(signal, 'db4', level=4)
    mdwt = np.sum([np.sum(np.abs(c)) for c in coeffs])

    # Temporal Difference (TD) -- same expression as WL
    td = np.sum(abs_diff)

    # Mean Absolute Value Slope (MAVS)
    mavs = np.mean(abs_diff)

    # Return the metrics as a dictionary
    metrics = {
        "MAV": mav,
        "IAV": iav,
        "RMS": rms,
        "WL": wl,
        "ZC": zc,
        "SSC": ssc,
        "VAR": var,
        "CoV": cov,
        "MNF": mnf,
        "mDWT": mdwt,
        "TD": td,
        "MAVS": mavs
    }

    return metrics

Plots and metrics for complete grasp

In [None]:
# Dataset to process and the absolute location of its folder
database = 'DB4'
data_path = os.path.abspath(os.path.join('data', database))

# Subject folders are named 's1' ... 's10'
subjects = [f's{i}' for i in range(1, 11)]

for subject in subjects:
    subject_dir = os.path.join(data_path, subject)

    # One recording per exercise block (E1, E2, E3) is expected for each subject
    for exercise in ("E1", "E2", "E3"):
        filename = f"{subject.upper()}_{exercise}_A1.mat"
        file_path = os.path.join(subject_dir, filename)

        # Report and skip recordings that are not on disk
        if os.path.exists(file_path):
            print(f"\nProcessing: {filename}")
        else:
            print(f"File not found: {file_path}")
            continue

        # Load the recording and show which variables it contains
        mat_data = src.loadmatNina(database, filename, subject=subject)
        print(f"Keys in mat_data: {mat_data.keys()}")

        # Re-labeled samples plus the list of grasps found in this recording
        test_df, grasps_etiquetados = src.build_dataframe(
            mat_file=mat_data,
            database=database,
            filename=filename,
            rectify=False,
            normalize=True,
        )

        for grasp in grasps_etiquetados:
            try:
                if 'emg' not in mat_data:
                    raise KeyError(f"The key 'emg' is not in mat_data. Available keys: {mat_data.keys()}")

                # NOTE(review): this indexes mat_data['emg'] with the grasp id itself.
                # If 'emg' is a (samples, channels) array this picks a single row
                # rather than the samples of that grasp -- confirm the structure.
                emg_signal = mat_data['emg'][grasp]

                metrics = calculate_emg_metrics(emg_signal)

                # One "name: value" line per metric, four decimals
                print(f"\nMetrics for Grasp {grasp}:")
                for name, value in metrics.items():
                    print(f"{name}: {value:.4f}")

                # Static plot of the grasp, rest periods included
                src.plot_emg_data(
                    database=database,
                    mat_file=mat_data,
                    grasp_number=grasp,
                    interactive=False,
                    include_rest=True,
                    use_stimulus=False,
                    addFourier=False,
                    padding=100,
                    title=f"{filename} - Grasp {grasp}",
                )
            except KeyError as e:
                print(f"    Error: {e!s}")
            except Exception as e:
                # Any other failure is reported and the next grasp is processed
                print(f"    Error processing grasp {grasp}: {e!s}")


In [None]:
# Inspect `mat_data` as left by the most recently executed loading cell
# (it holds whatever file was loaded last, so the content depends on execution order)
display(mat_data)

Dataframe with metrics for a complete signal without discriminating by channel

In [None]:
# Database name
database = 'DB4'

# Full path to the database folder
data_path = os.path.abspath(os.path.join('data', database))

# List of subjects, generating names from 's1' to 's10'
subjects = [f's{i}' for i in range(1, 11)]

# Metadata columns that identify each row of the result
id_columns = ["subject", "exercise", "filename", "grasp"]

# One dictionary (metadata + metrics) per processed grasp
metrics_data = []

# Iterate over each subject in the database
for subject in subjects:
    subject_dir = os.path.join(data_path, subject)

    # Iterate over exercise files E1, E2, and E3
    for exercise in ["E1", "E2", "E3"]:
        filename = f"{subject.upper()}_{exercise}_A1.mat"
        file_path = os.path.join(subject_dir, filename)

        # Silently skip recordings that are not on disk
        if not os.path.exists(file_path):
            continue

        # Load data from the .mat file
        mat_data = src.loadmatNina(database, filename, subject=subject)

        # Build DataFrame with re-labeled data
        test_df, grasps_etiquetados = src.build_dataframe(
            mat_file=mat_data,
            database=database,
            filename=filename,
            rectify=False,
            normalize=True
        )

        # Iterate over labeled grasps
        for grasp in grasps_etiquetados:
            try:
                # NOTE(review): indexes mat_data['emg'] with the grasp id itself;
                # confirm this selects the samples of that grasp.
                emg_signal = mat_data['emg'][grasp]

                # Compute EMG signal metrics
                metrics = calculate_emg_metrics(emg_signal)

                # Append metrics with metadata to the list
                metrics_data.append({
                    "subject": subject,
                    "exercise": exercise,
                    "filename": filename,
                    "grasp": grasp,
                    **metrics  # Unpack metrics into the dictionary
                })

            except Exception as e:
                print(f"Error in {filename} - Grasp {grasp}: {str(e)}")
                continue

# Create a DataFrame with organized metrics
metrics_df = pd.DataFrame(metrics_data)

# Metadata first, then every metric column that was actually collected.
# The order is derived from the DataFrame itself rather than from the loop
# variable `metrics`, which is undefined (or stale from another cell) when no
# grasp was processed and may not reflect all rows.
metric_columns = [col for col in metrics_df.columns if col not in id_columns]
column_order = id_columns + metric_columns
# reindex also yields a well-formed empty table when nothing was processed
metrics_df = metrics_df.reindex(columns=column_order)

# Print the final DataFrame with extracted metrics
print("\nMetrics DataFrame:")
metrics_df



Dataframe with average of metrics for channels in each grasp

Dataframe with mean and standard deviation

In [None]:
# Metadata columns that identify each row of the result
id_columns = ["subject", "exercise", "filename", "grasp"]

# One dictionary (metadata + metrics) per processed grasp
metrics_data = []

# Iterate over each subject in the database
# (`subjects`, `data_path` and `database` come from an earlier cell)
for subject in subjects:
    subject_dir = os.path.join(data_path, subject)

    # Iterate over exercise files E1, E2, and E3
    for exercise in ["E1", "E2", "E3"]:
        filename = f"{subject.upper()}_{exercise}_A1.mat"
        file_path = os.path.join(subject_dir, filename)

        # Silently skip recordings that are not on disk
        if not os.path.exists(file_path):
            continue

        # Load data from the .mat file
        mat_data = src.loadmatNina(database, filename, subject=subject)

        # Build DataFrame with re-labeled data
        test_df, grasps_etiquetados = src.build_dataframe(
            mat_file=mat_data,
            database=database,
            filename=filename,
            rectify=False,
            normalize=True
        )

        # Iterate over labeled grasps
        for grasp in grasps_etiquetados:
            try:
                # NOTE(review): indexes mat_data['emg'] with the grasp id itself;
                # confirm this selects the samples of that grasp.
                emg_signal = mat_data['emg'][grasp]

                # Compute EMG signal metrics using standard deviation
                metrics = calculate_emg_metrics_std(emg_signal)

                # Append metrics with metadata to the list
                metrics_data.append({
                    "subject": subject,
                    "exercise": exercise,
                    "filename": filename,
                    "grasp": grasp,
                    **metrics  # Unpack metrics into the dictionary
                })

            except Exception as e:
                print(f"Error in {filename} - Grasp {grasp}: {str(e)}")
                continue

# Create a DataFrame with organized metrics
metrics_df_std = pd.DataFrame(metrics_data)

# Metadata first, then every metric column that was actually collected.
# The order is derived from the DataFrame itself rather than from the loop
# variable `metrics`, which is undefined (or stale from another cell) when no
# grasp was processed and may not reflect all rows.
metric_columns = [col for col in metrics_df_std.columns if col not in id_columns]
column_order = id_columns + metric_columns
# reindex also yields a well-formed empty table when nothing was processed
metrics_df_std = metrics_df_std.reindex(columns=column_order)

# Print the final DataFrame with extracted metrics
print("\nMetrics DataFrame:")
display(metrics_df_std)


DataFrame with every channel of the database

In [None]:
# Per-file DataFrames, concatenated once all files have been read
all_dataframes = []

for folder in os.listdir(data_path):
    # Keep only entries named like subject folders ("s<number>" / "Subject<number>")
    is_subject_folder = re.match(r'[sS]\d+', folder) or re.match(r'Subject\d+', folder)
    if not is_subject_folder:
        continue

    folder_path = os.path.join(data_path, folder)

    for file_name in os.listdir(folder_path):
        # Only MATLAB files are processed
        if not file_name.endswith('.mat'):
            continue

        file_path = os.path.join(folder_path, file_name)

        # Loading failures are reported and the file is skipped
        try:
            mat_data = src.loadmatNina(database, file_name, subject=folder)
        except Exception as e:
            print(f"Error loading {file_name}: {str(e)}")
            continue

        # Same policy for failures while building the per-file DataFrame
        try:
            test_df, grasps = src.build_dataframe(
                mat_file=mat_data,
                database=database,
                filename=file_name,
                rectify=False,
                normalize=True,
            )

            # Tag every row with the folder it came from
            test_df['subject'] = folder

            all_dataframes.append(test_df)

        except Exception as e:
            print(f"Error processing {file_name}: {str(e)}")
            continue

# Stack everything into one table, or warn when nothing could be processed
if not all_dataframes:
    print("Warning: No DataFrames were generated. Check the input data.")
else:
    combined_df = pd.concat(all_dataframes, ignore_index=True)

    print("\n Combined DataFrame:")
    display(combined_df)

In [None]:
# List to store all generated DataFrames
all_dataframes = []

# Look for folders matching the pattern "s + number" or "Subject + number"
for folder in os.listdir(data_path):
    if re.match(r'[sS]\d+', folder) or re.match(r'Subject\d+', folder):
        folder_path = os.path.join(data_path, folder)

        # Iterate over all .mat files in the folder
        for file_name in os.listdir(folder_path):
            if file_name.endswith('.mat'):
                file_path = os.path.join(folder_path, file_name)

                # Loading failures are reported and the file is skipped
                try:
                    mat_data = src.loadmatNina(database, file_name, subject=folder)
                except Exception as e:
                    print(f"Error loading {file_name}: {str(e)}")
                    continue

                # Attempt to process the file with src.build_dataframe
                try:
                    test_df, grasps = src.build_dataframe(
                        mat_file=mat_data,
                        database=database,
                        filename=file_name,
                        rectify=False,
                        normalize=True
                    )

                    # Set the subject column BEFORE selecting the metadata columns.
                    # "subject" is part of meta_columns below, so selecting it from
                    # a frame that does not carry it yet raises KeyError, which the
                    # except clause turns into a skipped file. The value is the
                    # folder name either way, so the result is unchanged when the
                    # column already existed.
                    labeled_df = test_df.assign(subject=folder)

                    # Apply envelope extraction to the EMG channels only
                    emg_columns = [col for col in labeled_df.columns if "Channel" in col]
                    envelope_df = get_envelope(labeled_df[emg_columns], envelope_type=1)  # Change type as needed

                    # Preserve non-EMG columns next to the enveloped channels
                    meta_columns = ["Time (s)", "subject", "re_repetition", "stimulus", "relabeled"]
                    result_df = pd.concat([envelope_df, labeled_df[meta_columns]], axis=1)

                    # Append the processed DataFrame to the list
                    all_dataframes.append(result_df)

                except Exception as e:
                    print(f"Error processing {file_name}: {str(e)}")
                    continue

# Concatenate all DataFrames into a single one if data is available
if all_dataframes:
    combined_df = pd.concat(all_dataframes, ignore_index=True)

    # Display the combined DataFrame
    print("\n Combined DataFrame:")
    display(combined_df)

else:
    print("Warning: No DataFrames were generated. Check the input data.")


DataFrame with metrics per channel

In [None]:
def calculate_emg_metrics(signal, fs=2000):
    """
    Computes EMG descriptors together with a companion "_STD" value for most of them.

    Note: this definition replaces any earlier `calculate_emg_metrics` in the
    notebook once the cell is executed.

    Parameters:
    - signal: NumPy array with the EMG samples. A 2-D array is processed column
      by column (signal[:, ch]) and the per-column results are averaged key by key.
    - fs: Sampling frequency in Hz (default: 2000 Hz). Only used for MNF / MNF_STD.

    Returns:
    - A dictionary with the computed metrics, or an empty dictionary when any
      step raised (the error is printed instead of propagated).
    """
    try:
        # Multi-channel input: average the per-channel dictionaries
        if signal.ndim == 2:
            per_channel = [calculate_emg_metrics(signal[:, idx], fs) for idx in range(signal.shape[1])]
            return {name: np.mean([ch_metrics[name] for ch_metrics in per_channel]) for name in per_channel[0]}

        # Shared intermediates
        rectified = np.abs(signal)
        slope = np.diff(signal)
        abs_slope = np.abs(slope)
        power = signal**2
        sign_flips = np.diff(np.sign(signal)) != 0
        slope_flips = (slope[1:] * slope[:-1]) < 0
        mu = np.mean(signal)
        sigma = np.std(signal)

        # Time-domain metrics (insertion order defines the downstream column order)
        metrics = {}
        metrics["MAV"] = np.mean(rectified)
        metrics["MAV_STD"] = np.std(rectified)
        metrics["IAV"] = np.sum(rectified)
        metrics["IAV_STD"] = np.std(rectified)
        metrics["RMS"] = np.sqrt(np.mean(power))
        metrics["RMS_STD"] = sigma
        metrics["WL"] = np.sum(abs_slope)
        metrics["WL_STD"] = np.std(abs_slope)
        metrics["ZC"] = np.sum(sign_flips)
        metrics["ZC_STD"] = np.std(sign_flips)
        metrics["SSC"] = np.sum(slope_flips)
        metrics["SSC_STD"] = np.std(slope_flips)
        metrics["VAR"] = np.var(signal)
        metrics["VAR_STD"] = sigma
        # CoV is defined as 0 for a zero-mean signal
        if mu != 0:
            metrics["CoV"] = sigma / mu
        else:
            metrics["CoV"] = 0
        metrics["TD"] = np.sum(abs_slope)
        metrics["TD_STD"] = np.std(abs_slope)
        metrics["MAVS"] = np.mean(abs_slope)
        metrics["MAVS_STD"] = np.std(abs_slope)
        metrics["MNP"] = np.mean(power)
        metrics["MNP_STD"] = np.std(power)

        # Spectral metrics; both are 0 when the magnitude spectrum sums to zero
        freqs = np.fft.rfftfreq(len(signal), d=1/fs)
        fft_magnitude = np.abs(np.fft.rfft(signal))
        total_magnitude = np.sum(fft_magnitude)
        if total_magnitude != 0:
            weighted = freqs * fft_magnitude
            metrics["MNF"] = np.sum(weighted) / total_magnitude
            metrics["MNF_STD"] = np.std(weighted) / total_magnitude
        else:
            metrics["MNF"] = 0
            metrics["MNF_STD"] = 0

        # Wavelet metrics: absolute coefficient mass per decomposition level
        level_mass = np.array([np.sum(np.abs(level)) for level in pywt.wavedec(signal, 'db4', level=4)])
        metrics["mDWT"] = np.sum(level_mass)
        metrics["mDWT_STD"] = np.std(level_mass)

        # Kurtosis (fourth standardized moment), 0 for a constant signal
        if sigma != 0:
            metrics["Kurt"] = np.mean((signal - mu) ** 4) / (sigma ** 4)
        else:
            metrics["Kurt"] = 0
        # Standard deviation of a single scalar, hence 0
        metrics["Kurt_STD"] = np.std(metrics["Kurt"])

        return metrics

    except Exception as e:
        print(f"Error in calculate_emg_metrics: {e}")
        return {}

# Metadata columns that identify each row of the result
id_columns = ["subject", "relabeled", "channel"]

# One dictionary (metadata + metrics) per subject, movement and channel
metrics_data = []

# Iterate over each subject and each identified movement (relabeled or stimulus)
for (subject, relabeled), group in combined_df.groupby(['subject', 'relabeled']):  # Change 'relabeled' to 'stimulus' if needed
    # Iterate over each EMG channel
    for channel in group.columns:
        # Only string-named columns starting with 'Channel' hold EMG signals
        if isinstance(channel, str) and channel.startswith('Channel'):
            # Get the signal values for the current channel
            channel_signal = group[channel].values

            # Compute EMG signal metrics for the current channel
            metrics = calculate_emg_metrics(channel_signal)

            # Append metadata and computed metrics to the list
            metrics_data.append({
                "subject": subject,  # Subject identification
                "relabeled": relabeled,  # Movement identification (relabeled or stimulus)
                "channel": channel,  # EMG channel
                **metrics  # Unpack all computed metrics
            })

# Create a DataFrame containing all the obtained metrics
metrics_df = pd.DataFrame(metrics_data)

# Metadata first, then every metric column that was actually collected.
# The order is derived from the DataFrame itself rather than from the loop
# variable `metrics`: calculate_emg_metrics returns {} when it fails, so the
# last iteration's dictionary may be empty (dropping every metric column) and
# is undefined altogether when no channel was processed.
metric_columns = [col for col in metrics_df.columns if col not in id_columns]
column_order = id_columns + metric_columns
# reindex also yields a well-formed empty table when nothing was processed
metrics_df = metrics_df.reindex(columns=column_order)

# Display the DataFrame with the computed metrics
print("\nMetrics DataFrame by Channel, Subject, and Relabeled:")
display(metrics_df)


In [None]:
# Work on the metrics without the channel label so channels are pooled per group
grouped_df = metrics_df.drop(columns=['channel'])

# A single groupby object serves both aggregations
by_subject_and_grasp = grouped_df.groupby(['subject', 'relabeled'])

# Across-channel mean of every metric; columns become "<metric> mean"
df_mean = by_subject_and_grasp.mean()
df_mean.columns = [str(col) + " mean" for col in df_mean.columns]

# Across-channel standard deviation of every metric; columns become "<metric> std"
df_std = by_subject_and_grasp.std()
df_std.columns = [str(col) + " std" for col in df_std.columns]

# Combine both tables on the (subject, relabeled) keys and expose the keys as columns
df_result = df_mean.merge(df_std, on=['subject', 'relabeled']).reset_index()

# Show the aggregated table
display(df_result)


Dendrogram for grasps

In [None]:
# Everything after the first two columns ('subject' and 'relabeled') is a feature
features = df_result.iloc[:, 2:]

# Put all features on a common scale (zero mean, unit variance) before clustering
df_scaled = StandardScaler().fit_transform(features)

# Ward linkage: merges the pair of clusters that least increases within-cluster variance
linked = sch.linkage(df_scaled, method='ward')

# Draw the tree, one leaf per row of df_result labeled with its 'relabeled' value
plt.figure(figsize=(20, 10))
dendrogram_options = dict(
    labels=df_result['relabeled'].values,
    leaf_rotation=90,   # vertical labels
    leaf_font_size=8,
)
sch.dendrogram(linked, **dendrogram_options)
plt.title("Dendrogram based on the 'relabeled' variable")
plt.xlabel("Clusters")
plt.ylabel("Euclidean Distance")
plt.show()


In [None]:
# Numerical feature columns only. 'relabeled' is the grouping key: when it is
# numeric, select_dtypes keeps it, and grouping by an external Series does not
# remove it, so the label itself would be aggregated as if it were a feature.
feature_columns = [
    col for col in df_result.select_dtypes(include=['number']).columns
    if col != 'relabeled'
]

# Mean and standard deviation of each feature per grasp (across subjects)
grouped = df_result.groupby('relabeled')[feature_columns].agg(['mean', 'std'])
display(grouped)

In [None]:
# Preview the first rows of `grouped` (head() returns five rows by default)
grouped.head()

In [None]:
# Numerical feature columns only. 'relabeled' is the grouping key: when it is
# numeric, select_dtypes keeps it, and grouping by an external Series does not
# remove it, so the label itself would be aggregated and then clustered on.
feature_columns = [
    col for col in df_result.select_dtypes(include=['number']).columns
    if col != 'relabeled'
]

# Mean and standard deviation of each feature per grasp (across subjects)
grouped = df_result.groupby('relabeled')[feature_columns].agg(['mean', 'std'])
display(grouped)

# Flatten the (feature, statistic) column pairs into "feature_statistic" names
grouped.columns = ['_'.join(col).strip() for col in grouped.columns.values]

# Normalize the data to prevent magnitude differences from affecting the clustering distance
scaler = StandardScaler()
scaled_features = scaler.fit_transform(grouped)

# Apply hierarchical clustering using the Ward method (minimizes variance within clusters)
linked = sch.linkage(scaled_features, method='ward')

# Create and visualize the dendrogram, one leaf per grasp
plt.figure(figsize=(12, 6))
sch.dendrogram(
    linked,
    labels=grouped.index.tolist(),  # Leaf labels are the 'relabeled' values
    leaf_rotation=90,  # Rotate labels for better readability
    leaf_font_size=8  # Adjust font size
)
plt.title("Dendrogram based on mean and standard deviation per grasp type")
plt.xlabel("Grasps")
plt.ylabel("Euclidean Distance")
plt.show()


In [None]:
# 1. Compute the average of metrics per channel
# Exclude 'subject', 'relabeled', and 'channel' to keep only the metric columns
metrics_columns = [col for col in metrics_df.columns if col not in ["subject", "relabeled", "channel"]]

# Group by 'channel' and compute the mean of each metric
average_metrics_df = metrics_df.groupby('channel')[metrics_columns].mean().reset_index()
display(average_metrics_df)

# 2. Prepare data for clustering
X = average_metrics_df[metrics_columns].values  # Raw per-channel metric averages

# Standardize every metric before measuring distances. The metrics live on
# very different scales (sums over all samples next to per-sample means), so
# without scaling the Euclidean/Ward distances are dominated by the largest
# ones. This matches the scaling applied before the other dendrograms.
X_scaled = StandardScaler().fit_transform(X)

# 3. Perform hierarchical clustering on the standardized features
Z = linkage(X_scaled, method='ward')  # 'ward' minimizes variance within clusters

# 4. Plot the dendrogram with adjustments for better visualization
plt.figure(figsize=(15, 8))
plt.title('Dendrogram of EMG Channels (Average Metrics)', fontsize=16, pad=20)
plt.xlabel('Channels', fontsize=14)
plt.ylabel('Distance', fontsize=14)

dendrogram(
    Z,
    labels=average_metrics_df['channel'].values,  # Labels for each channel
    leaf_rotation=90,  # Rotate labels for better readability
    leaf_font_size=12,  # Adjust font size
    color_threshold=0.7 * max(Z[:, 2]),  # Color clusters below 70% of the largest merge distance
)

plt.tight_layout()  # Automatically adjust layout for better fit
plt.show()


In [None]:
# 1. Compute the mean and standard deviation of metrics per channel
# Exclude 'subject', 'relabeled', and 'channel' to keep only numerical metric columns
metrics_columns = [col for col in metrics_df.columns if col not in ["subject", "relabeled", "channel"]]

# Group by 'channel' and compute the mean and standard deviation for each metric
agg_metrics_df = metrics_df.groupby('channel')[metrics_columns].agg(['mean', 'std']).reset_index()

# Flatten column names to "metric_statistic"; strip('_') turns ('channel', '') into 'channel'
agg_metrics_df.columns = ['_'.join(col).strip('_') for col in agg_metrics_df.columns]

display(agg_metrics_df)  # Display the aggregated metrics table

# 2. Prepare data for clustering using only the metric averages
X = agg_metrics_df[[col for col in agg_metrics_df.columns if col.endswith('_mean')]].values  # Extract only "_mean" columns

# Standardize every metric before measuring distances. The metrics live on
# very different scales (sums over all samples next to per-sample means), so
# without scaling the Euclidean/Ward distances are dominated by the largest
# ones. This matches the scaling applied before the other dendrograms.
X_scaled = StandardScaler().fit_transform(X)

# 3. Perform hierarchical clustering on the standardized features
Z = linkage(X_scaled, method='ward')  # 'ward' minimizes variance within clusters

# 4. Plot the dendrogram with adjustments for better visualization
plt.figure(figsize=(15, 8))
plt.title('Dendrogram of EMG Channels (Average Metrics)', fontsize=16, pad=20)
plt.xlabel('Channels', fontsize=14)
plt.ylabel('Distance', fontsize=14)

dendrogram(
    Z,
    labels=agg_metrics_df['channel'].values,  # Labels for EMG channels
    leaf_rotation=90,  # Rotate labels for better readability
    leaf_font_size=12,  # Adjust font size
    color_threshold=0.7 * max(Z[:, 2]),  # Color clusters below 70% of the largest merge distance
)

plt.tight_layout()  # Automatically adjust layout for better fit
plt.show()


Mahalanobis use

- Complete linkage

In [None]:
# Grasp similarity with the rest class included: MANOVA on the RMS/MNF
# metrics, then complete-linkage clustering of the per-grasp mean vectors
# using the Mahalanobis distance.

# Select only the metrics of interest: RMS and MNF
metrics = ['RMS mean', 'RMS_STD mean', 'RMS std', 'RMS_STD std', 'MNF mean', 'MNF_STD mean', 'MNF std', 'MNF_STD std']
dependent_vars = [col for col in df_result.columns if col in metrics]
print(dependent_vars)

# Validate the inputs before they are used, so a missing column produces
# these messages instead of a KeyError further down.
missing_vars = [var for var in dependent_vars if var not in df_result.columns]
if missing_vars:
    raise ValueError(f"The following dependent variables are missing in the DataFrame: {missing_vars}")
if 'relabeled' not in df_result.columns:
    raise ValueError("The 'relabeled' column is not present in the DataFrame.")

# Remove redundant columns: for every pair with |r| > 0.95, drop the later
# column (only the strict upper triangle is inspected, so each pair counts once).
corr_matrix = df_result[dependent_vars].corr().abs()
upper_tri = corr_matrix.where(np.triu(np.ones(corr_matrix.shape), k=1).astype(bool))
to_drop = [column for column in upper_tri.columns if any(upper_tri[column] > 0.95)]
dependent_vars = [col for col in dependent_vars if col not in to_drop]

# Handle missing values (note: this rebinds df_result for all later cells)
df_result = df_result.dropna(subset=dependent_vars + ['relabeled'])

# MANOVA analysis.
# The metric column names contain spaces, which patsy cannot parse as bare
# identifiers, so every name is wrapped in patsy's Q("...") quoting helper.
# NOTE(review): if 'relabeled' is stored as a number it enters the model as a
# continuous covariate; consider C(relabeled) to treat grasps as groups.
quoted_vars = [f'Q("{var}")' for var in dependent_vars]
formula = f"{' + '.join(quoted_vars)} ~ relabeled"
try:
    manova = MANOVA.from_formula(formula, data=df_result)
    print(manova.mv_test())
except Exception as e:
    # Best effort: report the failure and continue with the clustering.
    print(f"Error in MANOVA: {e}")

# Group by 'relabeled' and compute the mean of each metric
grouped = df_result[dependent_vars].groupby(df_result['relabeled']).mean()

# Feature normalization
scaler = StandardScaler()
scaled_features = scaler.fit_transform(grouped)

# Pairwise Mahalanobis distances between grasps (condensed vector form).
# The pseudo-inverse keeps this working when the covariance is singular.
cov_matrix = np.cov(scaled_features, rowvar=False)
inv_cov_matrix = np.linalg.pinv(cov_matrix)
mahalanobis_distances = pdist(scaled_features, metric='mahalanobis', VI=inv_cov_matrix)

# Square distance matrix (kept for inspection)
distance_matrix = squareform(mahalanobis_distances)

# Agglomerative hierarchical clustering.
# linkage() interprets a 2-D array as observation vectors, not as distances,
# so it must receive the condensed vector rather than the square matrix.
linked = sch.linkage(mahalanobis_distances, method='complete')
linked[:, 2] /= np.max(linked[:, 2])  # Normalize merge heights to [0, 1]

# Dendrogram
plt.figure(figsize=(14, 6))
sch.dendrogram(
    linked,
    labels=grouped.index.tolist(),
    leaf_rotation=90,
    leaf_font_size=10,
    color_threshold=0.4 * max(linked[:, 2])
)
plt.title("Dendrogram Based on Normalized Mahalanobis Distance", fontsize=14, pad=15)
plt.xlabel("Grasps", fontsize=12)
plt.ylabel("Normalized Mahalanobis Distance", fontsize=12)
plt.xticks(rotation=90)
plt.show()

Without the rest class (relabeled == 0)

- Complete linkage

In [None]:
# Grasp similarity with the rest class removed: MANOVA on the RMS/MNF
# metrics, then complete-linkage clustering of the per-grasp mean vectors
# using the Mahalanobis distance.

# 1. Select only the metrics of interest
metrics = ['RMS mean', 'RMS_STD mean', 'RMS std', 'RMS_STD std', 'MNF mean', 'MNF_STD mean', 'MNF std', 'MNF_STD std']
dependent_vars = [col for col in df_result.columns if col in metrics]
print("Selected variables:", dependent_vars)

# 2. Validate the inputs before they are used, so a missing column produces
#    these messages instead of a KeyError further down.
missing_vars = [var for var in dependent_vars if var not in df_result.columns]
if missing_vars:
    raise ValueError(f"The following dependent variables are missing in the DataFrame: {missing_vars}")
if 'relabeled' not in df_result.columns:
    raise ValueError("The 'relabeled' column is not present in the DataFrame.")

# 3. Exclude data where relabeled == 0 (note: this rebinds df_result for all later cells)
df_result = df_result[df_result['relabeled'] != 0]

# 4. Remove highly correlated columns: for every pair with |r| > 0.95, drop
#    the later column (only the strict upper triangle is inspected).
corr_matrix = df_result[dependent_vars].corr().abs()
upper_tri = corr_matrix.where(np.triu(np.ones(corr_matrix.shape), k=1).astype(bool))
to_drop = [column for column in upper_tri.columns if any(upper_tri[column] > 0.95)]  # Threshold: 0.95
dependent_vars = [col for col in dependent_vars if col not in to_drop]

# 5. Handle missing values
df_result = df_result.dropna(subset=dependent_vars + ['relabeled'])

# 6. MANOVA analysis.
# The metric column names contain spaces, which patsy cannot parse as bare
# identifiers, so every name is wrapped in patsy's Q("...") quoting helper.
# NOTE(review): if 'relabeled' is stored as a number it enters the model as a
# continuous covariate; consider C(relabeled) to treat grasps as groups.
quoted_vars = [f'Q("{var}")' for var in dependent_vars]
formula = f"{' + '.join(quoted_vars)} ~ relabeled"
try:
    manova = MANOVA.from_formula(formula, data=df_result)
    print(manova.mv_test())
except Exception as e:
    # Best effort: report the failure and continue with the clustering.
    print(f"Error in MANOVA: {e}")

# 7. Group by 'relabeled' and compute the mean of each metric
grouped = df_result[dependent_vars].groupby(df_result['relabeled']).mean()

# 8. Feature normalization
scaler = StandardScaler()
scaled_features = scaler.fit_transform(grouped)

# 9. Pairwise Mahalanobis distances between grasps (condensed vector form)
cov_matrix = np.cov(scaled_features, rowvar=False)
inv_cov_matrix = np.linalg.pinv(cov_matrix)  # Pseudo-inverse for numerical stability
mahalanobis_distances = pdist(scaled_features, metric='mahalanobis', VI=inv_cov_matrix)

# 10. Square distance matrix (kept for inspection)
distance_matrix = squareform(mahalanobis_distances)

# 11. Agglomerative hierarchical clustering.
# linkage() interprets a 2-D array as observation vectors, not as distances,
# so it must receive the condensed vector rather than the square matrix.
linked = sch.linkage(mahalanobis_distances, method='complete')

# 12. Normalize merge heights to [0, 1] for the dendrogram
linked[:, 2] /= np.max(linked[:, 2])

# 13. Dendrogram
plt.figure(figsize=(14, 6))
sch.dendrogram(
    linked,
    labels=grouped.index.tolist(),
    leaf_rotation=90,
    leaf_font_size=10,
    color_threshold=0.4 * max(linked[:, 2])
)
plt.title("Dendrogram Based on Normalized Mahalanobis Distance", fontsize=14, pad=15)
plt.xlabel("Grasps", fontsize=12)
plt.ylabel("Normalized Mahalanobis Distance", fontsize=12)
plt.xticks(rotation=90)
plt.show()

In [None]:
# Same analysis as the rest-free Mahalanobis clustering, but the dendrogram
# result is kept in `dendro` so its leaf colors can be reused by later plots.

# 1. Select only the metrics of interest
metrics = ['RMS mean', 'RMS_STD mean', 'RMS std', 'RMS_STD std', 'MNF mean', 'MNF_STD mean', 'MNF std', 'MNF_STD std']
dependent_vars = [col for col in df_result.columns if col in metrics]
print("Selected variables:", dependent_vars)

# 2. Validate the inputs before they are used, so a missing column produces
#    these messages instead of a KeyError further down.
missing_vars = [var for var in dependent_vars if var not in df_result.columns]
if missing_vars:
    raise ValueError(f"The following dependent variables are missing in the DataFrame: {missing_vars}")
if 'relabeled' not in df_result.columns:
    raise ValueError("The 'relabeled' column is not present in the DataFrame.")

# 3. Exclude data where relabeled == 0 (note: this rebinds df_result for all later cells)
df_result = df_result[df_result['relabeled'] != 0]

# 4. Remove highly correlated columns: for every pair with |r| > 0.95, drop
#    the later column (only the strict upper triangle is inspected).
corr_matrix = df_result[dependent_vars].corr().abs()
upper_tri = corr_matrix.where(np.triu(np.ones(corr_matrix.shape), k=1).astype(bool))
to_drop = [column for column in upper_tri.columns if any(upper_tri[column] > 0.95)]  # Threshold: 0.95
dependent_vars = [col for col in dependent_vars if col not in to_drop]

# 5. Handle missing values
df_result = df_result.dropna(subset=dependent_vars + ['relabeled'])

# 6. MANOVA analysis.
# The metric column names contain spaces, which patsy cannot parse as bare
# identifiers, so every name is wrapped in patsy's Q("...") quoting helper.
# NOTE(review): if 'relabeled' is stored as a number it enters the model as a
# continuous covariate; consider C(relabeled) to treat grasps as groups.
quoted_vars = [f'Q("{var}")' for var in dependent_vars]
formula = f"{' + '.join(quoted_vars)} ~ relabeled"
try:
    manova = MANOVA.from_formula(formula, data=df_result)
    print(manova.mv_test())
except Exception as e:
    # Best effort: report the failure and continue with the clustering.
    print(f"Error in MANOVA: {e}")

# 7. Group by 'relabeled' and compute the mean of each metric
grouped = df_result[dependent_vars].groupby(df_result['relabeled']).mean()

# 8. Feature normalization
scaler = StandardScaler()
scaled_features = scaler.fit_transform(grouped)

# 9. Pairwise Mahalanobis distances between grasps (condensed vector form)
cov_matrix = np.cov(scaled_features, rowvar=False)
inv_cov_matrix = np.linalg.pinv(cov_matrix)  # Pseudo-inverse for numerical stability
mahalanobis_distances = pdist(scaled_features, metric='mahalanobis', VI=inv_cov_matrix)

# 10. Square distance matrix (kept for inspection)
distance_matrix = squareform(mahalanobis_distances)

# 11. Agglomerative hierarchical clustering.
# linkage() interprets a 2-D array as observation vectors, not as distances,
# so it must receive the condensed vector rather than the square matrix.
linked = sch.linkage(mahalanobis_distances, method='complete')

# 12. Normalize merge heights to [0, 1] for the dendrogram
linked[:, 2] /= np.max(linked[:, 2])

# 13. Dendrogram (the returned dict carries leaf order, labels and colors)
plt.figure(figsize=(14, 6))
dendro = sch.dendrogram(
    linked,
    labels=grouped.index.tolist(),
    leaf_rotation=90,
    leaf_font_size=10,
    color_threshold=0.4 * max(linked[:, 2])
)

# Report which color each original row of `grouped` received.
cluster_colors = dendro['leaves_color_list']
leaf_order = dendro['leaves']  # Positions of the original rows, in left-to-right leaf order
data_color_map = dict(zip(leaf_order, cluster_colors))
print("Índice de los datos originales y su color asignado:")
for index, color in sorted(data_color_map.items()):
    print(f"Dato {index}: Color {color}")
print("Colores usados en el threshold:", cluster_colors)

plt.title("Dendrogram Based on Normalized Mahalanobis Distance", fontsize=14, pad=15)
plt.xlabel("Grasps", fontsize=12)
plt.ylabel("Normalized Mahalanobis Distance", fontsize=12)
plt.xticks(rotation=90)
plt.show()

In [None]:
import numpy as np
import pandas as pd
import scipy.cluster.hierarchy as sch
import matplotlib.pyplot as plt

# Scatter of the per-grasp means (RMS mean vs MNF mean), one point per grasp,
# colored with the color its leaf received in the dendrogram stored in `dendro`.

# Leaf labels come back as text; convert them to the integer grasp ids.
leaf_labels = [int(text) for text in dendro['ivl']]
color_dict = dict(zip(leaf_labels, dendro['leaves_color_list']))

# Per-grasp table to plot (this is the raw metric space, no PCA is applied here).
df_plot = grouped[['RMS mean', 'MNF mean', 'RMS std', 'MNF std']].copy()
df_plot['relabeled'] = grouped.index.astype(int)  # Integer ids, to match color_dict keys

plt.figure(figsize=(8, 6))

for label in df_plot['relabeled'].unique():
    subset = df_plot[df_plot['relabeled'] == label]
    mean_x = subset['RMS mean'].values[0]
    mean_y = subset['MNF mean'].values[0]

    # Grasps without a dendrogram leaf fall back to black.
    color = color_dict.get(label, 'black')

    plt.scatter(mean_x, mean_y, label=f'Relabeled {label}', color=color, edgecolor='black', s=100)

plt.xlabel('RMS Mean')
plt.ylabel('MNF Mean')
plt.title('Variabilidad entre Movimientos')
plt.legend()
plt.grid()
plt.show()



In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
import scipy.cluster.hierarchy as sch
from scipy.spatial.distance import pdist, squareform

# 2-D PCA of the RMS/MNF metrics (rest class removed), with every point
# colored by the dendrogram cluster of its grasp (colors taken from `dendro`).

# 1. Select the metrics of interest
metrics = ['RMS mean', 'RMS_STD mean', 'RMS std', 'RMS_STD std', 'MNF mean', 'MNF_STD mean', 'MNF std', 'MNF_STD std']
dependent_vars = [col for col in df_result.columns if col in metrics]

# 2. Exclude rows with relabeled == 0 (note: this rebinds df_result for all later cells)
df_result = df_result[df_result['relabeled'] != 0]

# 3. Drop one column of every pair with |r| > 0.95
corr_matrix = df_result[dependent_vars].corr().abs()
upper_tri = corr_matrix.where(np.triu(np.ones(corr_matrix.shape), k=1).astype(bool))
to_drop = [column for column in upper_tri.columns if any(upper_tri[column] > 0.95)]
dependent_vars = [col for col in dependent_vars if col not in to_drop]

# 4. Handle missing values
df_result = df_result.dropna(subset=dependent_vars + ['relabeled'])

# 5. Feature normalization
scaler = StandardScaler()
df_pca_scaled = scaler.fit_transform(df_result[dependent_vars])

# 6. Apply PCA
pca = PCA(n_components=2)
pca_result = pca.fit_transform(df_pca_scaled)

# 7. Result table.
# df_result keeps its original index after the filtering above, while the PCA
# table has a fresh 0..n-1 index. The labels are therefore attached by position
# (to_numpy); assigning the Series directly would align on index and yield
# NaN or wrong labels.
df_pca_result = pd.DataFrame(pca_result, columns=['PC1', 'PC2'])
df_pca_result['relabeled'] = df_result['relabeled'].astype(int).to_numpy()

# 8. Pairwise Mahalanobis distances between rows (condensed vector form).
# These results are not used by the plot below.
cov_matrix = np.cov(df_pca_scaled, rowvar=False)
inv_cov_matrix = np.linalg.pinv(cov_matrix)
mahalanobis_distances = pdist(df_pca_scaled, metric='mahalanobis', VI=inv_cov_matrix)
distance_matrix = squareform(mahalanobis_distances)

# 9. Hierarchical clustering.
# linkage() interprets a 2-D array as observation vectors, not as distances,
# so it must receive the condensed vector rather than the square matrix.
linked = sch.linkage(mahalanobis_distances, method='complete')
linked[:, 2] /= np.max(linked[:, 2])

# 10. Map grasp label -> dendrogram leaf color (only labels present in the PCA table)
dendro_labels = [int(label) for label in dendro['ivl']]
unique_labels = sorted(df_pca_result['relabeled'].unique())
color_dict = {
    leaf: leaf_color
    for leaf, leaf_color in zip(dendro_labels, dendro['leaves_color_list'])
    if leaf in unique_labels
}

# 11. PCA scatter with the dendrogram colors
plt.figure(figsize=(8, 6))
for label in unique_labels:
    subset = df_pca_result[df_pca_result['relabeled'] == label]
    # Labels without a dendrogram leaf fall back to a tab10 color.
    color = color_dict.get(label, plt.cm.tab10(label % 10))
    plt.scatter(subset['PC1'], subset['PC2'], label=f'Relabeled {label}', color=color, edgecolor='black', s=100)

plt.xlabel('PC1')
plt.ylabel('PC2')
plt.title('PCA de Movimientos con Colores del Dendrograma')
plt.legend()
plt.grid()
plt.show()



In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
import scipy.cluster.hierarchy as sch
from scipy.spatial.distance import pdist, squareform

# 3-D PCA of the RMS/MNF metrics (rest class removed), with every point
# colored by the dendrogram cluster of its grasp (colors taken from `dendro`).

# 1. Select the metrics of interest
metrics = ['RMS mean', 'RMS_STD mean', 'RMS std', 'RMS_STD std', 'MNF mean', 'MNF_STD mean', 'MNF std', 'MNF_STD std']
dependent_vars = [col for col in df_result.columns if col in metrics]

# 2. Exclude rows with relabeled == 0 (note: this rebinds df_result for all later cells)
df_result = df_result[df_result['relabeled'] != 0]

# 3. Drop one column of every pair with |r| > 0.95
corr_matrix = df_result[dependent_vars].corr().abs()
upper_tri = corr_matrix.where(np.triu(np.ones(corr_matrix.shape), k=1).astype(bool))
to_drop = [column for column in upper_tri.columns if any(upper_tri[column] > 0.95)]
dependent_vars = [col for col in dependent_vars if col not in to_drop]

# 4. Handle missing values
df_result = df_result.dropna(subset=dependent_vars + ['relabeled'])

# 5. Feature normalization
scaler = StandardScaler()
df_pca_scaled = scaler.fit_transform(df_result[dependent_vars])

# 6. Apply PCA
pca = PCA(n_components=3)
pca_result = pca.fit_transform(df_pca_scaled)

# 7. Result table.
# df_result keeps its original index after the filtering above, while the PCA
# table has a fresh 0..n-1 index. The labels are therefore attached by position
# (to_numpy); assigning the Series directly would align on index and yield
# NaN or wrong labels.
df_pca_result = pd.DataFrame(pca_result, columns=['PC1', 'PC2', 'PC3'])
df_pca_result['relabeled'] = df_result['relabeled'].astype(int).to_numpy()

# 8. Pairwise Mahalanobis distances between rows (condensed vector form).
# These results are not used by the plot below.
cov_matrix = np.cov(df_pca_scaled, rowvar=False)
inv_cov_matrix = np.linalg.pinv(cov_matrix)
mahalanobis_distances = pdist(df_pca_scaled, metric='mahalanobis', VI=inv_cov_matrix)
distance_matrix = squareform(mahalanobis_distances)

# 9. Hierarchical clustering.
# linkage() interprets a 2-D array as observation vectors, not as distances,
# so it must receive the condensed vector rather than the square matrix.
linked = sch.linkage(mahalanobis_distances, method='complete')
linked[:, 2] /= np.max(linked[:, 2])

# 10. Map grasp label -> dendrogram leaf color (only labels present in the PCA table)
dendro_labels = [int(label) for label in dendro['ivl']]
unique_labels = sorted(df_pca_result['relabeled'].unique())
color_dict = {
    leaf: leaf_color
    for leaf, leaf_color in zip(dendro_labels, dendro['leaves_color_list'])
    if leaf in unique_labels
}

# 11. 3-D PCA scatter with the dendrogram colors
fig = plt.figure(figsize=(10, 8))
ax = fig.add_subplot(111, projection='3d')

for label in unique_labels:
    subset = df_pca_result[df_pca_result['relabeled'] == label]
    # Labels without a dendrogram leaf fall back to a tab10 color.
    color = color_dict.get(label, plt.cm.tab10(label % 10))
    ax.scatter(subset['PC1'], subset['PC2'], subset['PC3'], label=f'Relabeled {label}', color=color, edgecolor='black', s=100)

ax.set_xlabel('PC1')
ax.set_ylabel('PC2')
ax.set_zlabel('PC3')
ax.set_title('PCA 3D de Movimientos con Colores del Dendrograma')
plt.legend()
plt.show()


In [None]:
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
import scipy.cluster.hierarchy as sch
from scipy.spatial.distance import pdist, squareform
import plotly.graph_objects as go

# Interactive 3-D PCA (Plotly) of the RMS/MNF metrics (rest class removed),
# with every point colored by the dendrogram cluster of its grasp.

# 1. Select the metrics of interest
metrics = ['RMS mean', 'RMS_STD mean', 'RMS std', 'RMS_STD std', 'MNF mean', 'MNF_STD mean', 'MNF std', 'MNF_STD std']
dependent_vars = [col for col in df_result.columns if col in metrics]

# 2. Exclude rows with relabeled == 0 (note: this rebinds df_result for all later cells)
df_result = df_result[df_result['relabeled'] != 0]

# 3. Drop one column of every pair with |r| > 0.95
corr_matrix = df_result[dependent_vars].corr().abs()
upper_tri = corr_matrix.where(np.triu(np.ones(corr_matrix.shape), k=1).astype(bool))
to_drop = [column for column in upper_tri.columns if any(upper_tri[column] > 0.95)]
dependent_vars = [col for col in dependent_vars if col not in to_drop]

# 4. Handle missing values
df_result = df_result.dropna(subset=dependent_vars + ['relabeled'])

# 5. Feature normalization
scaler = StandardScaler()
df_pca_scaled = scaler.fit_transform(df_result[dependent_vars])

# 6. Apply PCA
pca = PCA(n_components=3)
pca_result = pca.fit_transform(df_pca_scaled)

# 7. Result table.
# df_result keeps its original index after the filtering above, while the PCA
# table has a fresh 0..n-1 index. The labels are therefore attached by position
# (to_numpy); assigning the Series directly would align on index and yield
# NaN or wrong labels.
df_pca_result = pd.DataFrame(pca_result, columns=['PC1', 'PC2', 'PC3'])
df_pca_result['relabeled'] = df_result['relabeled'].astype(int).to_numpy()

# 8. Pairwise Mahalanobis distances between rows (condensed vector form).
# These results are not used by the plot below.
cov_matrix = np.cov(df_pca_scaled, rowvar=False)
inv_cov_matrix = np.linalg.pinv(cov_matrix)
mahalanobis_distances = pdist(df_pca_scaled, metric='mahalanobis', VI=inv_cov_matrix)
distance_matrix = squareform(mahalanobis_distances)

# 9. Hierarchical clustering.
# linkage() interprets a 2-D array as observation vectors, not as distances,
# so it must receive the condensed vector rather than the square matrix.
linked = sch.linkage(mahalanobis_distances, method='complete')
linked[:, 2] /= np.max(linked[:, 2])

# 10. Map grasp label -> dendrogram leaf color.
# Matplotlib color names such as 'C1' are converted to hex so Plotly accepts them.
dendro_labels = [int(label) for label in dendro['ivl']]
unique_labels = sorted(df_pca_result['relabeled'].unique())
color_dict = {
    leaf: matplotlib.colors.to_hex(leaf_color)
    for leaf, leaf_color in zip(dendro_labels, dendro['leaves_color_list'])
    if leaf in unique_labels
}

# 11. Interactive 3-D PCA scatter, one trace per grasp label
fig = go.Figure()

for label in unique_labels:
    subset = df_pca_result[df_pca_result['relabeled'] == label]
    color = color_dict.get(label, 'gray')  # Labels without a dendrogram leaf are gray
    fig.add_trace(go.Scatter3d(
        x=subset['PC1'],
        y=subset['PC2'],
        z=subset['PC3'],
        mode='markers',
        marker=dict(size=6, color=color, opacity=0.8),
        name=f'Relabeled {label}'
    ))

fig.update_layout(
    title='PCA 3D Interactivo de Movimientos con Colores del Dendrograma',
    scene=dict(
        xaxis_title='PC1',
        yaxis_title='PC2',
        zaxis_title='PC3'
    ),
    margin=dict(l=0, r=0, b=0, t=40)
)

fig.show()

Para todos los features

In [None]:
# Show the dtype of every df_result column.
print(df_result.dtypes)


In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
import scipy.cluster.hierarchy as sch
from scipy.spatial.distance import pdist, squareform

# 2-D PCA on the numeric columns listed in `dependent_vars`, with every point
# colored by the dendrogram cluster of its grasp (colors taken from `dendro`).

# 1. Keep only the numeric columns among the current dependent variables.
# NOTE(review): this only narrows the `dependent_vars` left by an earlier cell;
# it never adds columns. If the intent is "all features", the list presumably
# has to be rebuilt from df_result.columns - confirm.
dependent_vars = [col for col in dependent_vars if col in df_result.columns and np.issubdtype(df_result[col].dtype, np.number)]

# 2. Exclude rows with relabeled == 0 (note: this rebinds df_result for all later cells)
df_result = df_result[df_result['relabeled'] != 0]

# 3. Drop one column of every pair with |r| > 0.95
corr_matrix = df_result[dependent_vars].corr().abs()
upper_tri = corr_matrix.where(np.triu(np.ones(corr_matrix.shape), k=1).astype(bool))
to_drop = [column for column in upper_tri.columns if any(upper_tri[column] > 0.95)]
dependent_vars = [col for col in dependent_vars if col not in to_drop]

# 4. Handle missing values
df_result = df_result.dropna(subset=dependent_vars + ['relabeled'])

# 5. Feature normalization
scaler = StandardScaler()
df_pca_scaled = scaler.fit_transform(df_result[dependent_vars])

# 6. Apply PCA
pca = PCA(n_components=2)
pca_result = pca.fit_transform(df_pca_scaled)

# 7. Result table.
# df_result keeps its original index after the filtering above, while the PCA
# table has a fresh 0..n-1 index. The labels are therefore attached by position
# (to_numpy); assigning the Series directly would align on index and yield
# NaN or wrong labels.
df_pca_result = pd.DataFrame(pca_result, columns=['PC1', 'PC2'])
df_pca_result['relabeled'] = df_result['relabeled'].astype(int).to_numpy()

# 8. Pairwise Mahalanobis distances between rows (condensed vector form).
# These results are not used by the plot below.
cov_matrix = np.cov(df_pca_scaled, rowvar=False)
inv_cov_matrix = np.linalg.pinv(cov_matrix)
mahalanobis_distances = pdist(df_pca_scaled, metric='mahalanobis', VI=inv_cov_matrix)
distance_matrix = squareform(mahalanobis_distances)

# 9. Hierarchical clustering.
# linkage() interprets a 2-D array as observation vectors, not as distances,
# so it must receive the condensed vector rather than the square matrix.
linked = sch.linkage(mahalanobis_distances, method='complete')
linked[:, 2] /= np.max(linked[:, 2])

# 10. Map grasp label -> dendrogram leaf color (only labels present in the PCA table)
dendro_labels = [int(label) for label in dendro['ivl']]
unique_labels = sorted(df_pca_result['relabeled'].unique())
color_dict = {
    leaf: leaf_color
    for leaf, leaf_color in zip(dendro_labels, dendro['leaves_color_list'])
    if leaf in unique_labels
}

# 11. PCA scatter with the dendrogram colors
plt.figure(figsize=(8, 6))
for label in unique_labels:
    subset = df_pca_result[df_pca_result['relabeled'] == label]
    # Labels without a dendrogram leaf fall back to a tab10 color.
    color = color_dict.get(label, plt.cm.tab10(label % 10))
    plt.scatter(subset['PC1'], subset['PC2'], label=f'Relabeled {label}', color=color, edgecolor='black', s=100)

plt.xlabel('PC1')
plt.ylabel('PC2')
plt.title('PCA de Movimientos con Colores del Dendrograma')
plt.legend()
plt.grid()
plt.show()

In [None]:
# Obtener pesos de cada feature en cada componente principal
pca_loadings = pd.DataFrame(
    pca.components_.T,  # Transponer para tener features en filas y componentes en columnas
    index=dependent_vars,  # Nombres de las features
    columns=[f'PC{i+1}' for i in range(pca.n_components_)]  # Nombres de los componentes
)

# Mostrar los pesos
print("Pesos de las características en los componentes principales:")
display(pca_loadings)

In [None]:
# Opcional: Graficar las cargas de los primeros tres componentes
plt.figure(figsize=(10, 6))
pca_loadings.plot(kind='bar', figsize=(12, 6), cmap='viridis', edgecolor='black')
plt.title("Pesos de cada Feature en los Componentes Principales")
plt.xlabel("Características")
plt.ylabel("Carga")
plt.xticks(rotation=45, ha='right')
plt.legend(title="Componentes Principales")
plt.grid(axis='y', linestyle='--', alpha=0.7)
plt.show()

In [None]:
# Obtener pesos de cada feature en cada componente principal
pca_loadings = pd.DataFrame(
    pca.components_.T,  # Transponer para tener features en filas y componentes en columnas
    index=dependent_vars,  # Nombres de las features
    columns=[f'PC{i+1}' for i in range(pca.n_components_)]  # Nombres de los componentes
)

# Calcular el peso total de cada feature
explained_variance = pca.explained_variance_ratio_  # Varianza explicada por cada componente
feature_weights = (pca_loadings**2) @ explained_variance  # Ponderar cargas por la varianza explicada y sumar

# Crear DataFrame con los pesos totales
feature_weights_df = pd.DataFrame(feature_weights, columns=['Total Weight'])
feature_weights_df = feature_weights_df.sort_values(by='Total Weight', ascending=False)  # Ordenar de mayor a menor

# Mostrar resultados
print("Pesos totales de cada característica en el PCA:")
display(feature_weights_df)

# Graficar los pesos totales
plt.figure(figsize=(10, 6))
feature_weights_df.plot(kind='bar', figsize=(12, 6), cmap='viridis', edgecolor='black', legend=False)
plt.title("Peso Total de cada Feature en el PCA")
plt.xlabel("Características")
plt.ylabel("Peso Total")
plt.xticks(rotation=45, ha='right')
plt.grid(axis='y', linestyle='--', alpha=0.7)
plt.show()


In [None]:
import numpy as np
import pandas as pd
import scipy.cluster.hierarchy as sch
import matplotlib.pyplot as plt

# Scatter of every df_result row in the raw metric space (RMS mean vs MNF mean),
# colored with the dendrogram leaf color of the row's grasp (from `dendro`).

# Leaf labels come back as text; convert them to the integer grasp ids.
leaf_labels = [int(label) for label in dendro['ivl']]
color_dict = dict(zip(leaf_labels, dendro['leaves_color_list']))

# The grouping column is required to color the points.
if 'relabeled' not in df_result.columns:
    raise KeyError("La columna 'relabeled' no se encuentra en df_result")

# Data to plot (raw metrics; no PCA is applied in this cell)
df_plot = df_result[['RMS mean', 'MNF mean', 'RMS std', 'MNF std']].copy()
df_plot['relabeled'] = df_result['relabeled'].astype(int)

plt.figure(figsize=(8, 6))

# One vectorized scatter call instead of one call per row; grasps without a
# dendrogram leaf fall back to black.
point_colors = df_plot['relabeled'].map(color_dict).fillna('black')
plt.scatter(df_plot['RMS mean'], df_plot['MNF mean'], c=point_colors.tolist(), edgecolor='black', s=100)

# Empty proxy scatters give the legend one entry per grasp label.
legend_labels = {label: color_dict.get(label, 'black') for label in df_plot['relabeled'].unique()}

for label, color in legend_labels.items():
    plt.scatter([], [], color=color, label=f'Relabeled {label}', edgecolor='black', s=100)

# The axes show the raw metrics, not principal components.
plt.xlabel('RMS Mean')
plt.ylabel('MNF Mean')
plt.title('Variabilidad entre Movimientos')
plt.legend()
plt.grid()
plt.show()


In [None]:
import numpy as np
import pandas as pd
import scipy.cluster.hierarchy as sch
import matplotlib.pyplot as plt

# Scatter of every df_result row in the raw metric space (RMS mean vs MAV mean),
# colored with the dendrogram leaf color of the row's grasp (from `dendro`).

# Leaf labels come back as text; convert them to the integer grasp ids.
leaf_labels = [int(label) for label in dendro['ivl']]
color_dict = dict(zip(leaf_labels, dendro['leaves_color_list']))

# The grouping column is required to color the points.
if 'relabeled' not in df_result.columns:
    raise KeyError("La columna 'relabeled' no se encuentra en df_result")

# Data to plot (raw metrics; no PCA is applied in this cell)
df_plot = df_result[['RMS mean', 'MAV mean', 'RMS std', 'MAV std']].copy()
df_plot['relabeled'] = df_result['relabeled'].astype(int)

plt.figure(figsize=(8, 6))

# One vectorized scatter call instead of one call per row; grasps without a
# dendrogram leaf fall back to black.
point_colors = df_plot['relabeled'].map(color_dict).fillna('black')
plt.scatter(df_plot['RMS mean'], df_plot['MAV mean'], c=point_colors.tolist(), edgecolor='black', s=100)

# Empty proxy scatters give the legend one entry per grasp label.
legend_labels = {label: color_dict.get(label, 'black') for label in df_plot['relabeled'].unique()}

for label, color in legend_labels.items():
    plt.scatter([], [], color=color, label=f'Relabeled {label}', edgecolor='black', s=100)

plt.xlabel('RMS Mean')
plt.ylabel('MAV Mean')
plt.title('Variabilidad entre Movimientos')
plt.legend()
plt.grid()
plt.show()

In [None]:
import numpy as np
import pandas as pd
import scipy.cluster.hierarchy as sch
import matplotlib.pyplot as plt

# Scatter of every df_result row in the raw metric space (RMS mean vs MAV mean),
# colored with the dendrogram leaf color of the row's grasp (from `dendro`).

# Leaf labels come back as text; convert them to the integer grasp ids.
leaf_labels = [int(label) for label in dendro['ivl']]
color_dict = dict(zip(leaf_labels, dendro['leaves_color_list']))

# The grouping column is required to color the points.
if 'relabeled' not in df_result.columns:
    raise KeyError("La columna 'relabeled' no se encuentra en df_result")

# Data to plot (raw metrics; no PCA is applied in this cell)
df_plot = df_result[['RMS mean', 'MAV mean', 'RMS std', 'MAV std']].copy()
df_plot['relabeled'] = df_result['relabeled'].astype(int)

plt.figure(figsize=(8, 6))

# One vectorized scatter call instead of one call per row; grasps without a
# dendrogram leaf fall back to black.
point_colors = df_plot['relabeled'].map(color_dict).fillna('black')
plt.scatter(df_plot['RMS mean'], df_plot['MAV mean'], c=point_colors.tolist(), edgecolor='black', s=100)

# Empty proxy scatters give the legend one entry per grasp label.
legend_labels = {label: color_dict.get(label, 'black') for label in df_plot['relabeled'].unique()}

for label, color in legend_labels.items():
    plt.scatter([], [], color=color, label=f'Relabeled {label}', edgecolor='black', s=100)

plt.xlabel('RMS Mean')
plt.ylabel('MAV Mean')
plt.title('Variabilidad entre Movimientos')
plt.legend()
plt.grid()
plt.show()

In [None]:
# Preview the first rows of df_result.
df_result.head()

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
import matplotlib.cm as cm

# 2-D PCA of four RMS/MNF metrics from df_result, one tab10 color per grasp label.

# 1. Select the PCA features
df_pca = df_result[['RMS mean', 'MNF mean', 'RMS std', 'MNF std']].copy()

# 2. Normalize the data
scaler = StandardScaler()
df_pca_scaled = scaler.fit_transform(df_pca)

# 3. Apply PCA (two components, for a 2-D plot)
pca = PCA(n_components=2)
pca_result = pca.fit_transform(df_pca_scaled)

# 4. Result table.
# The labels must come from the same frame as the features (df_result) and be
# attached by position: the PCA table has a fresh 0..n-1 index, so assigning a
# Series from another frame would align on index and give unrelated labels.
df_pca_result = pd.DataFrame(pca_result, columns=['PC1', 'PC2'])
df_pca_result['relabeled'] = df_result['relabeled'].astype(int).to_numpy()

# 5. Distinct grasp labels present in the table
unique_labels = df_pca_result['relabeled'].unique()
num_labels = len(unique_labels)

# 6. One color per label.
# pyplot.get_cmap is used because matplotlib.cm.get_cmap no longer exists in
# recent Matplotlib releases.
cmap = plt.get_cmap('tab10', num_labels)
color_dict = {label: cmap(i) for i, label in enumerate(unique_labels)}

# 7. PCA scatter, one series per label
plt.figure(figsize=(8, 6))
for label in unique_labels:
    subset = df_pca_result[df_pca_result['relabeled'] == label]
    color = color_dict[label]
    # c expects a sequence of colors, hence the single-element list.
    plt.scatter(subset['PC1'], subset['PC2'], label=f'Relabeled {label}', c=[color], edgecolor='black', s=100)

plt.xlabel('PC1')
plt.ylabel('PC2')
plt.title('PCA de Movimientos')
plt.legend()
plt.grid()
plt.show()


SEÑAL COMPLETA SIN ENVENTANADO 

In [None]:
# Definir los valores específicos de 'relabeled' que queremos filtrar
filtered_labels = [55, 2, 4, 14, 10, 16, 17, 19, 32]

# Filtrar el DataFrame agrupado
dataframe_windowing = grouped.loc[filtered_labels]

dataframe_windowing

DATAFRAME RELABELED DE INTERÉS

In [None]:
# Filtrar el DataFrame combinado
dataframe_windowing = combined_df[combined_df['relabeled'].isin(filtered_labels)]

# Mostrar el DataFrame filtrado
display(dataframe_windowing)


ENVENTANADO DE 100 ms 

In [None]:
# Definir la frecuencia de muestreo en Hz (ajustar según los datos)
sampling_rate = 2000  # Por ejemplo, 200 Hz significa 200 muestras por segundo

# Calcular el tamaño de la ventana en número de muestras
window_size = int(0.1 * sampling_rate)  # 100 ms = 0.1 segundos

# Lista para almacenar las ventanas
windowed_data = []

# Aplicar enventanado a cada grupo de 'relabeled'
for label, group in dataframe_windowing.groupby('relabeled'):
    # Seleccionar solo columnas numéricas
    numeric_cols = group.select_dtypes(include=['number'])
    
    # Aplicar el enventanado con una ventana deslizante
    for i in range(0, len(numeric_cols) - window_size + 1, window_size):
        window = numeric_cols.iloc[i:i + window_size]  # Extraer la ventana
        window_mean = window.mean()  # Obtener la media de la ventana
        
        # Agregar la columna 'relabeled' y otros datos categóricos si es necesario
        window_mean['relabeled'] = label  # Mantener la etiqueta
        windowed_data.append(window_mean)

# Convertir la lista en un DataFrame
dataframe_windowing_100 = pd.DataFrame(windowed_data)

# Mostrar el DataFrame enventanado
display(dataframe_windowing_100)


In [None]:
# EMG metrics per (movement, channel) computed on the 100 ms windowed data.

# One dict per (movement, channel) pair is collected here
metrics_data = []

# Group by the movement label (change 'relabeled' to 'stimulus' if needed).
# The key is passed as a plain string so each group key is the label itself;
# a one-element list makes recent pandas versions yield 1-tuples such as
# (55,), which would then be stored as the label.
for relabeled, group in dataframe_windowing_100.groupby('relabeled'):
    # Visit every EMG channel column of the group
    for channel in group.columns:
        if not channel.startswith('Channel'):
            continue  # Not an EMG signal column

        # Windowed signal values of the current channel
        channel_signal = group[channel].values

        # Metrics for this channel; calculate_emg_metrics is defined elsewhere
        # in the notebook and its result is unpacked as a mapping below
        metrics = calculate_emg_metrics(channel_signal)

        # Store the identifiers together with the computed metrics
        metrics_data.append({
            "relabeled": relabeled,  # Movement identification (relabeled or stimulus)
            "channel": channel,  # EMG channel
            **metrics  # Unpack all computed metrics
        })

# Create a DataFrame containing all the obtained metrics
metrics_df_windowing_100 = pd.DataFrame(metrics_data)

# Identifier columns first, then the metric columns in the order they were
# produced. The order is derived from the DataFrame itself rather than from
# the leaked loop variable, and reindex keeps this working (empty table with
# the identifier columns) when no channel was processed.
id_columns = ["relabeled", "channel"]
column_order = id_columns + [col for col in metrics_df_windowing_100.columns if col not in id_columns]
metrics_df_windowing_100 = metrics_df_windowing_100.reindex(columns=column_order)

# Display the DataFrame with the computed metrics
print("\nMetrics DataFrame by Channel, Subject, and Relabeled:")
display(metrics_df_windowing_100)

In [None]:
import os
import pandas as pd

# For every subject / exercise file of the database: load the .mat file, build the
# labelled DataFrame, keep only the movements of interest, average the numeric
# columns over consecutive 100 ms windows, and plot the EMG of each kept grasp.

# Sampling frequency in Hz (adjust to match the data)
sampling_rate = 2000
window_size = int(0.1 * sampling_rate)  # 100 ms expressed in samples

# Database name and location of its per-subject folders
database = 'DB4'
data_path = os.path.abspath(os.path.join('data', database))
subjects = [f's{i}' for i in range(1, 11)]

# Only these movement labels are windowed and plotted
filtered_labels = [55, 2, 4, 14, 10, 16, 17, 19, 32]

# Iterate over each subject
for subject in subjects:
    subject_dir = os.path.join(data_path, subject)
    
    # Iterate over the exercise files
    for exercise in ["E1", "E2", "E3"]:
        filename = f"{subject.upper()}_{exercise}_A1.mat"
        file_path = os.path.join(subject_dir, filename)
        
        if not os.path.exists(file_path):
            print(f"File not found: {file_path}")
            continue
        
        print(f"\nProcessing: {filename}")
        
        # Load the .mat file through the project loader
        mat_data = src.loadmatNina(database, filename, subject=subject)
        print(f"Keys in mat_data: {mat_data.keys()}")
        
        # Build the DataFrame; the second return value is iterated below as the
        # grasp labels present in this file
        test_df, grasps_etiquetados = src.build_dataframe(
            mat_file=mat_data,
            database=database,
            filename=filename,
            rectify=False,
            normalize=True
        )
        
        # Keep only the movements of interest
        test_df = test_df[test_df['relabeled'].isin(filtered_labels)]
        
        # Windowing: mean of every numeric column over consecutive,
        # non-overlapping windows of `window_size` rows, computed per movement.
        # Trailing rows that do not fill a whole window are dropped.
        windowed_data = []
        for label, group in test_df.groupby('relabeled'):
            numeric_cols = group.select_dtypes(include=['number'])
            for i in range(0, len(numeric_cols) - window_size + 1, window_size):
                window = numeric_cols.iloc[i:i + window_size]
                window_mean = window.mean()
                window_mean['relabeled'] = label
                windowed_data.append(window_mean)
        
        # NOTE: this name is reassigned for every file, so once the loops finish it
        # holds only the windows of the last file that was processed.
        dataframe_windowing_100 = pd.DataFrame(windowed_data)
        display(dataframe_windowing_100)
        
        # Plot the EMG signals of the kept grasps
        for grasp in grasps_etiquetados:
            if grasp not in filtered_labels:
                continue
            try:
                if 'emg' not in mat_data:
                    raise KeyError(f"The key 'emg' is not in mat_data. Available keys: {mat_data.keys()}")
                
                # Plot with the project's EMG visualization helper
                src.plot_emg_data(
                    database=database,
                    mat_file=mat_data,
                    grasp_number=grasp,
                    interactive=False,
                    include_rest=True,
                    use_stimulus=False,
                    addFourier=False,
                    padding=100,
                    title=f"{filename} - Grasp {grasp}"
                )
            except KeyError as e:
                print(f"    Error: {str(e)}")
            except Exception as e:
                # Best effort: report the failure and move on to the next grasp
                print(f"    Error processing grasp {grasp}: {str(e)}")


ENVENTANADO 200ms

In [None]:
# Sampling frequency in Hz (adjust to match the data); 2000 Hz = 2000 samples per second
sampling_rate = 2000

# Window length in samples: 200 ms = 0.2 seconds
window_size = int(0.2 * sampling_rate)

# Accumulates one averaged row per window
windowed_data = []

# Window each 'relabeled' group separately
for label, group in dataframe_windowing.groupby('relabeled'):
    # Only numeric columns can be averaged
    numeric_cols = group.select_dtypes(include=['number'])

    # Consecutive, non-overlapping windows; trailing rows that do not fill a
    # whole window are left out
    full_windows = len(numeric_cols) // window_size
    for window_index in range(full_windows):
        start = window_index * window_size
        stop = start + window_size

        # Column-wise mean of the rows in this window
        window_mean = numeric_cols.iloc[start:stop].mean()

        # Keep the movement label on the averaged row
        window_mean['relabeled'] = label
        windowed_data.append(window_mean)

# Turn the list of averaged rows into a DataFrame
dataframe_windowing_200 = pd.DataFrame(windowed_data)

# Show the windowed DataFrame
display(dataframe_windowing_200)

In [None]:
# Compute per-channel EMG metrics for every movement in the 200 ms windowed data.
# `calculate_emg_metrics` is defined elsewhere in the notebook; it is used here as
# returning a mapping of metric name -> value (it is unpacked with ** below).
metrics_data = []  # one record (dict) per (movement, channel) pair

# Group by the plain column name instead of a one-element list: with a list key,
# pandas >= 2.0 yields 1-tuples such as (2,) as group keys, which would be stored
# in the "relabeled" column instead of the scalar label.
for relabeled, group in dataframe_windowing_200.groupby('relabeled'):  # Change 'relabeled' to 'stimulus' if needed
    for channel in group.columns:
        if channel.startswith('Channel'):  # Only columns named 'Channel...' are treated as EMG signals
            # Windowed values of this channel for the current movement
            channel_signal = group[channel].values

            # Compute EMG signal metrics for the current channel
            metrics = calculate_emg_metrics(channel_signal)

            # Store the identifying metadata together with the computed metrics
            metrics_data.append({
                #"subject": subject,  # Subject identification
                "relabeled": relabeled,  # Movement identification (relabeled or stimulus)
                "channel": channel,  # EMG channel
                **metrics  # Unpack all computed metrics
            })

# Create a DataFrame containing all the obtained metrics
metrics_df_windowing_200 = pd.DataFrame(metrics_data)

# Reorder columns for better visualization (optional).
# The reordered frame must come from the 200 ms metrics built above, not from the
# 100 ms table. Guarded: when no record was produced, `metrics` is undefined (or
# stale from a previous cell) and the empty frame has no columns to select.
if metrics_data:
    column_order = ["relabeled", "channel"] + list(metrics.keys())
    metrics_df_windowing_200 = metrics_df_windowing_200[column_order]

# Display the DataFrame with the computed metrics
print("\nMetrics DataFrame by Channel, Subject, and Relabeled:")
display(metrics_df_windowing_200)

ENVENTANADO 300ms

In [None]:
# Sampling frequency in Hz (adjust to match the data); 2000 Hz = 2000 samples per second
sampling_rate = 2000

# Window length in samples: 300 ms = 0.3 seconds
window_size = int(0.3 * sampling_rate)

# Accumulates one averaged row per window
windowed_data = []

# Window each 'relabeled' group separately
for label, group in dataframe_windowing.groupby('relabeled'):
    # Only numeric columns can be averaged
    numeric_cols = group.select_dtypes(include=['number'])
    
    # Consecutive, non-overlapping windows (step == window_size); trailing rows
    # that do not fill a whole window are left out
    for i in range(0, len(numeric_cols) - window_size + 1, window_size):
        window = numeric_cols.iloc[i:i + window_size]  # Rows of the current window
        window_mean = window.mean()  # Column-wise mean of the window
        
        # Keep the movement label on the averaged row
        window_mean['relabeled'] = label
        windowed_data.append(window_mean)

# Turn the list of averaged rows into a DataFrame
dataframe_windowing_300 = pd.DataFrame(windowed_data)

# Show the 300 ms windowed DataFrame built above (not the 200 ms one)
display(dataframe_windowing_300)

In [None]:
# Compute per-channel EMG metrics for every movement in the 300 ms windowed data.
# `calculate_emg_metrics` is defined elsewhere in the notebook; it is used here as
# returning a mapping of metric name -> value (it is unpacked with ** below).
metrics_data = []  # one record (dict) per (movement, channel) pair

# Group by the plain column name instead of a one-element list: with a list key,
# pandas >= 2.0 yields 1-tuples such as (2,) as group keys, which would be stored
# in the "relabeled" column instead of the scalar label.
for relabeled, group in dataframe_windowing_300.groupby('relabeled'):  # Change 'relabeled' to 'stimulus' if needed
    for channel in group.columns:
        if channel.startswith('Channel'):  # Only columns named 'Channel...' are treated as EMG signals
            # Windowed values of this channel for the current movement
            channel_signal = group[channel].values

            # Compute EMG signal metrics for the current channel
            metrics = calculate_emg_metrics(channel_signal)

            # Store the identifying metadata together with the computed metrics
            metrics_data.append({
                #"subject": subject,  # Subject identification
                "relabeled": relabeled,  # Movement identification (relabeled or stimulus)
                "channel": channel,  # EMG channel
                **metrics  # Unpack all computed metrics
            })

# Create a DataFrame containing all the obtained metrics
metrics_df_windowing_300 = pd.DataFrame(metrics_data)

# Reorder columns for better visualization (optional).
# The reordered frame must come from the 300 ms metrics built above, not from the
# 100 ms table. Guarded: when no record was produced, `metrics` is undefined (or
# stale from a previous cell) and the empty frame has no columns to select.
if metrics_data:
    column_order = ["relabeled", "channel"] + list(metrics.keys())
    metrics_df_windowing_300 = metrics_df_windowing_300[column_order]

# Display the DataFrame with the computed metrics
print("\nMetrics DataFrame by Channel, Subject, and Relabeled:")
display(metrics_df_windowing_300)