# Set up the notebook

In [None]:
from matplotlib.patches import Rectangle
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

sns.set()  # Apply seaborn's default styling to all subsequent matplotlib figures

from scipy.ndimage import convolve1d
from scipy.io import loadmat 
from scipy.ndimage import convolve1d
from scipy.signal import butter
from scipy.signal import sosfiltfilt
from scipy.signal import welch
import pandas as pd
import os


from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix

# Seed NumPy's global random number generator with a fixed value so that
# code drawing from it produces the same numbers on every run.
np.random.seed(42)

In [1]:
# RERUN flag (False here). Per the original note, it is meant to trigger a
# re-run of the whole notebook when set to True.
# NOTE(review): the cells that read RERUN are not visible in this section —
# confirm exactly which computations it gates.
RERUN = False

# Load and preprocess the data

In [None]:
# Root directory containing the s1, s2, ..., s27 folders
root_dir = "./data"

# Dictionary to store all loaded .mat files, keyed by filename
all_data = {}

# Subject folders that were expected but not found on disk
missing_dirs = []

# Loop through directories s1 to s27
for i in range(1, 28):
    subdir = os.path.join(root_dir, f"s{i}")  # Construct subdirectory path

    if not os.path.isdir(subdir):
        # Best-effort loading: skip absent subjects, but remember them so the
        # summary below does not silently hide an incomplete dataset.
        missing_dirs.append(subdir)
        continue

    # os.listdir returns entries in arbitrary order; sorting makes the insertion
    # order of all_data (and every later loop over it) reproducible.
    for file in sorted(os.listdir(subdir)):
        if not file.endswith(".mat"):  # Only process .mat files
            continue

        if file in all_data:
            # Keys are bare filenames, so the same name in two subject folders
            # means the later file replaces the earlier one. Make that visible.
            print(f"Warning: {file} in {subdir} replaces a previously loaded file with the same name")

        file_path = os.path.join(subdir, file)
        all_data[file] = loadmat(file_path)  # Use the filename as the key

if missing_dirs:
    print(f"Warning: {len(missing_dirs)} subject folder(s) not found: {missing_dirs}")

print("All files loaded!")


All files loaded!


In [3]:
# Define parameters
mov_mean_length = 25  # Length of the moving-average kernel, in samples
mov_mean_weights = np.ones(mov_mean_length) / mov_mean_length  # Uniform weights that sum to 1

# Initialize a dictionary to store the processed data, keyed like all_data
processed_data = {}

# Iterate over all subjects/files
for subject, data in all_data.items():
    print(f"Processing {subject}...")

    # Extract variables from the .mat file
    stimulus = data["stimulus"].flatten()  # Ensure 1D
    repetition = data["repetition"].flatten()  # Ensure 1D
    # EMG signals; indexed below as emg[sample_mask, :], so the first axis must
    # line up with the per-sample stimulus/repetition labels.
    # NOTE(review): second axis is presumably the electrode channel — confirm.
    emg = data["emg"]

    # Labels are matched against 1..max, so the largest label gives the grid size.
    # Label 0 is never selected by the loops below.
    n_stimuli = int(np.max(stimulus))
    n_repetitions = int(np.max(repetition))

    # Nested lists indexed as [stimulus - 1][repetition - 1]
    emg_windows = [[None for _ in range(n_repetitions)] for _ in range(n_stimuli)]
    emg_envelopes = [[None for _ in range(n_repetitions)] for _ in range(n_stimuli)]

    # Process each stimulus and repetition
    for stimuli_idx in range(n_stimuli):
        # The stimulus mask does not depend on the repetition, so build it once
        # per stimulus instead of once per (stimulus, repetition) pair.
        stimulus_mask = stimulus == stimuli_idx + 1

        for repetition_idx in range(n_repetitions):
            # Samples belonging to this stimulus AND this repetition
            idx = np.logical_and(stimulus_mask, repetition == repetition_idx + 1)

            # Raw window is stored exactly as it appears in the recording
            window = emg[idx, :]
            emg_windows[stimuli_idx][repetition_idx] = window

            # convolve1d returns an array of the input's dtype by default, which
            # would truncate the averaged values of an integer-typed recording.
            # Promote only non-float input; float input is passed through as-is.
            if np.issubdtype(window.dtype, np.inexact):
                smoothing_input = window
            else:
                smoothing_input = window.astype(np.float64)

            # Envelope = moving average along the sample axis
            emg_envelopes[stimuli_idx][repetition_idx] = convolve1d(
                smoothing_input,
                mov_mean_weights,
                axis=0
            )

    # Store processed data for this subject
    processed_data[subject] = {
        "emg_windows": emg_windows,
        "emg_envelopes": emg_envelopes
    }

print("Preprocessing completed for all subjects!")


NameError: name 'np' is not defined