In [3]:
import os
import scipy.io as sio
from sklearn.decomposition import PCA
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split, cross_val_score, StratifiedKFold
from sklearn.metrics import accuracy_score
import numpy as np
import glob

# Function to extract gesture label from the filename
def extract_participant_from_filename(filepath):
    filename = os.path.basename(filepath)
    parts = filename.split('_')
    for part in parts:
        if 'participant' in part:
            participant_id = part.replace('participant', '')
            return int(participant_id)  # Convert to integer
    return None  # If no participant ID found
# Function to load and process a single .mat file
def load_and_process_file(filepath, target_length):
    mat_data = sio.loadmat(filepath)
    data_emg = mat_data['data_emg']
    
    # Handle NaNs and Infinities
    data_emg = np.nan_to_num(data_emg, nan=0.0, posinf=0.0, neginf=0.0)
    
    # Pad or truncate the data to the target length
    if data_emg.shape[0] < target_length:
        padded_data = np.pad(data_emg, ((0, target_length - data_emg.shape[0]), (0, 0)), 'constant', constant_values=0)
    else:
        padded_data = data_emg[:target_length]
    
    # Flatten the data for SVM input
    return padded_data.flatten()

# Process a list of files and return processed data and labels
def process_files(file_list, target_length):
    X = []
    y = []
    
    for filepath in file_list:
        X.append(load_and_process_file(filepath, target_length))
        y.append(extract_participant_from_filename(filepath))
    
    return np.array(X), np.array(y)

# Path to the root folder where all .mat files are stored
root_folder = 's1_s2_s3_matfiles_10part'  # Update with your folder path

# Find all .mat files in the folder
all_mat_files = glob.glob(os.path.join(root_folder, '*.mat'))

# Collect all corresponding labels
all_labels = [extract_participant_from_filename(mat_file) for mat_file in all_mat_files]

# Target length for EMG signals (adjust based on your data)
target_length = 10240  # Modify this as needed

# Split the dataset into train and test sets
train_files, test_files, train_labels, test_labels = train_test_split(all_mat_files, all_labels, test_size=0.33, random_state=42)

# Process training and test data
X_train, y_train = process_files(train_files, target_length)
X_test, y_test = process_files(test_files, target_length)
print(f"Shape of X_train: {X_train.shape}")
print(f"Shape of X_test: {X_test.shape}")
# Initialize PCA for dimensionality reduction
pca = PCA(n_components=100)  # Adjust the number of components as needed

# Apply PCA to the training and test sets
X_train_pca = pca.fit_transform(X_train)
X_test_pca = pca.transform(X_test)

svm = SVC(kernel='linear', C=1, gamma='scale')

# Set up 5-fold cross-validation on the training set
cv = StratifiedKFold(n_splits=3, shuffle=True, random_state=42)

# Perform cross-validation
cross_val_scores = cross_val_score(svm, X_train, y_train, cv=cv, scoring='accuracy')

# Fit the model on the entire training set after cross-validation
svm.fit(X_train, y_train)

# Make predictions on the test set
y_pred = svm.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

# Output cross-validation results and final accuracy
print("Cross-Validation Accuracy Scores:", cross_val_scores)
print("Mean Cross-Validation Accuracy:", np.mean(cross_val_scores))
print("Test Set Accuracy:", accuracy)

# # --- Hyperparameter Tuning with GridSearchCV ---

# from sklearn.model_selection import GridSearchCV

# param_grid = {
#     'C': [0.01, 0.1, 1, 10, 100],
#     'gamma': ['scale', 'auto', 0.01, 0.001, 0.0001],
#     'kernel': ['rbf']
# }

# grid = GridSearchCV(SVC(), param_grid, refit=True, verbose=2, cv=5)
# grid.fit(X_train, y_train)

# print("Best parameters:", grid.best_params_)
# y_pred = grid.predict(X_test)
# accuracy = accuracy_score(y_test, y_pred)
# print("Optimized Accuracy:", accuracy)

Shape of X_train: (2386, 327680)
Shape of X_test: (1176, 327680)
Cross-Validation Accuracy Scores: [0.25753769 0.22515723 0.24779874]
Mean Cross-Validation Accuracy: 0.24349788776165945
Test Set Accuracy: 0.2619047619047619
