In [1]:
from sklearnex import patch_sklearn

patch_sklearn()

import numpy as np
import scipy.io
import os

from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.covariance import LedoitWolf

from pyriemann.estimation import Covariances
from pyriemann.classification import TSclassifier
from pyriemann.spatialfilters import CSP
from pyriemann.tangentspace import TangentSpace
from pyriemann.utils.mean import mean_logeuclid
#from pyriemann.classification import SVC

from sklearn.svm import SVC

import h5py

import time

Intel(R) Extension for Scikit-learn* enabled (https://github.com/intel/scikit-learn-intelex)


In [2]:
start_time = time.time()

In [3]:
LoadingPath_Non='/home/nicole/Documents/AudioCueWalking_analysis/Variables/AdaptVsNon/LOSO_CV/Dataset/Ns_100/Delay'

SavingPath = '/home/nicole/Documents/AudioCueWalking_analysis/Variables/AdaptVsNon/LOSO_CV/Dataset/Ns_100/Delay/Transformed/TangentSpace_PS/Transformed/SpatFiltSignals'

In [4]:
Nc = 108

# Functions

In [5]:
import os
import h5py
import numpy as np
import scipy.io

def load_subject_data(loading_path, subject_id, n_components=108):
    """
    Load (raw)training and testing datasets for a given subject.
    
    Parameters:
        loading_path (str): Path to the directory containing the dataset files.
        subject_id (int): Subject ID (0-based index).
        n_components (int): Number of components/features to extract.
        
    Returns:
        dict: A dictionary containing all loaded datasets and features.
    """
    # File paths
    training_projected_path = os.path.join(loading_path, 'TrainingProjected_sub017_018.mat')
    testing_projected_path = os.path.join(loading_path, 'TestingProjected_sub017_018.mat')

    # Load Training Projected Dataset
    with h5py.File(training_projected_path, 'r') as f_training_projected:
        training_projected = f_training_projected['TrainingProjected']
        subject_cell_ref = training_projected[subject_id, 0]
        subject_data = f_training_projected[subject_cell_ref]
        X_train_projected = np.transpose(subject_data['x'][:], (0, 2, 1))
        Y_train_projected = np.squeeze(subject_data['y'][:])

    # Load Testing Projected Dataset
    # Advance
    # f_testing_projected = scipy.io.loadmat(testing_projected_path)
    # testing_projected = f_testing_projected['TestingProjected'][0, subject_id]
    # X_test_projected = np.transpose(np.array(testing_projected['x'][0][0]), (2, 0, 1))
    # #X_test_projected = np.abs(X_test_projected) #Added pre-aligment data is complex
    # Y_test_projected = np.squeeze(np.array(testing_projected['y'][0][0]))
    
    # Delay
    with h5py.File(testing_projected_path, 'r') as f_testing_projected:
        testing_projected = f_testing_projected['TestingProjected']
        subject_cell_ref = testing_projected[subject_id, 0]
        subject_data = f_testing_projected[subject_cell_ref]
        X_test_projected = np.transpose(subject_data['x'][:], (0, 2, 1))
        Y_test_projected = np.squeeze(subject_data['y'][:])


    # Organize results into a dictionary
    data = {
        "X_train_projected": X_train_projected,
        "Y_train_projected": Y_train_projected,
        "X_test_projected": X_test_projected,
        "Y_test_projected": Y_test_projected,
    }

    return data


In [6]:
# Recentering function 
import numpy as np
from scipy.linalg import fractional_matrix_power
from pyriemann.utils.tangentspace import tangent_space

def Recentering_TSProjection(C, M):
    """
    1. Centers each covariance matrix: M^(-1/2) C M^(-1/2))
    2. Projects to TS (& vectorises) using identity matrix as 'new centre'
    
    Parameters:
        C (ndarray): set of covariance matrices (Nt, Nc, Nc) 
        M (ndarray): Reference matrix (Taken as log-eulid mean)
    
    Returns:
        ndarray: The log-transformed matrix.
    """
    
    Nt, _, _ = C.shape # Number of trials 
    
    M_inv_sqrt = fractional_matrix_power(M, -0.5) # Compute M^(-1/2)
    
    # 1. Recentering step (on each covariance matrix) 
    X_centered = np.array([M_inv_sqrt @ C[i] @ M_inv_sqrt for i in range(Nt)])
    #print(X_centered.shape)
    
    #2. TS projection 
    Centered_M = np.eye(C.shape[1]) # New center after centering is the identity matrix 
    X_TS = tangent_space(X_centered, Centered_M) # Project to TS (& vectorise)

    
    return X_TS

In [7]:
def Rescaling(C):
    """
    Rescale all tangent vectors: c_tilde = c / (1/N_t * sum_n ||c_n||)

    Parameters:
        C (ndarray): Collection of tangent vectors (c_n) to be rescaled

    Returns:
        ndarray: Rescaled tangents vectors `c_tilde`.
    """
    
    # Check if C contains any 'nan' and replace with 0.0
    if np.isnan(C).any():
        C = np.nan_to_num(C, nan=0.0)
    
    Nt = C.shape[0] # Number of vectors
    
    # Compute the average magnitude (1/N_t * sum(||c_n||))
    avg_magnitude = np.sum(np.linalg.norm(C, axis=1)) / Nt

    # Normalize all vectors simultaneously
    c_tilde  = C / avg_magnitude

    return c_tilde 

In [8]:
# Rotation Functions


# Mean per class 
def ClassMean(Dataset, Labels):
    """
    Compute the class-wise mean based on the provided equation:
        s̄_k = (1 / N_k) * Σ s̃_i  (for y_i = k)
    
    Parameters:
        Dataset (ndarray): Tangent vectors, Nt = number of trials, Nf = features per trial.
        Labels (ndarray): The corresponding labels for the dataset, of shape (Nt,).
        
    Returns:
        ndarray: matrix of shape (k, Nf) (k = number of classes)
    """
    unique_classes = np.unique(Labels)  # Find unique classes 
    class_means = []

    for cls in unique_classes: # for each class
        # Select data corresponding to the current class
        class_data = Dataset[Labels == cls]  # Filter rows where label == cls
        N_k = class_data.shape[0]  # Number of trials for class k
        
        # Compute mean
        class_mean = np.mean(class_data, axis=0)
        
        # Store result
        class_means.append(class_mean)
    
    C_bar = np.column_stack(class_means)

    return C_bar

# Load subject's dataset

In [9]:
TransformedTraining = {}
TransformedTesting = {}

for sub_id in [16, 17]:
    print(f"Subject {sub_id}:")
    subject_data = load_subject_data(LoadingPath_Non, sub_id, n_components=Nc)

    X_train_dataset = subject_data["X_train_projected"]
    Y_train_dataset = subject_data["Y_train_projected"]
    X_test_dataset = subject_data["X_test_projected"]
    Y_test_dataset = subject_data["Y_test_projected"]

    # Sanity check
    print("Loaded datatsets shape: ")
    print(X_train_dataset.shape)
    print(X_test_dataset.shape)
    
# ---------------------- 1. Recentre - recentres all trials to have centre of mass equal to identity matrix, followed by tangent space mapping (ref = I) ---------------------- 
    # Find covariance matrices
    cov_estimator = Covariances(estimator='lwf')
    Train_cov = cov_estimator.transform(X_train_dataset)
    Test_cov = cov_estimator.transform(X_test_dataset)

    # Find log-euclid centre ('M')
    Train_M = mean_logeuclid(Train_cov)
    Test_M = mean_logeuclid(Test_cov)

    # Centre and project to TS (& vectorise)
    Train_Centered = Recentering_TSProjection(Train_cov, Train_M)
    Test_Centered = Recentering_TSProjection(Test_cov, Test_M)

    #Sanity Check
    print("Centred shapes: ")
    print(Train_Centered.shape)
    print(Test_Centered.shape)

# ---------------------- 2. Rescale - match matrix dispersion around mean in both 'source' and 'target' (setting the average norm within set to be 1) ----------------------
    Train_Rescale = Rescaling(Train_Centered)
    Test_Rescale = Rescaling(Test_Centered)

    #Sanity Check
    print("Rescaled shapes: ")
    print(Train_Rescale.shape)
    print(Test_Rescale.shape)

# ---------------------- 3. Rotation (Alignment of 'target' vectors) - Align each mean of each class as much as possible (using Eulidea Procrustes procedure)  ----------------------
# ---------------------- Once target vectors are aligned, can be used with models trained using 'Train_Rescale' ---------------------- 
# Calculate anchor points for each class 
    Train_AnchorPoints=ClassMean(Train_Rescale, Y_train_dataset)
    Test_AnchorPoints=ClassMean(Test_Rescale, Y_test_dataset)

    # Sanity check
    print("Anchor Points shape: ")
    print(Train_AnchorPoints.shape)
    print(Test_AnchorPoints.shape)

    # Cross-product matrix 
    c_st = Train_AnchorPoints @ Test_AnchorPoints.T

    #Sanity check - should be (Nf, Nf)
    print("c_st shape: ")
    print(c_st.shape) 

    # Perform Singular value decomposition on c_st
    U, D, VT = np.linalg.svd(c_st, full_matrices=False)

    # Find number of Nv vectors that explains 99.9% varaince 
    explained_variance = D**2
    total_variance = np.sum(explained_variance)
    cumulative_explained_variance = np.cumsum(explained_variance) / total_variance
    Nv = np.argmax(cumulative_explained_variance >= 0.999) + 1  # +1 because of 0-based indexing

    U_tilde = U[:, :Nv] # Truncate using only Nv vectors
    VT_tilde = VT[:Nv, :]

    # Sanity check 
    print("truncated U and VT: ")
    print(U_tilde.shape)
    print(VT_tilde.shape)

    print(Test_Rescale.shape)

    Nt = Test_Rescale.shape[0]
    Test_Rotated = np.zeros_like(Test_Rescale) # Initialise storage

    # Align each testing trial
    for t in range(Nt):
        Test_Rotated[t] = U_tilde @ VT_tilde @ Test_Rescale[t]

    print("Rotated Test: ")
    print(Test_Rotated.shape)
    
    # Store per subject (transformed training and testing datasets)
    TransformedTraining[sub_id] = Train_Rescale
    TransformedTesting[sub_id] = Test_Rotated

Subject 16:
Loaded datatsets shape: 
(51991, 108, 100)
(1825, 108, 100)
Centred shapes: 
(51991, 5886)
(1825, 5886)
Rescaled shapes: 
(51991, 5886)
(1825, 5886)
Anchor Points shape: 
(5886, 2)
(5886, 2)
c_st shape: 
(5886, 5886)
truncated U and VT: 
(5886, 2)
(2, 5886)
(1825, 5886)
Rotated Test: 
(1825, 5886)
Subject 17:
Loaded datatsets shape: 
(51394, 108, 100)
(2422, 108, 100)
Centred shapes: 
(51394, 5886)
(2422, 5886)
Rescaled shapes: 
(51394, 5886)
(2422, 5886)
Anchor Points shape: 
(5886, 2)
(5886, 2)
c_st shape: 
(5886, 5886)
truncated U and VT: 
(5886, 2)
(2, 5886)
(2422, 5886)
Rotated Test: 
(2422, 5886)


In [10]:
end_time = time.time()
elapsed_time = end_time - start_time
print("Elapsed time:", elapsed_time/60, "minutes")

Elapsed time: 12.577589960892995 minutes


In [11]:
SavingPath

'/home/nicole/Documents/AudioCueWalking_analysis/Variables/AdaptVsNon/LOSO_CV/Dataset/Ns_100/Delay/Transformed/TangentSpace_PS/Transformed/SpatFiltSignals'

In [12]:
import os
import pickle

# Define saving paths for the variables
training_saving_path = os.path.join(SavingPath, 'TransformedTraining_sub017_018.pkl')
testing_saving_path = os.path.join(SavingPath, 'TransformedTesting_sub017_018.pkl')

# Save TransformedTraining
with open(training_saving_path, 'wb') as file:
    pickle.dump(TransformedTraining, file)

# Save TransformedTesting
with open(testing_saving_path, 'wb') as file:
    pickle.dump(TransformedTesting, file)

# Extra/Old scripts