In [11]:
# ~~~~~~~~~~~~~~ Libraries
import sys, os
import mne # Python package for processing and analyzing electrophysiological data
import numpy as np
from glob import glob # look for all the pathnames matching a specified pattern according to the rules
import matplotlib.pyplot as plt
from mne.preprocessing import ICA # ICA (Independent Component Analysis) algorithm, which is for artifact removal
import json
import re

from sklearn.model_selection import StratifiedKFold
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from scipy.stats import permutation_test
import matplotlib.pyplot as plt

# Average time series for one channel

In [None]:

# Plot, for every subject, the condition-averaged time course of one channel.
#
# For each subject folder the epochs file is loaded, the epochs are averaged per
# condition, and the averaged signal of `channel_name` is drawn with one line per
# condition. `averaged_data` holds the (unscaled) averaged vectors of the subject
# processed last.

# Set the working directory
path = "/u/kazma/MINT/data/interim/visual"
folders = [f for f in os.listdir(path) if os.path.isdir(os.path.join(path, f))]

# Sort the folders based on the numeric part after "sub-". Folders without any
# digit (e.g. hidden/checkpoint directories) have no numeric key and are skipped
# instead of crashing the sort.
folders_sorted = sorted(
    (f for f in folders if re.search(r'\d+', f)),
    key=lambda x: int(re.search(r'\d+', x).group()),
)

# Specify the channel to plot (e.g., channel name 'Cz'); identical for all subjects
channel_name = 'F3'  # Replace with the channel of your choice

# MNE stores EEG signals in volts (SI units); the y-axis is labelled in µV,
# so the plotted values are scaled accordingly.
VOLT_TO_MICROVOLT = 1e6

for sub_loop in folders_sorted:

    # subject folder
    sub_filename = os.path.join(path, sub_loop, 'epochs-epo.fif')

    # Load epochs
    epochs = mne.read_epochs(sub_filename, preload=True)
    print(f"{sub_loop} is analyzed")

    # A subject without the requested channel cannot be plotted; report and move on
    if channel_name not in epochs.ch_names:
        print(f"{sub_loop}: channel {channel_name} not found, skipping plot")
        continue

    # Get the condition names
    condition_names = list(epochs.event_id.keys())

    # Compute the averaged epoch for each condition (all channels)
    evoked_list = [epochs[condition].average() for condition in condition_names]

    # Store one averaged vector per condition for the selected channel (in volts)
    averaged_data = {}

    # Figure
    fig, ax = plt.subplots(figsize=(8, 4))

    # One colour per condition (sized from the data instead of a fixed count)
    colors = plt.cm.tab10(np.linspace(0, 1, len(evoked_list)))

    for i, evoked in enumerate(evoked_list):
        # Extract data for the specified channel
        channel_index = evoked.ch_names.index(channel_name)  # Get index of the channel
        channel_data = evoked.data[channel_index]  # Get the data for the specific channel
        times = evoked.times * 1000  # Convert time to milliseconds

        # Store the data (unscaled, as returned by MNE)
        averaged_data[condition_names[i]] = channel_data

        # Plot the channel data in µV: a wider translucent line underneath
        # gives each trace a soft outline, the thinner line carries the label
        channel_data_uv = channel_data * VOLT_TO_MICROVOLT
        ax.plot(times, channel_data_uv, color=colors[i], linewidth=3, alpha=0.5)
        ax.plot(times, channel_data_uv, label=condition_names[i], color=colors[i], linewidth=2)

    # Customize the plot
    ax.set_title('Average Time Series for Channel: ' + channel_name)
    ax.set_xlabel('Time (ms)')
    ax.set_ylabel('Amplitude (µV)')
    ax.axhline(0, color='black', linestyle='--', linewidth=0.5)  # Add a horizontal line at y=0
    ax.axvline(0, color='black', linestyle='-', linewidth=1)  # Add a vertical line at t=0
    ax.set_xlim(-20, 900)  # Set x-axis limits
    # ax.set_ylim(-0.02, 0.02)  # Set y-axis limits (µV)
    ax.legend(ncol=3, loc='lower left', framealpha=1)

    # Set the box line color to gray
    for side in ('top', 'right', 'bottom', 'left'):
        ax.spines[side].set_color('gray')

    fig.tight_layout()
    plt.show()
    plt.close(fig)  # release the figure so many subjects do not accumulate in memory



## Time-resolved decoding

In [None]:
# Build, for every subject and condition, a matrix of flattened time windows
# taken from the condition-averaged data. The result is collected in
# `all_subjects_data[subject][condition]`.


def _flatten_windows(data, window_size):
    """Cut a (n_channels, n_samples) array into consecutive flattened windows.

    The signal is split along the sample axis into non-overlapping windows of
    `window_size` samples; trailing samples that do not fill a whole window are
    dropped. Each window is flattened channel by channel (all samples of the
    first channel, then the second, ...).

    Parameters
    ----------
    data : np.ndarray
        2-D array of shape (n_channels, n_samples).
    window_size : int
        Number of samples per window; must be positive.

    Returns
    -------
    np.ndarray
        Array of shape (n_windows, n_channels * window_size). It is always 2-D,
        also when no full window fits (n_windows == 0).

    Raises
    ------
    ValueError
        If `window_size` is not positive.
    """
    if window_size <= 0:
        raise ValueError("window_size must be a positive integer")

    n_channels, n_samples = data.shape
    n_windows = n_samples // window_size

    # Drop the incomplete tail, then regroup samples as (window, channel, sample)
    trimmed = data[:, :n_windows * window_size]
    return (
        trimmed.reshape(n_channels, n_windows, window_size)
        .transpose(1, 0, 2)
        .reshape(n_windows, n_channels * window_size)
    )


# Set the working directory
path = "/u/kazma/MINT/data/interim/visual"
folders = [f for f in os.listdir(path) if os.path.isdir(os.path.join(path, f))]

# Sort the folders based on the numeric part after "sub-". Folders without any
# digit have no numeric key and are skipped instead of crashing the sort.
folders_sorted = sorted(
    (f for f in folders if re.search(r'\d+', f)),
    key=lambda x: int(re.search(r'\d+', x).group()),
)

# Initialize a dictionary to store decoding_dict for each subject
all_subjects_data = {}

# Number of samples per time window (same for every subject and condition)
window_size = 5

# Subject loop
for sub_loop in folders_sorted:

    # subject folder
    sub_filename = os.path.join(path, sub_loop, 'epochs-epo.fif')

    # Load epochs
    epochs = mne.read_epochs(sub_filename, preload=True)
    print(f"{sub_loop} is analyzed")

    # Get the number of channels and the list of conditions
    n_channels = len(epochs.ch_names)
    conditions = list(epochs.event_id.keys())

    # Maps each condition to a 2D array with one flattened window per row
    decoding_dict = {}

    # Loop over each condition and extract the windows across the epoch duration
    for condition in conditions:
        evoked = epochs[condition].average()  # Average epochs for the specific condition
        n_channels, n_samples = evoked.data.shape

        # Shape: (n_windows, n_channels * window_size)
        decoding_dict[condition] = _flatten_windows(evoked.data, window_size)

    all_subjects_data[sub_loop] = decoding_dict

    # Print shapes to confirm
    for condition, data in decoding_dict.items():
        print(f"Condition: {condition}, Shape of data: {data.shape}, so the total number of window is {data.shape[0]}, and {data.shape[1]} = n_channels * {window_size} samples")




# Cross-validation scheme (LOOCV)

In [10]:
# Inspect one sample: the first flattened window (row 0) of condition
# 'numerosity 1' for subject 'sub-01_ses-01'.
# NOTE(review): requires `all_subjects_data` to have been built already and both
# keys to exist; looks like a scratch check — confirm whether `x` is used later.
x = all_subjects_data['sub-01_ses-01']['numerosity 1'][0]

In [12]:
# Parameters
n_splits = 7 # number of parts the data is separated into (presumably cross-validation folds — TODO confirm)
