In this notebook we modify our decoding scheme in two ways:

1. Convert the multiclass problem into binary (one-vs-rest) problems, which generates our I1 matrices.
2. Try a new splitting scheme (RepeatedStratifiedKFold).

In [1]:
# Enable IPython's autoreload extension in mode 2 so that edited modules
# (e.g. clean_functions) are re-imported before each cell executes.
%load_ext autoreload
%autoreload 2

# Imports

In [2]:
!pip install --no-index scikit-learn seaborn

Looking in links: /cvmfs/soft.computecanada.ca/custom/python/wheelhouse/gentoo2023/x86-64-v3, /cvmfs/soft.computecanada.ca/custom/python/wheelhouse/gentoo2023/generic, /cvmfs/soft.computecanada.ca/custom/python/wheelhouse/generic
Processing /cvmfs/soft.computecanada.ca/custom/python/wheelhouse/gentoo2023/generic/scikit_learn-1.6.1+computecanada-cp311-cp311-linux_x86_64.whl
Processing /cvmfs/soft.computecanada.ca/custom/python/wheelhouse/generic/seaborn-0.13.2+computecanada-py3-none-any.whl
Processing /cvmfs/soft.computecanada.ca/custom/python/wheelhouse/generic/threadpoolctl-3.6.0+computecanada-py3-none-any.whl (from scikit-learn)
Installing collected packages: threadpoolctl, scikit-learn, seaborn
Successfully installed scikit-learn-1.6.1+computecanada seaborn-0.13.2+computecanada threadpoolctl-3.6.0+computecanada


In [3]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

from scipy.stats import zscore

from sklearn.linear_model import SGDClassifier
from sklearn.model_selection import StratifiedKFold
from sklearn.multiclass import OneVsRestClassifier
from sklearn.metrics import accuracy_score

from clean_functions import load_neural_data, make_a_plot_beautiful, transpose_neural_data, select_contents, generate_times, get_percent_correct_from_proba

# Category names in the order handed to get_percent_correct_from_proba;
# len(lbls_order) also sizes the last axis of the I1 array built in the decoding cell.
lbls_order = ['bear', 'elephant', 'person', 'car', 'dog', 'apple', 'chair', 'plane', 'bird', 'zebra']

# Load Data

In [4]:
# Load the neural data
neural_data_path = "./data/mag_wm_data.mat"
# Names "r1" .. "r6" that are passed to load_neural_data as `contents`.
contents = [f"r{i}" for i in range(1, 7)]

neural_data = load_neural_data(file_path=neural_data_path, contents=contents)

# preprocess neural data
# NOTE(review): `neural_data` is overwritten with its transposed version, so this
# cell should be re-run as a whole (load + transpose) rather than line by line.
neural_data = transpose_neural_data(neural_data) # reps, imgs, ns, tb

## Preprocess Data to prepare them for training

In [5]:
# load labels
lbls = pd.read_csv("/home/soroush1/projects/def-kohitij/soroush1/WM_age_of_ultron/data/hvm200/working_memory_images_labels.csv")
lbls = lbls.values

# Pick the recording entry for one task/delay condition and collapse repetitions.
task = "active"
delay = 400
content = select_contents(delay=delay, task=task)
neural_ml_data = np.nanmean(neural_data[content], axis=0) # taking avg over reps

# average over a window of time bins (indices 2..5 on the last axis)
neural_ml_data = np.nanmean(neural_ml_data[:, :, 2:6], axis=2) # taking avg over time bins 60-180ms
# NOTE(review): axis=1 standardizes within each row of X. With X printed as (200, 192)
# that is across the 192 columns of every row, not per column (per neural site).
# Confirm which normalization is intended; per-column would be axis=0.
neural_ml_data = zscore(neural_ml_data, axis=1, ddof=1)

print(f"X: {neural_ml_data.shape}")
print(f"y: {lbls.shape}")

def get_data(task: str, delay: int, timebins = None):
    """Build the decoding inputs (X, y) for one task/delay condition.

    Parameters
    ----------
    task : str
        Task name forwarded to ``select_contents``.
    delay : int
        Delay forwarded to ``select_contents``.
    timebins : None, int, or list/tuple of two ints, optional
        Selection along the last (time-bin) axis of the repetition-averaged data:

        * ``None`` -- average over all time bins.
        * int (Python or numpy integer) -- use that single time bin.
        * ``[start, stop]`` -- average over the slice ``start:stop``.

    Returns
    -------
    neural_ml_data : numpy.ndarray
        Repetition-averaged, time-bin-selected data after ``zscore(axis=1, ddof=1)``.
    lbls : numpy.ndarray
        ``.values`` of the label CSV.

    Raises
    ------
    TypeError
        If ``timebins`` is none of the supported kinds. Previously such a value
        was silently ignored and the z-score ran on the un-reduced 3-D array.
    """
    # load labels
    lbls = pd.read_csv("/home/soroush1/projects/def-kohitij/soroush1/WM_age_of_ultron/data/hvm200/working_memory_images_labels.csv").values

    # select appropriate data and average over repetitions (axis 0)
    content = select_contents(delay=delay, task=task)
    neural_ml_data = np.nanmean(neural_data[content], axis=0)

    # select time bins (last axis)
    if timebins is None:
        # no selection requested: average over every time bin
        neural_ml_data = np.nanmean(neural_ml_data, axis=2)
    elif isinstance(timebins, (int, np.integer)):
        neural_ml_data = neural_ml_data[:, :, timebins]
    elif isinstance(timebins, (list, tuple)):
        start, stop = timebins[0], timebins[1]
        neural_ml_data = np.nanmean(neural_ml_data[:, :, start:stop], axis=2)
    else:
        raise TypeError(
            "timebins must be None, an int, or a [start, stop] list/tuple, "
            f"got {type(timebins).__name__}"
        )

    # NOTE(review): axis=1 standardizes within each row (across columns), not per
    # column; confirm this is the intended normalization.
    neural_ml_data = zscore(neural_ml_data, axis=1, ddof=1)

    return neural_ml_data, lbls

X: (200, 192)
y: (200, 1)


## Prepare model

In [6]:
# Define the classifier. The hyper-parameters match get_model() so that both
# construct the same estimator.
clf = SGDClassifier(
    loss="log_loss",       # Logistic regression (the old "log" alias was removed in scikit-learn 1.3)
    penalty="l2",          # Ridge regression (L2 regularization)
    alpha=0.0001,          # Regularization strength; must be a non-negative float ("auto" is not accepted)
    max_iter=20000,        # Iteration limit
    tol=None,              # No early stopping
    learning_rate="optimal"
)

# One binary logistic-regression classifier per class.
multi_clf = OneVsRestClassifier(clf)

# Initialize Kfold
kf = StratifiedKFold(n_splits=2, shuffle=True, random_state=42)

def get_model(random_state=None):
    """Return a fresh, unfitted one-vs-rest logistic-regression decoder.

    A new estimator is built on every call so that no fitted state is shared
    between folds or time bins.

    Parameters
    ----------
    random_state : int, RandomState instance or None, optional
        Forwarded to ``SGDClassifier`` to make the SGD data shuffling
        reproducible. The default ``None`` keeps the previous (unseeded) behavior.

    Returns
    -------
    OneVsRestClassifier
        Wrapper around an ``SGDClassifier`` with logistic loss and L2 penalty.
    """
    base_clf = SGDClassifier(
        loss="log_loss",          # Logistic regression
        penalty="l2",             # Ridge regression (L2 regularization)
        alpha=0.0001,
        max_iter=20000,           # Iteration limit
        tol=None,                 # No early stopping
        learning_rate="optimal",
        random_state=random_state,
    )

    return OneVsRestClassifier(base_clf)
    

## Training Process

In [7]:
# One accuracy per fold. The array is sized from the splitter itself so that
# changing n_splits on `kf` cannot overflow (or under-fill) it.
accs = np.zeros(kf.get_n_splits())

for fold, (tr_idx, val_idx) in enumerate(kf.split(neural_ml_data, lbls)):
    print(f"Fold {fold + 1}")
    tr = neural_ml_data[tr_idx]
    y_tr = lbls[tr_idx]
    val = neural_ml_data[val_idx]
    y_val = lbls[val_idx]

    # Both splits should contain every class because the folds are stratified.
    print(f"\ty_tr: {np.unique(y_tr)} {y_tr.shape}")
    print(f"\ty_val: {np.unique(y_val)} {y_val.shape}")

    # Fresh model per fold so nothing learned on one split leaks into the next.
    multi_clf = get_model()

    multi_clf.fit(tr, y_tr)
    y_preds = multi_clf.predict(val)
    accs[fold] = accuracy_score(y_val, y_preds)

accs

Fold 1
	y_tr: ['apple' 'bear' 'bird' 'car' 'chair' 'dog' 'elephant' 'person' 'plane'
 'zebra'] (100, 1)
	y_val: ['apple' 'bear' 'bird' 'car' 'chair' 'dog' 'elephant' 'person' 'plane'
 'zebra'] (100, 1)
Fold 2
	y_tr: ['apple' 'bear' 'bird' 'car' 'chair' 'dog' 'elephant' 'person' 'plane'
 'zebra'] (100, 1)
	y_val: ['apple' 'bear' 'bird' 'car' 'chair' 'dog' 'elephant' 'person' 'plane'
 'zebra'] (100, 1)


array([0.46, 0.49])

# Running Decoder over the neural data

In [None]:
# Sanity check: list the trial indices that belong to the first fold's test set.
total_trials = 200
indices = np.arange(0, total_trials)

# Load the fold definitions here so the cell also runs on a fresh kernel.
# NOTE: allow_pickle=True can execute arbitrary code; only load trusted files.
test_indices = np.load("kfolds_test_indices.npy", allow_pickle=True)

# `==` between the (total_trials,) index vector and a fold's index array does not
# test membership; np.isin does.
indices[np.isin(indices, test_indices[0].astype(np.int64))]

In [None]:
timebins = 41
bins_dur = 30
nfolds = 5

task = "active"
delay = 400

# Pre-computed test indices, one row per fold.
# NOTE: allow_pickle=True can execute arbitrary code; only load trusted files.
test_indices = np.load("kfolds_test_indices.npy", allow_pickle=True)
total_trials = 200
indices = np.arange(0, total_trials)

i1s = np.zeros((timebins, test_indices.shape[0], test_indices.shape[1], len(lbls_order))) # timebins, folds, imgs, unique_labels
accs = np.zeros((timebins, test_indices.shape[0])) # timebins, folds
for i in range(timebins):
    print(f"time bins: {bins_dur*i}ms")
    X, y = get_data(task=task, delay=delay, timebins=i)
    print(f"\tX: {X.shape}")

    for fold, test_idx in enumerate(test_indices):
        print(f"\t\tfold {fold + 1}")

        # Explicitly convert test_idx to integers
        test_idx_int = test_idx.astype(np.int64)

        # Boolean mask over all trials: True for this fold's test trials
        test_mask = np.zeros(shape=(total_trials), dtype=bool)
        test_mask[test_idx_int] = True

        # Everything outside the test set is used for training
        tr_idx = indices[~test_mask]
        val_idx = indices[test_mask]
        print(f"{tr_idx[:10] =  }")
        print(f"{val_idx[:10] = }")

        tr = X[tr_idx]
        y_tr = y[tr_idx]
        val = X[val_idx]
        y_val = y[val_idx]

        print(f"{tr.mean()}")
        print(f"{val.mean()}")

        print(f"\t\t\ty_tr: {np.unique(y_tr)} {y_tr.shape}")
        print(f"\t\t\ty_val: {np.unique(y_val)} {y_val.shape}")

        # Fresh model per (time bin, fold)
        multi_clf = get_model()

        multi_clf.fit(tr, y_tr)
        y_preds = multi_clf.predict(val)
        y_preds_prob = multi_clf.predict_proba(val)
        # NOTE(review): predict_proba columns follow multi_clf.classes_; confirm that
        # get_percent_correct_from_proba maps them onto lbls_order rather than
        # assuming the columns are already in lbls_order.
        i1_metric = get_percent_correct_from_proba(y_preds_prob, y_val, lbls_order)
        # Compute the accuracy once and reuse it for both logging and storage.
        acc = accuracy_score(y_val, y_preds)
        print(f"behavioral_acc: {np.nanmean(i1_metric)} +/- {np.nanstd(i1_metric)}")
        print(f"acc: {acc}")
        i1s[i,fold] = i1_metric
        accs[i,fold] = acc

time bins: 0ms
	X: (200, 192)
		fold 1
tr_idx[:10] =  array([ 1,  2,  6,  8,  9, 10, 13, 15, 17, 18])
val_idx[:10] = array([ 0,  3,  4,  5,  7, 11, 12, 14, 16, 19])
-7.771561172376097e-18
2.960594732333751e-18
			y_tr: ['apple' 'bear' 'bird' 'car' 'chair' 'dog' 'elephant' 'person' 'plane'
 'zebra'] (100, 1)
			y_val: ['apple' 'bear' 'bird' 'car' 'chair' 'dog' 'elephant' 'person' 'plane'
 'zebra'] (100, 1)
behavioral_acc: 0.4801495983320828 +/- 0.4962278625562313
acc: 0.06
		fold 2
tr_idx[:10] =  array([ 0,  3,  4,  5,  7, 11, 12, 14, 16, 19])
val_idx[:10] = array([ 1,  2,  6,  8,  9, 10, 13, 15, 17, 18])
2.960594732333751e-18
-7.771561172376097e-18
			y_tr: ['apple' 'bear' 'bird' 'car' 'chair' 'dog' 'elephant' 'person' 'plane'
 'zebra'] (100, 1)
			y_val: ['apple' 'bear' 'bird' 'car' 'chair' 'dog' 'elephant' 'person' 'plane'
 'zebra'] (100, 1)
behavioral_acc: 0.4980695172872232 +/- 0.49710753492820065
acc: 0.08
		fold 3
tr_idx[:10] =  array([ 2,  4,  6,  7,  8, 11, 13, 14, 16, 18])
val

# Plot Decoder Results

In [None]:

times = generate_times(0, 1200, 30)
fig, ax = plt.subplots(figsize=(7, 7))

for delay in [400, 800, 1200]:
    # Saved accuracies; axis 1 is averaged below, so it is taken to be the fold axis.
    decoder_results = np.load(f"decoding_accuracy_{delay}ms.npy")
    n_folds = decoder_results.shape[1]
    mean = decoder_results.mean(axis=1)
    sd = decoder_results.std(axis=1, ddof=1)
    # Standard error of the mean: divide by sqrt of the number of samples that
    # were averaged (folds), not by the number of time bins (len(mean)).
    se = sd / np.sqrt(n_folds)

    # Plot mean response
    ax.plot(times, mean, linewidth=2, label=f"{delay}", marker="o", linestyle="--")

    # Add SE as a shaded region (no label, to avoid a duplicate legend entry per delay)
    ax.fill_between(times, mean - se, mean + se, alpha=0.3)

# Add vertical dashed lines at 60ms and 180ms
# ax.axvline(x=60, linestyle="-.", color="green", linewidth=1.5, label="Event at 60ms")
# ax.axvline(x=180, linestyle="-.", color="green", linewidth=1.5, label="Event at 180ms")

# Set baseline (0.1 = 1 / len(lbls_order), i.e. chance for the 10 categories)
ax.axhline(y=0.1, linestyle="-.", color="b", linewidth=1.5, label="baseline")

make_a_plot_beautiful(ax)

# Labels and legend
ax.set_xlabel("Time (ms)")
ax.set_ylabel("Decoder Accuracy")
ax.legend()

plt.show()