In [1]:
import cebra
from cebra import CEBRA
import cebra.models
import os
from scipy import stats
import tempfile
from pathlib import Path
import torch
import pandas as pd
import numpy as np
import pickle as pkl
from matplotlib import pyplot as plt
import matplotlib
from matplotlib.colors import rgb2hex
from sklearn.model_selection import train_test_split
import sys
sys.path.append('/home/garullie/CEBRA_analysis/')
from dataset_load import data_load
from sklearn.model_selection import GroupKFold

In [2]:
from cebra.models.model import _OffsetModel, ConvolutionalModelMixin
from torch import nn
@cebra.models.register("offset200-model")  # registering makes the architecture available under this name
class Offset200Model(_OffsetModel, ConvolutionalModelMixin):
    """Convolutional CEBRA encoder built from six un-padded 1D convolutions.

    With the default stride/dilation of ``nn.Conv1d`` the kernel sizes
    (101, 21, 21, 21, 21, 20) add up to a receptive field of
    101 + 4 * 20 + 19 = 200 input samples, which is the total length of the
    offset returned by :meth:`get_offset`.

    Args:
        num_neurons: Number of input channels (features per time step).
        num_units: Number of channels of every hidden convolution.
        num_output: Number of channels of the final convolution.
        normalize: Forwarded unchanged to ``_OffsetModel``.
    """

    def __init__(self, num_neurons, num_units, num_output, normalize=True):
        # Layers are instantiated in network order so that parameter
        # initialisation consumes the torch RNG in the same sequence as a
        # literal, inline layer list would.
        layers = [nn.Conv1d(num_neurons, num_units, 101), nn.GELU()]
        for _ in range(4):
            layers.append(nn.Conv1d(num_units, num_units, 21))
            layers.append(nn.GELU())
        layers.append(nn.Conv1d(num_units, num_output, 20))

        super().__init__(
            *layers,
            num_input=num_neurons,
            num_output=num_output,
            normalize=normalize,
        )

    # The forward method can also be overridden here,
    # exactly as for any other pytorch module.

    def get_offset(self) -> cebra.data.datatypes.Offset:
        """Return the offset of this model: 100 samples on each side."""
        half_window = 100
        return cebra.data.Offset(half_window, half_window)

In [3]:
# Dates handed to data_load as its third argument
# (presumably recording sessions to exclude -- confirm in dataset_load).
skipdates = [
    "220818",
    "220819",
]

# Folder the dataset is read from.
input_folder = "/sc-projects/sc-proj-cc15-ag-wenger-retune/data_kinematic_states_neural/"

# NOTE(review): looks like the Vicon sampling rate in Hz -- confirm.
VICON_FS = 200

# Column names handed to data_load as its second argument.
# A shorter alternative without the two fooof knee columns would be:
#   ["ANIMAL_ID", "CONDITION", "RUN", "DATE", "EVENT"]
variables_to_drop = [
    "ANIMAL_ID",
    "CONDITION",
    "RUN",
    "DATE",
    "EVENT",
    "ECOG_LEFT_fooof_a_knee",
    "ECOG_RIGHT_fooof_a_knee",
]

def label_to_float(y):
    """Encode string labels as floats.

    ``"fog"`` becomes 0.0, ``"nlm"`` becomes 1.0 and every other value
    becomes 2.0.

    Args:
        y: Iterable of labels.

    Returns:
        ``np.ndarray`` with one code per element of ``y``, in the same order.
    """
    codes = []
    for label in y:
        if label == "fog":
            codes.append(0.)
        elif label == "nlm":
            codes.append(1.)
        else:
            # anything that is neither "fog" nor "nlm" shares one class
            codes.append(2.)
    return np.asarray(codes)

def label_to_int(y):
    """Encode string labels as integers.

    ``"fog"`` becomes 0, ``"nlm"`` becomes 1 and every other value becomes 2.

    Args:
        y: Iterable of labels.

    Returns:
        ``np.ndarray`` with one code per element of ``y``, in the same order.
    """
    codes = []
    for label in y:
        if label == "fog":
            codes.append(0)
        elif label == "nlm":
            codes.append(1)
        else:
            # anything that is neither "fog" nor "nlm" shares one class
            codes.append(2)
    return np.asarray(codes)

# Load the dataset; data_load's optional dataset_name argument is left at its
# default (an explicit alternative would be
# dataset_name="neukin_dataset_baseline_w1500ms").
data_dict = data_load(input_folder, variables_to_drop, skipdates)

# Feature table and labels.
x, y = data_dict["X_pd"], data_dict["y_pd"]

# Per-row animal and run identifiers; later cells concatenate them into
# cross-validation group keys.
animals_id_pd, run_id_pd = data_dict["animals_id_pd"], data_dict["run_id_pd"]

No file found for 230425 NWE00161 02
No file found for 230425 NWE00161 08
No file found for 230425 NWE00160 04
No file found for 230425 NWE00160 03
No file found for 230425 NWE00164 21


In [7]:
# Embed the training split of every fold with that fold's saved CEBRA model
# (neural features) and store the embeddings together with their labels.

# One group key per row: animal id concatenated with run id.
groups = [a+i for a, i in zip(animals_id_pd, run_id_pd)]

# NOTE(review): seed, shuffle and splitter settings presumably mirror the split
# used when the run_splits_* models were trained -- keep them in sync.
rng = np.random.default_rng(seed=42)
unique_groups = np.unique(groups)
rng.shuffle(unique_groups)

# The split is performed over the unique groups themselves (each group is its
# own sample), so train_idx / test_idx index into `unique_groups`.
gkf = GroupKFold(n_splits=11)
X_unique = np.arange(len(unique_groups))

embeddings = {}

for fold_idx, (train_idx, test_idx) in enumerate(gkf.split(X_unique, groups=unique_groups)):
    print(f"now processing fold {fold_idx}")
    loaded_cebra_model = cebra.CEBRA.load(f"../../neural_folds_cm/models/run_splits_{fold_idx}_pd.pt")

    # Only the training rows are needed in this cell.
    train_groups = unique_groups[train_idx]
    train_mask = np.isin(groups, train_groups)

    # Columns from position 36 onward are the inputs of the neural models
    # (the ECOG_* features).
    X_train, y_train = x.iloc[train_mask, 36:], y[train_mask]

    embedding = loaded_cebra_model.transform(X_train)
    embeddings[fold_idx] = {"embedding": embedding, "y_train": y_train}

with open("./embeddings_folds.pkl", 'wb') as handle:
    pkl.dump(embeddings, handle)

now processing fold 0
now processing fold 1
now processing fold 2
now processing fold 3
now processing fold 4
now processing fold 5
now processing fold 6
now processing fold 7
now processing fold 8
now processing fold 9
now processing fold 10


In [9]:
# Permutation analysis on the neural features: for every fold, shuffle one
# test-set column at a time and embed the perturbed test data with that fold's
# saved CEBRA model.

# One group key per row: animal id concatenated with run id.
groups = [a+i for a, i in zip(animals_id_pd, run_id_pd)]

# NOTE(review): seed, shuffle and splitter settings presumably mirror the split
# used when the run_splits_* models were trained -- keep them in sync.
# The same generator also drives the column permutations below, so the order
# of the draws must not change if results are to stay reproducible.
rng = np.random.default_rng(seed=42)
unique_groups = np.unique(groups)
rng.shuffle(unique_groups)

# The split is performed over the unique groups themselves (each group is its
# own sample), so train_idx / test_idx index into `unique_groups`.
gkf = GroupKFold(n_splits=11)
X_unique = np.arange(len(unique_groups))


embeddings_test = {}

for fold_idx, (train_idx, test_idx) in enumerate(gkf.split(X_unique, groups=unique_groups)):
    print(f"now processing fold {fold_idx}")
    loaded_cebra_model = cebra.CEBRA.load(f"../../neural_folds_cm/models/run_splits_{fold_idx}_pd.pt")

    # Only the held-out rows are needed in this cell.
    test_groups = unique_groups[test_idx]
    test_mask = np.isin(groups, test_groups)

    # Columns from position 36 onward are the inputs of the neural models
    # (the ECOG_* features).
    X_test, y_test = x.iloc[test_mask, 36:], y[test_mask]

    fold_dict = {}
    for feat in X_test.columns:
        print(f"feat: {feat}")
        # Shuffle a copy of this single column across the test rows ...
        random_feat = X_test[feat].values.copy()
        rng.shuffle(random_feat)
        # ... and substitute it into a fresh copy so X_test itself stays intact.
        x_perm = X_test.copy()
        x_perm[feat] = random_feat
        test_embedding = loaded_cebra_model.transform(x_perm)
        fold_dict[feat] = test_embedding

    embeddings_test[fold_idx] = {"embedding": fold_dict, "y_test": y_test}

with open("./embeddings_test_folds.pkl", 'wb') as handle:
    pkl.dump(embeddings_test, handle)

now processing fold 0
feat: ECOG_LEFT_RawHjorth_Activity
feat: ECOG_LEFT_RawHjorth_Mobility
feat: ECOG_LEFT_RawHjorth_Complexity
feat: ECOG_RIGHT_RawHjorth_Activity
feat: ECOG_RIGHT_RawHjorth_Mobility
feat: ECOG_RIGHT_RawHjorth_Complexity
feat: ECOG_LEFT_raw
feat: ECOG_RIGHT_raw
feat: ECOG_LEFT_bandpass_activity_low beta
feat: ECOG_LEFT_bandpass_activity_high beta
feat: ECOG_LEFT_bandpass_activity_low gamma
feat: ECOG_LEFT_bandpass_activity_high gamma
feat: ECOG_LEFT_bandpass_activity_HFA
feat: ECOG_RIGHT_bandpass_activity_low beta
feat: ECOG_RIGHT_bandpass_activity_high beta
feat: ECOG_RIGHT_bandpass_activity_low gamma
feat: ECOG_RIGHT_bandpass_activity_high gamma
feat: ECOG_RIGHT_bandpass_activity_HFA
feat: ECOG_LEFT_stft_low beta
feat: ECOG_LEFT_stft_high beta
feat: ECOG_LEFT_stft_low gamma
feat: ECOG_LEFT_stft_high gamma
feat: ECOG_LEFT_stft_HFA
feat: ECOG_RIGHT_stft_low beta
feat: ECOG_RIGHT_stft_high beta
feat: ECOG_RIGHT_stft_low gamma
feat: ECOG_RIGHT_stft_high gamma
feat: ECOG

In [10]:
import os
import sys
import numpy as np
import pickle as pkl
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import GroupKFold
sys.path.append('/home/garullie/CEBRA_analysis/')
from dataset_load import data_load


# Decode one fold: fit a cosine KNN on the fold's training embedding and
# predict labels for every permuted-feature test embedding of the same fold.

# Fold to evaluate (hard-coded here; printed as the SLURM id).
index = 1
print(f"SLURM ID = {index}")

# Create the output folder up front so the (slow) decoding below cannot end in
# a FileNotFoundError when the results are written.
os.makedirs("./fold_eval", exist_ok=True)

# NOTE: pickle.load can execute arbitrary code -- only load files that were
# written by this notebook.
with open("./embeddings_folds.pkl", "rb") as input_file:
    file = pkl.load(input_file)[index]
    embedding = file["embedding"]
    y_train = file["y_train"]


with open("./embeddings_test_folds.pkl", "rb") as input_file:
    file = pkl.load(input_file)[index]
    test_embedding = file["embedding"]  # dict: permuted feature name -> embedding
    y_test = file["y_test"]

fold_dict = {}

decoder = KNeighborsClassifier(n_neighbors=3, metric="cosine")
decoder.fit(embedding, y_train)
print("decoding")
for feat in test_embedding.keys():
    prediction = decoder.predict(test_embedding[feat])
    # Predictions are stored as int64 (assumes the labels are numeric).
    prediction_ = np.array(prediction, dtype=np.int64)
    fold_dict[feat] = {"true": y_test, "prediction": prediction_}
    print(feat)

with open(f"./fold_eval/permutations_fold_{index}.pkl", 'wb') as handle:
    pkl.dump(fold_dict, handle)

SLURM ID = 1
decoding



KeyboardInterrupt



 # Kinematics

In [12]:
# Embed the training split of every fold with that fold's saved CEBRA model
# (kinematic features) and store the embeddings together with their labels.

# One group key per row: animal id concatenated with run id.
groups = [a+i for a, i in zip(animals_id_pd, run_id_pd)]

# NOTE(review): seed, shuffle and splitter settings presumably mirror the split
# used when the run_splits_* models were trained -- keep them in sync.
rng = np.random.default_rng(seed=42)
unique_groups = np.unique(groups)
rng.shuffle(unique_groups)

# The split is performed over the unique groups themselves (each group is its
# own sample), so train_idx / test_idx index into `unique_groups`.
gkf = GroupKFold(n_splits=11)
X_unique = np.arange(len(unique_groups))


embeddings = {}

for fold_idx, (train_idx, test_idx) in enumerate(gkf.split(X_unique, groups=unique_groups)):
    print(f"now processing fold {fold_idx}")
    loaded_cebra_model = cebra.CEBRA.load(f"../../kinematic_folds_cm/models/run_splits_{fold_idx}_pd.pt")

    # Only the training rows are needed in this cell.
    train_groups = unique_groups[train_idx]
    train_mask = np.isin(groups, train_groups)

    # The first 36 columns are the inputs of the kinematic models.
    X_train, y_train = x.iloc[train_mask, :36], y[train_mask]

    embedding = loaded_cebra_model.transform(X_train)
    embeddings[fold_idx] = {"embedding": embedding, "y_train": y_train}

with open("./embeddings_folds_kin.pkl", 'wb') as handle:
    pkl.dump(embeddings, handle)

now processing fold 0
now processing fold 1
now processing fold 2
now processing fold 3
now processing fold 4
now processing fold 5
now processing fold 6
now processing fold 7
now processing fold 8
now processing fold 9
now processing fold 10


In [13]:
# Permutation analysis on the kinematic features: for every fold, shuffle one
# test-set column at a time and embed the perturbed test data with that fold's
# saved CEBRA model.

# One group key per row: animal id concatenated with run id.
groups = [a+i for a, i in zip(animals_id_pd, run_id_pd)]

# NOTE(review): seed, shuffle and splitter settings presumably mirror the split
# used when the run_splits_* models were trained -- keep them in sync.
# The same generator also drives the column permutations below, so the order
# of the draws must not change if results are to stay reproducible.
rng = np.random.default_rng(seed=42)
unique_groups = np.unique(groups)
rng.shuffle(unique_groups)

# The split is performed over the unique groups themselves (each group is its
# own sample), so train_idx / test_idx index into `unique_groups`.
gkf = GroupKFold(n_splits=11)
X_unique = np.arange(len(unique_groups))


embeddings_test = {}

for fold_idx, (train_idx, test_idx) in enumerate(gkf.split(X_unique, groups=unique_groups)):
    print(f"now processing fold {fold_idx}")
    loaded_cebra_model = cebra.CEBRA.load(f"../../kinematic_folds_cm/models/run_splits_{fold_idx}_pd.pt")

    # Only the held-out rows are needed in this cell.
    test_groups = unique_groups[test_idx]
    test_mask = np.isin(groups, test_groups)

    # The first 36 columns are the inputs of the kinematic models.
    X_test, y_test = x.iloc[test_mask, :36], y[test_mask]

    fold_dict = {}
    for feat in X_test.columns:
        print(f"feat: {feat}")
        # Shuffle a copy of this single column across the test rows ...
        random_feat = X_test[feat].values.copy()
        rng.shuffle(random_feat)
        # ... and substitute it into a fresh copy so X_test itself stays intact.
        x_perm = X_test.copy()
        x_perm[feat] = random_feat
        test_embedding = loaded_cebra_model.transform(x_perm)
        fold_dict[feat] = test_embedding

    embeddings_test[fold_idx] = {"embedding": fold_dict, "y_test": y_test}

with open("./embeddings_test_folds_kin.pkl", 'wb') as handle:
    pkl.dump(embeddings_test, handle)

now processing fold 0
feat: left_crest_angle_mean
feat: left_hip_angle_mean
feat: left_knee_angle_mean
feat: left_ankle_angle_mean
feat: right_crest_angle_mean
feat: right_hip_angle_mean
feat: right_knee_angle_mean
feat: right_ankle_angle_mean
feat: trunk_cross_l_angle_mean
feat: trunk_cross_r_angle_mean
feat: left_crest_angular_velocity_mean
feat: left_hip_angular_velocity_mean
feat: left_knee_angular_velocity_mean
feat: left_ankle_angular_velocity_mean
feat: right_crest_angular_velocity_mean
feat: right_hip_angular_velocity_mean
feat: right_knee_angular_velocity_mean
feat: right_ankle_angular_velocity_mean
feat: trunk_cross_l_angular_velocity_mean
feat: trunk_cross_r_angular_velocity_mean
feat: lmtp_speed_mean
feat: lankle_speed_mean
feat: lknee_speed_mean
feat: lhip_speed_mean
feat: lcrest_speed_mean
feat: lshoulder_speed_mean
feat: rmtp_speed_mean
feat: rankle_speed_mean
feat: rknee_speed_mean
feat: rhip_speed_mean
feat: rcrest_speed_mean
feat: rshoulder_speed_mean
feat: lmtp_heigh

# Kinematics (uncorrelated features only)

In [6]:
# Subset of kinematic column names selected from the feature table in the
# cells below (fed to the *_kin_uncorrelated models).
uncorrelated = [
    'left_crest_angle_mean',
    'right_hip_angle_mean',
    'left_crest_angular_velocity_mean',
    'left_hip_angular_velocity_mean',
    'left_ankle_angular_velocity_mean',
    'right_hip_angular_velocity_mean',
    'lmtp_speed_mean',
    'lankle_speed_mean',
    'rmtp_speed_mean',
]

In [11]:
# Embed the training split of every fold with that fold's saved CEBRA model
# (uncorrelated kinematic subset) and store the embeddings with their labels.

# One group key per row: animal id concatenated with run id.
groups = [a+i for a, i in zip(animals_id_pd, run_id_pd)]

# NOTE(review): seed, shuffle and splitter settings presumably mirror the split
# used when the run_splits_* models were trained -- keep them in sync.
rng = np.random.default_rng(seed=42)
unique_groups = np.unique(groups)
rng.shuffle(unique_groups)

# The split is performed over the unique groups themselves (each group is its
# own sample), so train_idx / test_idx index into `unique_groups`.
gkf = GroupKFold(n_splits=11)
X_unique = np.arange(len(unique_groups))


embeddings = {}

for fold_idx, (train_idx, test_idx) in enumerate(gkf.split(X_unique, groups=unique_groups)):
    print(f"now processing fold {fold_idx}")
    loaded_cebra_model = cebra.CEBRA.load(f"../models/run_splits_{fold_idx}_pd_kin_uncorrelated.pt")

    # Only the training rows are needed in this cell.
    train_groups = unique_groups[train_idx]
    train_mask = np.isin(groups, train_groups)

    # Restrict to the first 36 (kinematic) columns, then keep only the
    # columns listed in `uncorrelated`, in that order.
    X_train, y_train = x.iloc[train_mask, :36], y[train_mask]
    X_train = X_train[uncorrelated]

    embedding = loaded_cebra_model.transform(X_train)
    embeddings[fold_idx] = {"embedding": embedding, "y_train": y_train}

with open("./embeddings_folds_kin_uncorr.pkl", 'wb') as handle:
    pkl.dump(embeddings, handle)

now processing fold 0
now processing fold 1
now processing fold 2
now processing fold 3
now processing fold 4
now processing fold 5
now processing fold 6
now processing fold 7
now processing fold 8
now processing fold 9
now processing fold 10


In [12]:
# Permutation analysis on the uncorrelated kinematic subset: for every fold,
# shuffle one test-set column at a time and embed the perturbed test data with
# that fold's saved CEBRA model.

# One group key per row: animal id concatenated with run id.
groups = [a+i for a, i in zip(animals_id_pd, run_id_pd)]

# NOTE(review): seed, shuffle and splitter settings presumably mirror the split
# used when the run_splits_* models were trained -- keep them in sync.
# The same generator also drives the column permutations below, so the order
# of the draws must not change if results are to stay reproducible.
rng = np.random.default_rng(seed=42)
unique_groups = np.unique(groups)
rng.shuffle(unique_groups)

# The split is performed over the unique groups themselves (each group is its
# own sample), so train_idx / test_idx index into `unique_groups`.
gkf = GroupKFold(n_splits=11)
X_unique = np.arange(len(unique_groups))


embeddings_test = {}

for fold_idx, (train_idx, test_idx) in enumerate(gkf.split(X_unique, groups=unique_groups)):
    print(f"now processing fold {fold_idx}")
    loaded_cebra_model = cebra.CEBRA.load(f"../models/run_splits_{fold_idx}_pd_kin_uncorrelated.pt")

    # Only the held-out rows are needed in this cell.
    test_groups = unique_groups[test_idx]
    test_mask = np.isin(groups, test_groups)

    # Restrict to the first 36 (kinematic) columns, then keep only the
    # columns listed in `uncorrelated`, in that order.
    X_test, y_test = x.iloc[test_mask, :36], y[test_mask]
    X_test = X_test[uncorrelated]

    fold_dict = {}
    for feat in X_test.columns:
        print(f"feat: {feat}")
        # Shuffle a copy of this single column across the test rows ...
        random_feat = X_test[feat].values.copy()
        rng.shuffle(random_feat)
        # ... and substitute it into a fresh copy so X_test itself stays intact.
        x_perm = X_test.copy()
        x_perm[feat] = random_feat
        test_embedding = loaded_cebra_model.transform(x_perm)
        fold_dict[feat] = test_embedding

    embeddings_test[fold_idx] = {"embedding": fold_dict, "y_test": y_test}

with open("./embeddings_test_folds_kin_uncorrelated.pkl", 'wb') as handle:
    pkl.dump(embeddings_test, handle)

now processing fold 0
feat: left_crest_angle_mean
feat: right_hip_angle_mean
feat: left_crest_angular_velocity_mean
feat: left_hip_angular_velocity_mean
feat: left_ankle_angular_velocity_mean
feat: right_hip_angular_velocity_mean
feat: lmtp_speed_mean
feat: lankle_speed_mean
feat: rmtp_speed_mean
now processing fold 1
feat: left_crest_angle_mean
feat: right_hip_angle_mean
feat: left_crest_angular_velocity_mean
feat: left_hip_angular_velocity_mean
feat: left_ankle_angular_velocity_mean
feat: right_hip_angular_velocity_mean
feat: lmtp_speed_mean
feat: lankle_speed_mean
feat: rmtp_speed_mean
now processing fold 2
feat: left_crest_angle_mean
feat: right_hip_angle_mean
feat: left_crest_angular_velocity_mean
feat: left_hip_angular_velocity_mean
feat: left_ankle_angular_velocity_mean
feat: right_hip_angular_velocity_mean
feat: lmtp_speed_mean
feat: lankle_speed_mean
feat: rmtp_speed_mean
now processing fold 3
feat: left_crest_angle_mean
feat: right_hip_angle_mean
feat: left_crest_angular_velo