In [1]:
from collections import defaultdict
import rsa_io
import rsa
import argparse
import os
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import h5py

In [2]:
# --- Input locations ---
# Root folder that is scanned for the per-subject EEG folders.
eeg_root_path = "/Users/huseyinelmas/Desktop/CCN-Lab/data/set_test/"
# Folder whose (non-hidden) files are each loaded as one model.
model_root_path = "/Users/huseyinelmas/Desktop/CCN-Lab/RSA-Models/"

# --- Output location ---
# Root folder for results; the model/EEG RDM cache folders are created below it.
save_path = "/Users/huseyinelmas/Desktop/CCN-Lab/CCN-RSA/Results/"
# When True, freshly computed EEG RDMs are written to an HDF5 file.
save_eeg_rdms = True

# --- Analysis parameters ---
# Window size handed to rsa_io.build_eeg_data; it is also part of the EEG RDM cache file name.
w_size = 25
# Distance metrics used for the EEG RDMs and the model RDMs, respectively.
eeg_rdm_dist_metric, model_rdm_dist_metric = 'correlation', 'euclidean'

In [3]:
# Make sure the folders that cache the RDMs exist under save_path.
# exist_ok=True replaces the separate os.path.exists() check: the cell can be
# re-run safely and there is no gap between checking for and creating a folder.

# Folder holding the model RDMs.
model_rdm_path = save_path + "modelRDMs/"
os.makedirs(model_rdm_path, exist_ok=True)

# Folder holding the EEG RDMs.
eeg_rdm_path = save_path + "eegRDMs/"
os.makedirs(eeg_rdm_path, exist_ok=True)

In [4]:
# Collect the per-subject folders: directories directly under eeg_root_path whose
# name starts with "subj".
# os.listdir() returns entries in arbitrary order, so the names are sorted to keep
# the subject order (and everything built from it) identical across runs.
subject_folders = sorted(
    name for name in os.listdir(eeg_root_path)
    if name.startswith("subj") and os.path.isdir(eeg_root_path + name)
)
n_subjects = len(subject_folders)

In [6]:
# The EEG RDM cache file is named after the window size and the EEG distance metric.
# If "<eeg_rdm_fname>.hdf5" is already present in eeg_rdm_path, the RDMs are loaded
# from it and eeg_rdm_ready is set so that they are not computed again.
eeg_rdm_fname = "eeg_rdm_{}_{}".format(w_size, eeg_rdm_dist_metric)
eeg_rdm_ready = False

cached_eeg_files = os.listdir(eeg_rdm_path)
if (eeg_rdm_fname + ".hdf5") in cached_eeg_files:
    found_message = "EEG RDMs with parameters with window size: {0} and distance: {1} is already created, loading from {2}"
    print(found_message.format(str(w_size), eeg_rdm_dist_metric, eeg_rdm_path + eeg_rdm_fname))
    windowed_eeg_rdm_dict, attributes = rsa_io.load_from_hdf5(eeg_rdm_fname, eeg_rdm_path)
    eeg_rdm_ready = True

EEG RDMs with parameters with window size: 25 and distance: correlation is already created, loading from /Users/huseyinelmas/Desktop/CCN-Lab/CCN-RSA/Results/eegRDMs/eeg_rdm_25_correlation
Dataset attributes of eeg_rdm_25_correlation [('distance_metric', 'correlation'), ('w_size', 25)]


In [9]:
# Build one RDM per model file in model_root_path. An RDM that was already saved in
# model_rdm_path (as "<model_name>.npy") is loaded instead of being recomputed.
model_rdm_dict = {}
for model_file in os.listdir(model_root_path):
    # Skip hidden files (names starting with ".").
    if model_file.startswith("."):
        continue

    # The cache key is the model file name (without extension) plus the distance metric.
    model_name = os.path.splitext(model_file)[0] + "_" + model_rdm_dist_metric
    if model_name + '.npy' not in os.listdir(model_rdm_path):
        model = rsa_io.load_model(file_path=model_root_path + model_file)
        # Fixed: save_path used to be the undefined name `model_RDM_path`, which raised
        # NameError whenever a model RDM was not cached yet.
        model_rdm_dict[model_name] = rsa.create_rdm(model.values, metric=model_rdm_dist_metric, name=model_name,
                                                    save_path=model_rdm_path, model=True)
    else:
        print("Model RDM for {0} with distance metric: {1} was found in {2},"
              " loading already created Model RDM ".format(model_name, model_rdm_dist_metric, model_rdm_path))
        model_rdm_dict[model_name] = rsa_io.load_rdm(model_rdm_path + model_name)


Model RDM for movement_nature_euclidean with distance metric: euclidean was found in /Users/huseyinelmas/Desktop/CCN-Lab/CCN-RSA/Results/modelRDMs/, loading already created Model RDM 
Model RDM for action_intention_euclidean with distance metric: euclidean was found in /Users/huseyinelmas/Desktop/CCN-Lab/CCN-RSA/Results/modelRDMs/, loading already created Model RDM 
Model RDM for action_target_euclidean with distance metric: euclidean was found in /Users/huseyinelmas/Desktop/CCN-Lab/CCN-RSA/Results/modelRDMs/, loading already created Model RDM 
Model RDM for biological_appearance_euclidean with distance metric: euclidean was found in /Users/huseyinelmas/Desktop/CCN-Lab/CCN-RSA/Results/modelRDMs/, loading already created Model RDM 
Model RDM for action_category_euclidean with distance metric: euclidean was found in /Users/huseyinelmas/Desktop/CCN-Lab/CCN-RSA/Results/modelRDMs/, loading already created Model RDM 
Model RDM for humanness_euclidean with distance metric: euclidean was found

In [11]:
# Show whether the EEG RDMs were loaded from the HDF5 cache (True) or still have to be computed (False).
eeg_rdm_ready

True

In [10]:
# Compute the EEG RDMs of every subject for every time window, unless they were
# already loaded from the HDF5 cache (eeg_rdm_ready is True in that case).
if not eeg_rdm_ready:
    # Every key is a time window and every value is a list with the corresponding RDM of each subject.
    windowed_eeg_rdm_dict = defaultdict(list)
    for subject_folder in subject_folders:
        # NOTE(review): only the first 6 characters of the folder name are used as the subject
        # name; folders sharing that prefix would share RDM names -- confirm the folder naming.
        subj_name = subject_folder[0:6]
        subj_path = eeg_root_path + subject_folder + "/action-mats/"

        # Keys are time windows. Per the original author's note, each value is a 3D ndarray
        # of size (n_conditions, n_trials, n_channels); since n_trials is not the same for
        # each condition, the missing values are filled with NaN.
        time_window_representations = rsa_io.build_eeg_data(subj_path, w_size)

        # Traverse each window in time_window_representations and calculate (or load) its RDM.
        # TODO: values of windowed_eeg_rdm_dict may be not a list
        #  but 3d numpy array (n_subjects, n_conditions, n_conditions)
        for window, eeg_data in time_window_representations.items():
            eeg_rdm_name = subj_name + '_eeg_rdm_' + str(window[0]) + ":" + str(window[1]) + "_" + eeg_rdm_dist_metric
            # Fixed: the two lookups below used the undefined name `eeg_RDM_path`, which
            # raised NameError whenever the RDMs had to be computed.
            if eeg_rdm_name + '.npy' not in os.listdir(eeg_rdm_path):
                windowed_eeg_rdm_dict[window].append(
                    rsa.create_rdm(eeg_data, eeg_rdm_dist_metric, eeg_rdm_name))
            else:
                windowed_eeg_rdm_dict[window].append(rsa_io.load_rdm(eeg_rdm_path + eeg_rdm_name))

    # Stack the per-subject RDMs of each window into one array.
    # Only existing keys are reassigned, so the dict size does not change while iterating.
    # TODO: try without vstack
    for window, eeg_rdm_list in windowed_eeg_rdm_dict.items():
        windowed_eeg_rdm_dict[window] = np.vstack(eeg_rdm_list)

    # Save eeg rdms to hdf5 file
    if save_eeg_rdms:
        rsa_io.save_to_hdf5(windowed_eeg_rdm_dict, eeg_rdm_dist_metric, w_size, eeg_rdm_fname, eeg_rdm_path)


In [16]:
# Correlate every model RDM with the EEG RDM of every time window (averaged over
# axis 0) and collect the results in one DataFrame, one row per model/window pair.

# The average does not depend on the model, so it is computed once per time window
# instead of once per (model, window) pair.
mean_eeg_rdm_per_window = {
    time_window: np.mean(EEG_RDM_list, axis=0)
    for time_window, EEG_RDM_list in windowed_eeg_rdm_dict.items()
}

rdm_statistics_list = []
for model_name, model_RDM in model_rdm_dict.items():
    for time_window, mean_eeg_rdm in mean_eeg_rdm_per_window.items():
        kendall_tau, kendall_p_value = rsa.correlate_models(model_RDM, mean_eeg_rdm)
        rdm_statistics_list.append([model_name, time_window, kendall_tau, kendall_p_value])

rdm_statistics_df = pd.DataFrame(rdm_statistics_list,
                                 columns=['Model_name', 'Time_window', 'Kendall_tau', 'Kendall_p-value'])


In [17]:
# Rows whose Kendall p-value is below 0.05, ordered by Kendall_tau from largest to smallest.
# NOTE(review): the p-values are filtered as-is; no multiple-comparison correction is applied in this cell.
(
    rdm_statistics_df
    .loc[rdm_statistics_df["Kendall_p-value"] < 0.05]
    .sort_values(by="Kendall_tau", ascending=False)
)

Unnamed: 0,Model_name,Time_window,Kendall_tau,Kendall_p-value
46,action_target_euclidean,"(50, 75)",0.135101,0.00374
30,action_intention_euclidean,"(50, 75)",0.131989,0.007453
58,biological_appearance_euclidean,"(300, 325)",0.130535,0.008136
57,biological_appearance_euclidean,"(275, 300)",0.125463,0.010973
4,movement_nature_euclidean,"(175, 200)",0.119272,0.015604
84,humanness_euclidean,"(175, 200)",0.119272,0.015604
2,movement_nature_euclidean,"(125, 150)",0.115393,0.019315
82,humanness_euclidean,"(125, 150)",0.115393,0.019315
56,biological_appearance_euclidean,"(250, 275)",0.108978,0.02715
5,movement_nature_euclidean,"(200, 225)",0.101817,0.039001
