In [11]:
import glob
import os

import numpy as np
import pandas as pd
from scipy.stats import spearmanr #Loocv
from scipy.stats import pointbiserialr
from sklearn.metrics.pairwise import euclidean_distances#transform dataframe into rdm

import seaborn as sns
import matplotlib.pyplot as plt

In [12]:
#filter target subjects    
def Filter(string, substr):
    """Keep the entries of ``string`` that contain at least one of ``substr``.

    Parameters
    ----------
    string : iterable of str
        Candidate strings (in this notebook: file paths).
    substr : iterable of str
        Substrings to look for (in this notebook: subject ids).

    Returns
    -------
    list of str
        The matching candidates, in their original order.

    Notes
    -----
    Matching is plain substring containment, so an id such as ``'sub1'``
    also matches ``'sub10'``.
    """
    kept = []
    for candidate in string:
        for needle in substr:
            if needle in candidate:
                kept.append(candidate)
                # One hit is enough; do not add the same candidate twice.
                break
    return kept

# Import data

## dim: turn dataframe into RDM

In [13]:
# Build one dissimilarity matrix (RDM) per subject from the scaled dimension
# ratings and write it next to the inputs.
input_dir = '../output_data/individual/dimension/clean_results/'
# Sorted so the processing order does not depend on the filesystem listing order.
dim_subj_data_paths_all = sorted(glob.glob(input_dir+'dim_rel_scaled/*.csv'))

# to_csv does not create missing folders, so make sure the destination exists.
rdm_output_dir = input_dir+'RDMs_dim_rel_scaled/'
os.makedirs(rdm_output_dir, exist_ok=True)

for f in dim_subj_data_paths_all:
    # Subject id = file name up to its first dot. os.path.basename uses the
    # running OS's separator rules, unlike the previous split('\\')[1],
    # which only worked on Windows-style paths.
    sub_id = os.path.basename(f).split('.')[0]
    dim_df = pd.read_csv(f,index_col = 0)
    # Pairwise Euclidean distance between the rows of the rating table;
    # rows and columns of the result are both labelled with the row index.
    dim_dissim = pd.DataFrame(euclidean_distances(dim_df),
                              index=dim_df.index,columns=dim_df.index)
    dim_dissim.to_csv(rdm_output_dir+sub_id+'.csv')

In [14]:
#participants who finished dimension rating
# Sorted so the participant order is reproducible across filesystems.
dim_within_data_paths = sorted(glob.glob('../output_data/individual/dimension/clean_results/RDMs_dim_rel_scaled/*.csv'))
# Subject id = file name up to its first dot. os.path.basename follows the
# running OS's separator rules; the previous split('\\')[1] raised
# IndexError on paths without a backslash.
sub_within_list = [os.path.basename(i).split('.')[0] for i in dim_within_data_paths]
print('Found participants who finished dimension rating task: '+str(len(sub_within_list))+' participants.')

Found participants who finished dimension rating task: 14 participants.


## cat: filter within participants 

In [15]:
# all 60 participants
# Sorted so the participant order is reproducible across filesystems.
ca_subj_data_paths_all = sorted(glob.glob('../output_data/individual/category/Subject_Category_RDMs_revised/*.csv'))
ma_subj_data_paths_all = sorted(glob.glob('../output_data/individual/category/Subject_MA_RDMs_revised/*.csv'))

# within participants: keep only the files whose path contains one of the
# subject ids in sub_within_list (substring match).
#Explicit task
ca_within_data_paths = Filter(ca_subj_data_paths_all, sub_within_list)
print('Explicit task:From {} subjects filter {} subjects.'.format(len(ca_subj_data_paths_all),len(ca_within_data_paths)))
#Implicit task
ma_within_data_paths = Filter(ma_subj_data_paths_all, sub_within_list)
# Report the number of files actually matched (not the number of ids asked
# for), so a missing implicit-task file is visible in the output.
print('Implicit task:From {} subjects filter {} subjects.'.format(len(ma_subj_data_paths_all),len(ma_within_data_paths)))

Explicit task:From 60 subjects filter 14 subjects.
Implicit task:From 60 subjects filter 14 subjects.


# LOOCV

All correlations were computed with Spearman's rank correlation.

In [16]:
def LOOCV(subj_data_paths):
    """Leave-one-out correlation between each subject's matrix and the rest.

    For every file in ``subj_data_paths`` the strict upper triangle of that
    subject's matrix is correlated (Spearman) with the strict upper triangle
    of the element-wise mean of all the other subjects' matrices.

    Parameters
    ----------
    subj_data_paths : list of str
        Paths to per-subject CSV files. Each file is read with its first
        column as the index and must contain one square matrix; all matrices
        must have the same shape. Matrices are combined by position, not by
        row/column label.

    Returns
    -------
    pandas.DataFrame
        One row per input path, in input order, with columns ``'sub_id'``
        (file name up to its first dot) and ``'rsa'`` (Spearman correlation).

    Raises
    ------
    ValueError
        If fewer than two paths are given, if a matrix is not square, or if
        the matrices do not all share the same shape.
    """
    subj_data_paths = list(subj_data_paths)
    n_subjects = len(subj_data_paths)
    if n_subjects < 2:
        raise ValueError(
            'LOOCV needs at least 2 subjects, got {}.'.format(n_subjects))

    # os.path.basename follows the running OS's separator rules; the previous
    # split('\\')[1] only worked on Windows-style paths.
    sub_id_list = [os.path.basename(path).split('.')[0]
                   for path in subj_data_paths]

    # Read every file once instead of once per left-out subject.
    matrices = [np.array(pd.read_csv(path, index_col=0))
                for path in subj_data_paths]

    # Infer the matrix size from the data rather than hard-coding it.
    shape = matrices[0].shape
    if shape[0] != shape[1]:
        raise ValueError(
            'Expected a square matrix in {}, got shape {}.'.format(
                subj_data_paths[0], shape))
    for path, matrix in zip(subj_data_paths, matrices):
        if matrix.shape != shape:
            raise ValueError(
                'Matrix in {} has shape {}, expected {}.'.format(
                    path, matrix.shape, shape))

    # Strict upper triangle: the diagonal and the mirrored lower half are skipped.
    upper = np.triu_indices(shape[0], k=1)

    rsa_list = []
    for left_idx, dissim_left_one in enumerate(matrices):
        # Sum the remaining subjects in input order (same accumulation order
        # as before, so the averages are numerically unchanged).
        dissim_remove_one = np.zeros(shape)
        for other_idx, other in enumerate(matrices):
            if other_idx != left_idx:
                dissim_remove_one += other
        dissim_remove_one_avg = dissim_remove_one / (n_subjects - 1)

        # Correlation between the left-out subject and the mean of the others.
        rsa_list.append(
            spearmanr(dissim_remove_one_avg[upper], dissim_left_one[upper])[0])

    # Number of subjects averaged in each fold (kept for the notebook output).
    print(n_subjects - 1)

    rsa_list_df = pd.DataFrame({'sub_id': sub_id_list, 'rsa': rsa_list})
    return rsa_list_df

In [17]:
# Leave-one-out correlations for the within-participant sample, one table per task.
# Explicit task (category RDMs)
exp_raw_loocv = LOOCV(subj_data_paths=ca_within_data_paths)
# Implicit task (MA RDMs)
imp_raw_loocv = LOOCV(subj_data_paths=ma_within_data_paths)
# Dimension rating task
dim_raw_loocv = LOOCV(subj_data_paths=dim_within_data_paths)

13
13
13
