In [1]:
%matplotlib inline

Within-subject SVM classification based on beta weights (per trials) averaged within networks from different grains of MIST parcellation, for CIMAQ memory encoding task (fMRI data).
Mean network betas reflect the engagement of a particular network for each trial.
MIST Parcellations include: 7, 12, 20, 36, 64, 122, 197, 325, 444 networks

Trials (conditions) are classifierd according to:
- task condition (encoding or control task)
- memory performance (hit vs miss, correct vs incorrect source)
- stimulus category (?)

Each model is ran and tested on data from the same subject, and then group statistics (confidence intervals) are computed around accuracy scores from each individual participant.

In [2]:
import os
import sys
import glob
import numpy as np
import pandas as pd
import nilearn
import scipy
import nibabel as nb
import sklearn
import seaborn as sns
import itertools
from tqdm import tqdm

from os import listdir as ls
from os.path import basename as bname
from os.path import dirname as dname
from os.path import expanduser as xpu
from os.path import join as pjoin
from numpy import nan as NaN
from matplotlib import pyplot as plt
from nilearn import image, plotting
from nilearn import masking
from nilearn import plotting
from nilearn import datasets
from nilearn.plotting import plot_stat_map, plot_roi, plot_anat, plot_img, show
from nilearn.input_data import NiftiMasker, NiftiLabelsMasker
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC, LinearSVC
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, precision_score, f1_score
from sklearn.model_selection import cross_val_predict, cross_val_score
from sklearn.preprocessing import MinMaxScaler

import chardet
import loadutils as lu
import sniffbytes as snif
from pathlib import Path
from pandas import DataFrame as df



In [61]:
vocab_columns = [itm.strip() for itm in 
 'PSCID, CandID, Candidate_Age, Data_entry_completion_status, Validity, Visit_stage, 87625_lit_bed,\
 87625_lit_bed_status, 87625_lit_bed_score, 87625_dollar_ship, 87625_dollar_ship_status, \
 87625_dollar_ship_score, 87625_terminer_penny, 87625_terminer_penny_status, 87625_terminer_penny_score, \
 87625_reflechir_winter, 87625_reflechir_winter_status, 87625_reflechir_winter_score, 87625_samedi_breakfast, \
 87625_samedi_breakfast_status, 87625_samedi_breakfast_score, 87625_hiver_repair, 87625_hiver_repair_status, \
 87625_hiver_repair_score, 87625_navire_assemble, 87625_navire_assemble_status, 87625_navire_assemble_score, \
 87625_hier_yesterday, 87625_hier_yesterday_status, 87625_hier_yesterday_score, 87625_arracher_terminate, \
 87625_arracher_terminate_status, 87625_arracher_terminate_score, 87625_phrase_consume, \
 87625_phrase_consume_status, 87625_phrase_consume_score, 87625_reparer_sentence, \
 87625_reparer_sentence_status, 87625_reparer_sentence_score, 87625_designer_confide, \
 87625_designer_confide_status, 87625_designer_confide_score, 87625_prevenir_remorse, \
 87625_prevenir_remorse_status, 87625_prevenir_remorse_score, 87625_compassion_ponder, \
 87625_compassion_ponder_status, 87625_compassion_ponder_score, 87625_remords_compassion, \
 87625_remords_compassion_status, 87625_remords_compassion_score, 87625_colonie_tranquil, \
 87625_colonie_tranquil_status, 87625_colonie_tranquil_score, 87625_plagier_sanctuary, \
 87625_plagier_sanctuary_status, 87625_plagier_sanctuary_score, 87625_falsifier_designate, \
 87625_falsifier_designate_status, 87625_falsifier_designate_score, 87625_sanctuaire_reluctant, \
 87625_sanctuaire_reluctant_status, 87625_sanctuaire_reluctant_score, 87625_audacieux_colony, \
 87625_audacieux_colony_status, 87625_audacieux_colony_score, 87625_generer_generate, \
 87625_generer_generate_status, 87625_generer_generate_score, 87625_tangible_ballad, \
 87625_tangible_ballad_status, 87625_tangible_ballad_score, 87625_bouder_pout, 87625_bouder_pout_status, \
 87625_bouder_pout_score, 87625_reticent_plagiarize, 87625_reticent_plagiarize_status, \
 87625_reticent_plagiarize_score, 87625_quietude_diverse, 87625_quietude_diverse_status, \
 87625_quietude_diverse_score, 87625_heterogene_evolve, 87625_heterogene_evolve_status, \
 87625_heterogene_evolve_score, 87625_consumer_tangible, 87625_consumer_tangible_status, \
 87625_consumer_tangible_score, 87625_amorphe_fortitude, 87625_amorphe_fortitude_status, \
 87625_amorphe_fortitude_score, 87625_epopee_epic, 87625_epopee_epic_status, 87625_epopee_epic_score, \
 87625_litige_audacious, 87625_litige_audacious_status, 87625_litige_audacious_score, 87625_tirade_ominous, \
 87625_tirade_ominous_status, 87625_tirade_ominous_score, 87625_connivence_encumber, \
 87625_connivence_encumber_status, 87625_connivence_encumber_score, 87625_belligerant_tirade, \
 87625_belligerant_tirade_status, \
 87625_belligerant_tirade_score, 87625_remarques, 87625_remarques_status, 87625_score, comments'.split(',')]

['PSCID',
 'CandID',
 'Candidate_Age',
 'Data_entry_completion_status',
 'Validity',
 'Visit_stage',
 '87625_lit_bed',
 '87625_lit_bed_status',
 '87625_lit_bed_score',
 '87625_dollar_ship',
 '87625_dollar_ship_status',
 '87625_dollar_ship_score',
 '87625_terminer_penny',
 '87625_terminer_penny_status',
 '87625_terminer_penny_score',
 '87625_reflechir_winter',
 '87625_reflechir_winter_status',
 '87625_reflechir_winter_score',
 '87625_samedi_breakfast',
 '87625_samedi_breakfast_status',
 '87625_samedi_breakfast_score',
 '87625_hiver_repair',
 '87625_hiver_repair_status',
 '87625_hiver_repair_score',
 '87625_navire_assemble',
 '87625_navire_assemble_status',
 '87625_navire_assemble_score',
 '87625_hier_yesterday',
 '87625_hier_yesterday_status',
 '87625_hier_yesterday_score',
 '87625_arracher_terminate',
 '87625_arracher_terminate_status',
 '87625_arracher_terminate_score',
 '87625_phrase_consume',
 '87625_phrase_consume_status',
 '87625_phrase_consume_score',
 '87625_reparer_sentence',
 

In [62]:
['PSCID',
 'CandID',
 'Candidate_Age',
 'Data_entry_completion_status',
 'Validity',
 'Visit_stage',
 '87625_lit_bed',
 '87625_lit_bed_status',
 '87625_lit_bed_score',
 '87625_dollar_ship',
 '87625_dollar_ship_status',
 '87625_dollar_ship_score',
 '87625_terminer_penny',
 '87625_terminer_penny_status',
 '87625_terminer_penny_score',
 '87625_reflechir_winter',
 '87625_reflechir_winter_status',
 '87625_reflechir_winter_score',
 '87625_samedi_breakfast',
 '87625_samedi_breakfast_status',
 '87625_samedi_breakfast_score',
 '87625_hiver_repair',
 '87625_hiver_repair_status',
 '87625_hiver_repair_score',
 '87625_navire_assemble',
 '87625_navire_assemble_status',
 '87625_navire_assemble_score',
 '87625_hier_yesterday',
 '87625_hier_yesterday_status',
 '87625_hier_yesterday_score',
 '87625_arracher_terminate',
 '87625_arracher_terminate_status',
 '87625_arracher_terminate_score',
 '87625_phrase_consume',
 '87625_phrase_consume_status',
 '87625_phrase_consume_score',
 '87625_reparer_sentence',
 '87625_reparer_sentence_status',
 '87625_reparer_sentence_score',
 '87625_designer_confide',
 '87625_designer_confide_status',
 '87625_designer_confide_score',
 '87625_prevenir_remorse',
 '87625_prevenir_remorse_status',
 '87625_prevenir_remorse_score',
 '87625_compassion_ponder',
 '87625_compassion_ponder_status',
 '87625_compassion_ponder_score',
 '87625_remords_compassion',
 '87625_remords_compassion_status',
 '87625_remords_compassion_score',
 '87625_colonie_tranquil',
 '87625_colonie_tranquil_status',
 '87625_colonie_tranquil_score',
 '87625_plagier_sanctuary',
 '87625_plagier_sanctuary_status',
 '87625_plagier_sanctuary_score',
 '87625_falsifier_designate',
 '87625_falsifier_designate_status',
 '87625_falsifier_designate_score',
 '87625_sanctuaire_reluctant',
 '87625_sanctuaire_reluctant_status',
 '87625_sanctuaire_reluctant_score',
 '87625_audacieux_colony',
 '87625_audacieux_colony_status',
 '87625_audacieux_colony_score',
 '87625_generer_generate',
 '87625_generer_generate_status',
 '87625_generer_generate_score',
 '87625_tangible_ballad',
 '87625_tangible_ballad_status',
 '87625_tangible_ballad_score',
 '87625_bouder_pout',
 '87625_bouder_pout_status',
 '87625_bouder_pout_score',
 '87625_reticent_plagiarize',
 '87625_reticent_plagiarize_status',
 '87625_reticent_plagiarize_score',
 '87625_quietude_diverse',
 '87625_quietude_diverse_status',
 '87625_quietude_diverse_score',
 '87625_heterogene_evolve',
 '87625_heterogene_evolve_status',
 '87625_heterogene_evolve_score',
 '87625_consumer_tangible',
 '87625_consumer_tangible_status',
 '87625_consumer_tangible_score',
 '87625_amorphe_fortitude',
 '87625_amorphe_fortitude_status',
 '87625_amorphe_fortitude_score',
 '87625_epopee_epic',
 '87625_epopee_epic_status',
 '87625_epopee_epic_score',
 '87625_litige_audacious',
 '87625_litige_audacious_status',
 '87625_litige_audacious_score',
 '87625_tirade_ominous',
 '87625_tirade_ominous_status',
 '87625_tirade_ominous_score',
 '87625_connivence_encumber',
 '87625_connivence_encumber_status',
 '87625_connivence_encumber_score',
 '87625_belligerant_tirade',
 '87625_belligerant_tirade_status',
 '87625_belligerant_tirade_score',
 '87625_remarques',
 '87625_remarques_status',
 '87625_score',
 'comments'].__len__()


109

Step 1: import list of participants, and generate sublists of participants who have enough trials per category for each classification.

Encoding vs Control task conditions (all 94)
Stimulus category (all 94)
Hit versus Miss (42 participants; at least 15 trials per condition)
Correct Source versus Wrong Source (49 participants; at least 15 trials per condition)
Correct Source versus Miss (38 participants; at least 15 trials per condition)
*NOTE: ADD filter to exclude participants with too many scrubbed frames?? *

In [None]:
# sorted(ls(xpu('~/../../data/simexp/DATA/cimaq_03-19/derivatives/CIMAQ_fmri_memory/data/participants/Neuropsych/tables')))

In [80]:
tablespath02 = xpu('~/cimaq_memory/neuropsych_tables/neuropsych_tables')
vocab=Path(pjoin(tablespath02,'edited_87625_vocabulaire.csv')).read_text()
vocab=df([line.split(',')[:109] for line in vocab.splitlines()[1:]
 if not line.startswith('(Q)')], columns=vocab.splitlines()[0].split(','))

In [106]:
txtvocab=Path(pjoin(tablespath02,'edited_87625_vocabulaire.csv')).read_text()
raw_txtvocab=Path(pjoin(dname(tablespath02),'87625_vocabulaire.csv')).read_text()
raw_vocab_uncommented=df([line.split(',')[:108] for line in txtvocab.splitlines()
                      if not line.startswith('(Q)')])
raw_comments=[line.split(',')[:108] for line in txtvocab.splitlines()
                      if line.startswith('(Q)')]
len(raw_comments), raw_vocab_uncommented

(2076,
          0        1               2                              3    \
 0      PSCID   CandID   Candidate_Age   Data_entry_completion_status   
 1    7546989   107534            74.1                       Complete   
 2    4509950   108391            70.3                       Complete   
 3    4201521   108776            67.6                       Complete   
 4    4904940   113820            81.5                       Complete   
 ..       ...      ...             ...                            ...   
 387  3925666   990434            67.2                       Complete   
 388  9371132   992462            75.5                       Complete   
 389  5054433   992749            82.2                       Complete   
 390  4576408   996599            68.4                       Complete   
 391  7543023   998166            74.2                       Complete   
 
            4             5               6                      7    \
 0     Validity   Visit_stage   87625_lit_b

In [104]:
[(item[0],item[1].unique())
 for item in vocab_uncommented.iteritems()]
vocab_uncommented.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,98,99,100,101,102,103,104,105,106,107
0,PSCID,CandID,Candidate_Age,Data_entry_completion_status,Validity,Visit_stage,87625_lit_bed,87625_lit_bed_status,87625_lit_bed_score,87625_dollar_ship,...,87625_tirade_ominous_score,87625_connivence_encumber,87625_connivence_encumber_status,87625_connivence_encumber_score,87625_belligerant_tirade,87625_belligerant_tirade_status,87625_belligerant_tirade_score,87625_remarques,87625_remarques_status,87625_score
1,7546989,107534,74.1,Complete,Valid,Approval,,donnee_non_disponible,2,,...,,,,,,,,,,
2,4509950,108391,70.3,Complete,Valid,Approval,,donnee_non_disponible,2,,...,,,,,,,,,,
3,4201521,108776,67.6,Complete,Valid,Approval,,donnee_non_disponible,2,,...,,,,,,,,,,
4,4904940,113820,81.5,Complete,Valid,Approval,,donnee_non_disponible,2,,...,,,,,,,,,,


In [85]:
vocab
[(item[0], item[1].unique()) for item in vocab.iteritems()]
vocab.shape

(391, 109)

In [87]:
from pathlib import Path
tablespath02=xpu('~/cimaq_memory/neuropsych_tables/neuropsych_tables')
'''.decode().translate({'è':'e','é':'e','ê':'e', 'à':'a','â':'a'})'''
tablespath=xpu('~/../../data/simexp/DATA/cimaq_03-19/derivatives/CIMAQ_fmri_memory/data/participants/Neuropsych/tables')
tables=[(table, snif.read_raw(pjoin(tablespath02, table)))
        for table in ls(tablespath02)]
tables=[{'name': itm[0],
        'cols': itm[1].decode('Latin-1').splitlines()[0],
        'table': df([line.split(',') for line in
                     itm[1].decode('Latin-1').splitlines()[1:]])
        }
        for itm in tables]
# [itm[1].iloc[:,1] for itm in tables]
tablesdf = df(tables)
# .set_index().loc[valididxs]

In [100]:
[(table, pd.read_csv(pjoin(tablespath, table), encoding='Latin-1')) for table in ls(tablespath)]

ParserError: Error tokenizing data. C error: Expected 11 fields in line 228, saw 15


In [91]:
dict(tuple(zip(tablesdf.name.values, tablesdf.table.values)))

{'77180_stroop.csv':                                       0       1     2         3      4   \
 0                                7546989  107534  74.1  Complete  Valid   
 1                                4509950  108391  70.3  Complete  Valid   
 2                                4201521  108776  67.6  Complete  Valid   
 3                                4904940  113820  81.5  Complete  Valid   
 4    Condition 4 : 2 erreusr en pratique          None      None   None   
 ..                                   ...     ...   ...       ...    ...   
 315                              3925666  990434  67.2  Complete  Valid   
 316                              9371132  992462  75.5  Complete  Valid   
 317                              5054433  992749  82.2  Complete  Valid   
 318                              4576408  996599  68.4  Complete  Valid   
 319                              7543023  998166  74.2  Complete  Valid   
 
            5     6     7     8     9   ...    24    25    26    2

In [48]:
[(len(row[1].cols.split(',')), row[1].table.shape[1])
 for row in df(tables).iterrows()]


[(108, 197),
 (93, 123),
 (41, 42),
 (32, 33),
 (72, 83),
 (37, 36),
 (29, 22),
 (16, 18),
 (14, 13),
 (25, 27),
 (255, 297),
 (40, 39),
 (41, 40),
 (12, 13),
 (157, 247),
 (11, 18),
 (77, 79),
 (41, 40),
 (54, 70),
 (20, 19),
 (70, 69),
 (10, 10)]

In [25]:
[(row[1].table.shape, len(row[1].cols.split(',')),
  row[1].table.shape[1] == len(row[1].cols.split(',')))
 for row in df(tables).iterrows()]
# df(tables).iloc[0].cols

[((285, 36), 37, False),
 ((307, 83), 72, False),
 ((307, 27), 25, False),
 ((4235, 123), 93, False),
 ((286, 297), 255, False),
 ((4687, 22), 29, False),
 ((316, 18), 16, False),
 ((286, 40), 41, False),
 ((320, 33), 32, False),
 ((539, 18), 11, False),
 ((287, 39), 40, False),
 ((293, 42), 41, False),
 ((290, 13), 12, False),
 ((287, 19), 20, False),
 ((399, 197), 108, False),
 ((300, 79), 77, False),
 ((287, 40), 41, False),
 ((10074, 69), 70, False),
 ((387, 70), 54, False),
 ((18336, 247), 157, False),
 ((285, 10), 10, True),
 ((287, 13), 14, False)]

In [127]:
# chardet.detect(b'\xc3\xa9')
# 'é'.encode('UTF-8'), 'é'.encode('ISO-8859-1')
chardet.detect(b'0xe9')
b'0xe9'.decode()

'0xe9'

In [109]:
subtestspath=xpu('~/../../data/simexp/DATA/cimaq_03-19/derivatives/CIMAQ_fmri_memory/data/participants/Neuropsych/subtests')
subtests=[pd.read_csv(file, sep='\t').set_index('CandID') for file in lu.loadimages(subtestspath)]
allindexes=[int(itm) for itm in pd.Series(lu.flatten(tuple(test.index)
                                          for test in subtests)).unique()[:-2]]
qcindexes=list(qc_ok.set_index('sub_ids').index)
def lst_intersection(lst1, lst2):
    '''
    Source: https://www.geeksforgeeks.org/python-intersection-two-lists/
    '''
    return [value for value in lst1 if value in set(lst2)]
valid_indexes=lst_intersection(allindexes, qcindexes)
#                if not itm in [630120, 956130, 267168, 427357, 127228, 845675, 886007, 197192, 920577, 956049, 988602]]
#  in valid_indexes
subtests=[subtest.loc[[ind for ind in pd.Index(valid_indexes)
                       if ind in subtest.index]] for subtest in subtests]
valididxs=[int(itm) for itm in pd.Series(
    lu.flatten(list(subtest.index)
               for subtest in subtests)).unique()]
allsubtests=pd.concat(subtests, axis=1)
allsubtests.index=allsubtests.index.astype(int)
allsubtests

Unnamed: 0_level_0,24918_score_hist_rappel_immediat,d.70664_score,86932_mots_justes_essai_1,86932_mots_justes_essai_total1,86932_mots_justes_rappel_diff_a,86932_score_total_reconnaissance,34013_cdr_sb,40801_score_hist_rappel_differe,12783_score,12783_score_scolarite,...,71233_rappel_alpha_item_reussis,71233_rappel_alpha_pourcentage,77180_cond3_temps_total,77180_cond3_total_erreurs_corrigees,77180_cond3_total_erreurs_non_corrigees,77180_cond4_temps_total,77180_cond4_total_erreurs_corrigees,77180_cond4_total_erreurs_non_corrigees,18087_score_libre_correcte,18087_score_indice_correcte
CandID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
108391,16.0,13.0,6,53.0,11.0,45.0,0.0,16.0,30.0,30.0,...,10.0,33.3,38.46,1.0,0.0,46.61,0.0,0.0,7.0,6.0
120839,20.0,1.0,8,57.0,11.0,44.0,0.0,19.0,27.0,27.0,...,14.0,93.3,75.64,3.0,0.0,63.11,4.0,2.0,9.0,6.0
122922,16.0,8.0,6,48.0,9.0,49.0,0.5,17.0,28.0,28.0,...,7.0,35.0,,,,,,,5.0,5.0
127228,10.0,12.0,6,48.0,12.0,47.0,0.0,6.0,29.0,29.0,...,11.0,44.0,58.00,1.0,0.0,48.00,0.0,1.0,7.0,5.0
139593,12.0,15.0,5,42.0,4.0,44.0,0.5,11.0,23.0,23.0,...,14.0,56.0,94.52,2.0,2.0,101.58,3.0,1.0,6.0,7.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
979001,18.0,2.0,7,53.0,12.0,50.0,0.0,19.0,29.0,29.0,...,20.0,80.0,67.00,1.0,0.0,55.95,1.0,1.0,7.0,5.0
983291,14.0,4.0,7,49.0,13.0,49.0,0.0,14.0,27.0,27.0,...,14.0,56.0,59.21,0.0,0.0,66.68,1.0,1.0,8.0,7.0
988602,20.0,4.0,5,53.0,10.0,48.0,0.0,19.0,27.0,27.0,...,14.0,70.0,54.38,0.0,0.0,45.50,0.0,0.0,8.0,2.0
996599,12.0,3.0,6,43.0,8.0,43.0,0.5,10.0,26.0,26.0,...,9.0,45.0,100.00,1.0,0.0,68.00,2.0,3.0,8.0,6.0


In [110]:
behavfile=xpu('~/../../data/simexp/DATA/cimaq_03-19/derivatives/CIMAQ_fmri_memory/data/participants/TaskResults/fMRI_behavMemoScores.tsv')
fMRI_behavMemoScores=pd.read_csv(behavfile, sep='\t').set_index('dccID').loc[valididxs].sort_index()
motionfile=xpu('~/../../data/simexp/DATA/cimaq_03-19/derivatives/CIMAQ_fmri_memory/data/participants/MotionResults/fMRI_meanMotion.tsv')
fMRI_meanMotion=pd.read_csv(motionfile, sep='\t').set_index('id').loc[valididxs].sort_index()
ALL_Neuropsych_file=xpu('~/../../data/simexp/DATA/cimaq_03-19/derivatives/CIMAQ_fmri_memory/data/participants/Neuropsych/ALL_Neuropsych_scores.tsv')
ALL_Neuropsych_scores=pd.read_csv(ALL_Neuropsych_file, sep='\t').set_index('dccid').loc[valididxs].sort_index()
ALL_Neuropsych_scores

Unnamed: 0_level_0,hachinski_score,cdr_sb,mmse_total,moca_score,moca_score_schooling,gds_score,WAIS_digit_symbol_total,trailA_time,trailB_time,trailB_trailA_ratio,...,memoria_free_correct,memoria_total_correct,name_face_immediate_recall,name_face_delayed_recall,log_story_immediate_recall,log_story_delayed_recall,RAVLT_trial1,RAVLT_total,RAVLT_delRecall,RAVLT_recognition
dccid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
108391,1.0,0.0,25.0,30.0,30.0,13.0,52.0,29.96,39.53,1.32,...,7.0,13.0,7.0,8.0,16.0,16.0,6.0,53.0,11.0,45.0
120839,0.0,0.0,25.0,27.0,27.0,1.0,78.0,55.68,123.52,2.22,...,9.0,15.0,7.0,5.0,20.0,19.0,8.0,57.0,11.0,44.0
122922,0.0,0.5,25.0,28.0,28.0,8.0,66.0,38.88,96.88,2.49,...,5.0,10.0,4.0,2.0,16.0,17.0,6.0,48.0,9.0,49.0
127228,0.0,0.0,25.0,29.0,29.0,12.0,59.0,28.00,54.00,1.93,...,7.0,12.0,5.0,5.0,10.0,6.0,6.0,48.0,12.0,47.0
139593,0.0,0.5,26.0,23.0,23.0,15.0,45.0,60.84,161.60,2.66,...,6.0,13.0,2.0,1.0,12.0,11.0,5.0,42.0,4.0,44.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
979001,0.0,0.0,26.0,29.0,29.0,2.0,73.0,29.00,50.87,1.75,...,7.0,12.0,3.0,5.0,18.0,19.0,7.0,53.0,12.0,50.0
983291,0.0,0.0,24.0,27.0,27.0,4.0,53.0,38.97,73.25,1.88,...,8.0,15.0,7.0,7.0,14.0,14.0,7.0,49.0,13.0,49.0
988602,0.0,0.0,26.0,27.0,27.0,4.0,69.0,29.72,56.76,1.91,...,8.0,10.0,,,20.0,19.0,5.0,53.0,10.0,48.0
996599,1.0,0.5,21.0,26.0,26.0,3.0,55.0,58.41,98.00,1.68,...,8.0,14.0,4.0,5.0,12.0,10.0,6.0,43.0,8.0,43.0


In [111]:
# lu.loadfiles(lu.loaddirs(xpu('~/../../data/simexp/DATA/cimaq_03-19')))
datapath=xpu('~/../../data/simexp/DATA/cimaq_03-19/derivatives/CIMAQ_fmri_memory/data')
pd.Series([dname(apath) for apath in
 lu.loadfiles(lu.loaddirs(datapath)).fpaths.unique().tolist()]).unique().tolist()
# [(dname(itm), itm, ls(itm)) for itm in
#  [pjoin(datapath, itm[0], itm[1]) for itm in
#  pd.Series(tuple(zip(lu.loadfiles(lu.loaddirs(datapath)).parent,
#                     lu.loadfiles(lu.loaddirs(datapath)).filename))).unique()]]

['/home/fnadeau/../../data/simexp/DATA/cimaq_03-19/derivatives/CIMAQ_fmri_memory/data/features/beta_maps',
 '/home/fnadeau/../../data/simexp/DATA/cimaq_03-19/derivatives/CIMAQ_fmri_memory/data/templates/MIST_parcellation/Release',
 '/home/fnadeau/../../data/simexp/DATA/cimaq_03-19/derivatives/CIMAQ_fmri_memory/data/templates',
 '/home/fnadeau/../../data/simexp/DATA/cimaq_03-19/derivatives/CIMAQ_fmri_memory/data/participants',
 '/home/fnadeau/../../data/simexp/DATA/cimaq_03-19/derivatives/CIMAQ_fmri_memory/data/templates/MIST_parcellation',
 '/home/fnadeau/../../data/simexp/DATA/cimaq_03-19/derivatives/CIMAQ_fmri_memory/data/features/beta_maps/998166',
 '/home/fnadeau/../../data/simexp/DATA/cimaq_03-19/derivatives/CIMAQ_fmri_memory/data/features/beta_maps/229301',
 '/home/fnadeau/../../data/simexp/DATA/cimaq_03-19/derivatives/CIMAQ_fmri_memory/data/features/beta_maps/658178',
 '/home/fnadeau/../../data/simexp/DATA/cimaq_03-19/derivatives/CIMAQ_fmri_memory/data/features/beta_maps/956130'

In [112]:
data_task_files=xpu('~/../../data/simexp/DATA/cimaq_03-19/derivatives/CIMAQ_fmri_memory/data/participants')
ls(data_task_files)

['sublist_3.tsv',
 'sublist_4.tsv',
 'Neuropsych',
 'sub_list.tsv',
 'TaskResults',
 'sublist_2.tsv',
 'Participants_bids.tsv',
 'MotionResults',
 'Participants_bids_headers.json',
 'Splitting_list.tsv',
 'MemoTaskParticipantFile_headers.json',
 'MemoTaskParticipantFile.tsv',
 'sublist_1.tsv',
 'sub_list_TaskQC.tsv']

In [113]:

# Path to directory with participant lists
# data_file = xpu('~/../../data/simexp/DATA/cimaq_03-19/Data/Participants/Splitting/Sub_list.tsv')
data_file = xpu('~/../../data/simexp/DATA/cimaq_03-19/derivatives/CIMAQ_fmri_memory/data/participants/Participants_bids.tsv')
qcfile = xpu('~/../../data/simexp/DATA/cimaq_03-19/derivatives/CIMAQ_fmri_memory/data/participants/sub_list_TaskQC.tsv')
qc_ok=pd.read_csv(qcfile, sep='\t')
MemoTaskParticipantFile=xpu('~/../../data/simexp/DATA/cimaq_03-19/derivatives/CIMAQ_fmri_memory/data/participants/MemoTaskParticipantFile.tsv')
mtaskfile=pd.read_csv(MemoTaskParticipantFile, sep='\t').set_index('participant_id').loc[qc_ok.set_index('sub_ids').index]
sub_data = pd.read_csv(data_file, sep = '\t')

# sub_data.where(sub_data.participant_id in qc_ok.values)
sub_data=sub_data.set_index('participant_id').loc[qc_ok.set_index('sub_ids').index].sort_index()
# sub_data=sub_data.reset_index(drop=False)
sub_data
mtaskfile
allinfos=pd.concat([sub_data, mtaskfile.sort_index(),
                    fMRI_behavMemoScores, fMRI_meanMotion,
                    ALL_Neuropsych_scores, allsubtests], axis=1)
allinfos

Unnamed: 0,clinical_age_months,schooling,sex,group,group_detail,site,pscid,fMRI,cognitive_status,clinical_age_months.1,...,71233_rappel_alpha_item_reussis,71233_rappel_alpha_pourcentage,77180_cond3_temps_total,77180_cond3_total_erreurs_corrigees,77180_cond3_total_erreurs_non_corrigees,77180_cond4_temps_total,77180_cond4_total_erreurs_corrigees,77180_cond4_total_erreurs_non_corrigees,18087_score_libre_correcte,18087_score_indice_correcte
108391,843.6,21.0,Female,SCD,SCD,IGM,4509950,yes,SCD,843.6,...,10.0,33.3,38.46,1.0,0.0,46.61,0.0,0.0,7.0,6.0
120839,940.8,14.0,Female,CTL,CTL,IGM,7424803,yes,Controls,940.8,...,14.0,93.3,75.64,3.0,0.0,63.11,4.0,2.0,9.0,6.0
122922,894.0,18.0,Male,SCD,SCD+,IGM,7874568,yes,SCD,894.0,...,7.0,35.0,,,,,,,5.0,5.0
127228,879.6,14.0,Female,SCD,SCD+,IGM,3865361,yes,SCD,879.6,...,11.0,44.0,58.00,1.0,0.0,48.00,0.0,1.0,7.0,5.0
139593,906.0,16.0,Female,MCI,eMCI,JGH,5760364,yes,MCI,906.0,...,14.0,56.0,94.52,2.0,2.0,101.58,3.0,1.0,6.0,7.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
979001,795.6,19.0,Male,CTL,CTL,CNQ,7516889,yes,Controls,795.6,...,20.0,80.0,67.00,1.0,0.0,55.95,1.0,1.0,7.0,5.0
983291,796.8,16.0,Female,SCD,SCD,CNQ,3874544,yes,SCD,796.8,...,14.0,56.0,59.21,0.0,0.0,66.68,1.0,1.0,8.0,7.0
988602,813.6,17.0,Female,SCD,SCD+,IGM,6633412,yes,SCD,813.6,...,14.0,70.0,54.38,0.0,0.0,45.50,0.0,0.0,8.0,2.0
996599,820.8,13.0,Female,MCI,eMCI,CNQ,4576408,yes,MCI,820.8,...,9.0,45.0,100.00,1.0,0.0,68.00,2.0,3.0,8.0,6.0


In [35]:


# Exclude participants who failed QC
# allinfos = allinfos[allinfos['QC_status']!= 'F']

## ADD filter to exclude participants with too many scrubbed frames?? ##

# Set minimal number of trials needed per subject to include them in analysis
num = 14

# Encoding vs Control, and Stimulus Category classifications
all_subs = allinfos.index
all_diagnosis = allinfos['cognitive_status']
print(all_subs)
print(len(all_subs))

# Hit versus Miss
hm_data = allinfos[allinfos['hits'] > num]
hm_data = hm_data[hm_data['miss'] > num]
hm_subs = hm_data.index
hm_diagnosis = hm_data['cognitive_status']
print(hm_subs)
print(len(hm_subs))

# Correct Source versus Wrong Source 
cw_data = allinfos[allinfos['correct_source'] > num]
cw_data = cw_data[cw_data['wrong_source'] > num]
cw_subs = cw_data.index
cw_diagnosis = cw_data['cognitive_status']
print(cw_subs)
print(len(cw_subs))

# Correct Source versus Miss
cmiss_data = allinfos[allinfos['correct_source'] > num]
cmiss_data = cmiss_data[cmiss_data['miss'] > num]
cmiss_subs = cmiss_data.index
cmiss_diagnosis = cmiss_data['cognitive_status']
print(cmiss_subs)
print(len(cmiss_subs))


Int64Index([108391, 120839, 122922, 127228, 139593, 147863, 150649, 164965,
            175295, 178101, 189005, 197192, 199801, 219637, 229301, 247659,
            254402, 255499, 258618, 258912, 267168, 270218, 271596, 314409,
            326073, 336665, 337021, 350555, 370092, 385370, 386333, 396250,
            403131, 408506, 413474, 427357, 437101, 439776, 441008, 458807,
            459801, 462345, 484204, 490035, 502616, 517070, 520377, 543589,
            549994, 555537, 567214, 597569, 619278, 628299, 630120, 652850,
            658178, 659068, 668786, 677561, 711830, 729722, 739694, 748676,
            763590, 778749, 783781, 785217, 785245, 804743, 845675, 866812,
            878354, 884343, 886007, 893978, 901551, 906145, 914042, 915022,
            920577, 932933, 936730, 938001, 955548, 956049, 956130, 968913,
            974246, 979001, 983291, 988602, 996599, 998166],
           dtype='int64', name='sub_ids')
94
Int64Index([108391, 122922, 139593, 164965, 199801, 247659

Step 2. Set up paths of directories of interest

Create empty data structures to save and export classification results


In [195]:
# set paths to directories of interest
beta_dir = xpu('~/../../data/simexp/DATA/cimaq_03-19/derivatives/CIMAQ_fmri_memory/data/features/beta_maps')
# label_dir = '/Users/mombot/Documents/Simexp/CIMAQ/Data/Nistats/Events'
label_dir=xpu('~/../../data/simexp/DATA/cimaq_03-19/derivatives/CIMAQ_fmri_memory/data/task_files/events')
# mask_dir = '/Users/mombot/Documents/Simexp/CIMAQ/Data/masks'
mask_dir=xpu('~/../../data/simexp/DATA/cimaq_03-19/derivatives/CIMAQ_fmri_memory/data/masks')
output_dir = '/Users/mombot/Documents/Simexp/CIMAQ/Data/Nilearn/Group_results'


Step 3. ENCODING VERSUS CONTROL TASK CLASSIFICATION

Build and test model for each participant on list, and compile data in a single pandas dataframe

In [51]:
# build data structure to store accuracy data and coefficients
enc_ctl_data = pd.DataFrame()
enc_ctl_data.insert(loc = 0, column = 'dccid', value = 'None', allow_duplicates=True)
# enc_ctl_data.insert(loc = 1, column = 'diagnosis', value = 'None', allow_duplicates=True)
for i in range(0, 10):
    enc_ctl_data.insert(loc = enc_ctl_data.shape[1], column = \
                        'CV'+str(i+1)+'_acc', value = NaN, allow_duplicates=True)
    enc_ctl_data.insert(loc = enc_ctl_data.shape[1], column = \
                        'TrainSet_MeanCV_acc', value = 'None', allow_duplicates=True)
    enc_ctl_data.insert(loc = enc_ctl_data.shape[1], column = \
                        'TestSet_acc', value = 'None', allow_duplicates=True)
enc_ctl_data

Unnamed: 0,dccid,CV1_acc,TrainSet_MeanCV_acc,TestSet_acc,CV2_acc,TrainSet_MeanCV_acc.1,TestSet_acc.1,CV3_acc,TrainSet_MeanCV_acc.2,TestSet_acc.2,...,TestSet_acc.3,CV8_acc,TrainSet_MeanCV_acc.3,TestSet_acc.4,CV9_acc,TrainSet_MeanCV_acc.4,TestSet_acc.5,CV10_acc,TrainSet_MeanCV_acc.5,TestSet_acc.6


In [196]:
# ENCODING VERSUS CONTROL TASK CLASSIFICATION



for sub in all_subs:
    print(sub)
    s_data = [sub]
    # load subject's beta maps (one per trial)
    betas = image.load_img(img=pjoin(beta_dir, str(sub), 'TrialContrasts/betas_sub'+str(sub)+'*.nii'),
                           wildcards=True)
    # initialize NiftiMasker object    
    sub_mask = nb.load(pjoin(mask_dir, 'func_sub'+str(sub)+'_mask_stereonl.nii'))
    sub_masker = NiftiMasker(mask_img=sub_mask, standardize=True)
    
    # transform subject's beta maps into vector of network means per trial
    X_enc_ctl = sub_masker.fit_transform(betas)

    # load subject's trial labels
#     labels_file = pjoin(label_dir, 'sub-'+str(sub)+'_enco_ctl.tsv')
    enco_ctl_labels = pd.read_csv(pjoin(label_dir, 'sub-'+str(sub)+'_enco_ctl.tsv'), sep='\t')
    y_enco_ctl = enco_ctl_labels['condition']
    
    # mask data to exclude trials of no interest
    # does not apply here
    
    # Split trials into a training and a test set
    X_train, X_test, y_train, y_test = train_test_split(
        X_enc_ctl, # x
        y_enco_ctl, # y
        test_size = 0.4, # 60%/40% split
        shuffle = True, # shuffle dataset before splitting
        stratify = y_enco_ctl, # keep distribution of conditions consistent betw. train & test sets
        #random_state = 123  # if set number, same shuffle each time, otherwise randomization algo
        ) 
    print('training:', len(X_train), 'testing:', len(X_test))
    print(y_train.value_counts(), y_test.value_counts())
    
    # define the model
    sub_svc = SVC(kernel='linear', class_weight='balanced')
    
    # do cross-validation to evaluate model performance
    # within 10 folds of training set
    # predict
    y_pred = cross_val_predict(sub_svc, X_train, y_train,
                               groups=y_train, cv=10)
    # scores
    cv_acc = cross_val_score(sub_svc, X_train, y_train,
                         groups=y_train, cv=10)
    print(cv_acc)
    
    for i in range(0, len(cv_acc)):
        s_data.append(cv_acc[i])
        
    # evaluate overall model performance on training data
    overall_acc = accuracy_score(y_pred = y_pred, y_true = y_train)
    overall_cr = classification_report(y_pred = y_pred, y_true = y_train)
    print('Accuracy:',overall_acc)
    print(overall_cr)
    
    s_data.append(overall_acc)

    # Test model on unseen data from the test set
    sub_svc.fit(X_train, y_train)
    y_pred = sub_svc.predict(X_test) # classify age class using testing data
    acc = sub_svc.score(X_test, y_test) # get accuracy

    cr = classification_report(y_pred=y_pred, y_true=y_test) # get prec., recall & f1
    # print results
    print('accuracy =', acc)
    print(cr)  
    
    s_data.append(acc)
    
    # get map of coefficients    
    # coef_ = sub_svc.coef_
    # print(coef_.shape)
    #Return voxel weights into a nifti image using the NiftiMasker
    # coef_img = sub_masker.inverse_transform(coef_)
    #Save .nii to file
    # coef_img.to_filename(pjoin(output_dir, 'Coef_maps', 'SVC_coeff_enc_ctl_sub-'+str(sub)+'.nii'))

    enc_ctl_data = enc_ctl_data.append(pd.Series(s_data, index=enc_ctl_data.columns), ignore_index=True)

demo_data = sub_data.copy()
demo_data.reset_index(level=None, drop=False, inplace=True)

enc_ctl_data.insert(loc = 1, column = 'cognitive_status', value = \
                    demo_data['cognitive_status'], allow_duplicates=True)
enc_ctl_data.insert(loc = 2, column = 'total_scrubbed_frames',
                    value = demo_data['total_scrubbed_frames'], allow_duplicates=True)
enc_ctl_data.insert(loc = 3, column = 'mean_FD',
                    value = demo_data['mean_FD'], allow_duplicates=True)
enc_ctl_data.insert(loc = 4, column = 'hits',
                    value = demo_data['hits'], allow_duplicates=True)
enc_ctl_data.insert(loc = 5, column = 'miss',
                    value = demo_data['miss'], allow_duplicates=True)
enc_ctl_data.insert(loc = 6, column = 'correct_source',
                    value = demo_data['correct_source'], allow_duplicates=True)
enc_ctl_data.insert(loc = 7, column = 'wrong_source',
                    value = demo_data['wrong_source'], allow_duplicates=True)
enc_ctl_data.insert(loc = 8, column = 'dprime',
                    value = demo_data['dprime'], allow_duplicates=True)
enc_ctl_data.insert(loc = 9, column = 'associative_memScore',
                    value = demo_data['associative_memScore'], allow_duplicates=True)    
    



108391
training: 70 testing: 47
Enc    47
CTL    23
Name: condition, dtype: int64 Enc    31
CTL    16
Name: condition, dtype: int64
[0.42857143 0.57142857 0.71428571 0.85714286 0.71428571 0.71428571
 1.         0.42857143 0.71428571 0.42857143]
Accuracy: 0.6571428571428571
              precision    recall  f1-score   support

         CTL       0.48      0.43      0.45        23
         Enc       0.73      0.77      0.75        47

    accuracy                           0.66        70
   macro avg       0.61      0.60      0.60        70
weighted avg       0.65      0.66      0.65        70

accuracy = 0.9148936170212766
              precision    recall  f1-score   support

         CTL       0.93      0.81      0.87        16
         Enc       0.91      0.97      0.94        31

    accuracy                           0.91        47
   macro avg       0.92      0.89      0.90        47
weighted avg       0.92      0.91      0.91        47

120839
training: 70 testing: 47
Enc    47


accuracy = 0.8723404255319149
              precision    recall  f1-score   support

         CTL       0.86      0.75      0.80        16
         Enc       0.88      0.94      0.91        31

    accuracy                           0.87        47
   macro avg       0.87      0.84      0.85        47
weighted avg       0.87      0.87      0.87        47

178101
training: 70 testing: 47
Enc    47
CTL    23
Name: condition, dtype: int64 Enc    31
CTL    16
Name: condition, dtype: int64
[0.85714286 0.85714286 0.57142857 0.85714286 1.         0.85714286
 0.85714286 0.71428571 1.         0.85714286]
Accuracy: 0.8428571428571429
              precision    recall  f1-score   support

         CTL       0.83      0.65      0.73        23
         Enc       0.85      0.94      0.89        47

    accuracy                           0.84        70
   macro avg       0.84      0.79      0.81        70
weighted avg       0.84      0.84      0.84        70

accuracy = 0.6808510638297872
            

accuracy = 0.6304347826086957
              precision    recall  f1-score   support

         CTL       0.47      0.44      0.45        16
         Enc       0.71      0.73      0.72        30

    accuracy                           0.63        46
   macro avg       0.59      0.59      0.59        46
weighted avg       0.63      0.63      0.63        46

258618
training: 70 testing: 47
Enc    47
CTL    23
Name: condition, dtype: int64 Enc    31
CTL    16
Name: condition, dtype: int64
[1.         0.85714286 0.85714286 1.         0.85714286 0.85714286
 1.         1.         1.         1.        ]
Accuracy: 0.9428571428571428
              precision    recall  f1-score   support

         CTL       0.95      0.87      0.91        23
         Enc       0.94      0.98      0.96        47

    accuracy                           0.94        70
   macro avg       0.95      0.92      0.93        70
weighted avg       0.94      0.94      0.94        70

accuracy = 0.8297872340425532
            

accuracy = 0.9148936170212766
              precision    recall  f1-score   support

         CTL       0.93      0.81      0.87        16
         Enc       0.91      0.97      0.94        31

    accuracy                           0.91        47
   macro avg       0.92      0.89      0.90        47
weighted avg       0.92      0.91      0.91        47

350555
training: 70 testing: 47
Enc    47
CTL    23
Name: condition, dtype: int64 Enc    31
CTL    16
Name: condition, dtype: int64
[0.71428571 1.         1.         1.         0.71428571 0.57142857
 0.85714286 0.85714286 0.85714286 0.71428571]
Accuracy: 0.8285714285714286
              precision    recall  f1-score   support

         CTL       0.76      0.70      0.73        23
         Enc       0.86      0.89      0.88        47

    accuracy                           0.83        70
   macro avg       0.81      0.79      0.80        70
weighted avg       0.83      0.83      0.83        70

accuracy = 0.7021276595744681
            

accuracy = 0.723404255319149
              precision    recall  f1-score   support

         CTL       0.64      0.44      0.52        16
         Enc       0.75      0.87      0.81        31

    accuracy                           0.72        47
   macro avg       0.69      0.65      0.66        47
weighted avg       0.71      0.72      0.71        47

437101
training: 70 testing: 47
Enc    47
CTL    23
Name: condition, dtype: int64 Enc    31
CTL    16
Name: condition, dtype: int64
[0.71428571 0.71428571 0.85714286 0.71428571 0.71428571 0.71428571
 0.85714286 0.85714286 1.         1.        ]
Accuracy: 0.8142857142857143
              precision    recall  f1-score   support

         CTL       0.78      0.61      0.68        23
         Enc       0.83      0.91      0.87        47

    accuracy                           0.81        70
   macro avg       0.80      0.76      0.78        70
weighted avg       0.81      0.81      0.81        70

accuracy = 0.8085106382978723
             

accuracy = 0.7872340425531915
              precision    recall  f1-score   support

         CTL       0.71      0.62      0.67        16
         Enc       0.82      0.87      0.84        31

    accuracy                           0.79        47
   macro avg       0.77      0.75      0.76        47
weighted avg       0.78      0.79      0.78        47

517070
training: 70 testing: 47
Enc    47
CTL    23
Name: condition, dtype: int64 Enc    31
CTL    16
Name: condition, dtype: int64
[1.         0.85714286 0.85714286 0.85714286 0.71428571 1.
 1.         0.71428571 0.85714286 0.57142857]
Accuracy: 0.8428571428571429
              precision    recall  f1-score   support

         CTL       0.88      0.61      0.72        23
         Enc       0.83      0.96      0.89        47

    accuracy                           0.84        70
   macro avg       0.85      0.78      0.80        70
weighted avg       0.85      0.84      0.83        70

accuracy = 0.8085106382978723
              precis

accuracy = 0.7872340425531915
              precision    recall  f1-score   support

         CTL       0.69      0.69      0.69        16
         Enc       0.84      0.84      0.84        31

    accuracy                           0.79        47
   macro avg       0.76      0.76      0.76        47
weighted avg       0.79      0.79      0.79        47

630120
training: 70 testing: 47
Enc    47
CTL    23
Name: condition, dtype: int64 Enc    31
CTL    16
Name: condition, dtype: int64
[0.71428571 0.71428571 0.42857143 0.85714286 0.71428571 1.
 0.85714286 0.71428571 0.85714286 1.        ]
Accuracy: 0.7857142857142857
              precision    recall  f1-score   support

         CTL       0.72      0.57      0.63        23
         Enc       0.81      0.89      0.85        47

    accuracy                           0.79        70
   macro avg       0.76      0.73      0.74        70
weighted avg       0.78      0.79      0.78        70

accuracy = 0.8085106382978723
              precis

accuracy = 0.8297872340425532
              precision    recall  f1-score   support

         CTL       0.83      0.62      0.71        16
         Enc       0.83      0.94      0.88        31

    accuracy                           0.83        47
   macro avg       0.83      0.78      0.80        47
weighted avg       0.83      0.83      0.82        47

748676
training: 70 testing: 47
Enc    47
CTL    23
Name: condition, dtype: int64 Enc    31
CTL    16
Name: condition, dtype: int64
[0.85714286 0.71428571 0.85714286 0.85714286 0.85714286 0.57142857
 0.71428571 0.57142857 0.85714286 1.        ]
Accuracy: 0.7857142857142857
              precision    recall  f1-score   support

         CTL       0.70      0.61      0.65        23
         Enc       0.82      0.87      0.85        47

    accuracy                           0.79        70
   macro avg       0.76      0.74      0.75        70
weighted avg       0.78      0.79      0.78        70

accuracy = 0.7021276595744681
            

accuracy = 0.7021276595744681
              precision    recall  f1-score   support

         CTL       0.57      0.50      0.53        16
         Enc       0.76      0.81      0.78        31

    accuracy                           0.70        47
   macro avg       0.66      0.65      0.66        47
weighted avg       0.69      0.70      0.70        47

878354
training: 70 testing: 47
Enc    47
CTL    23
Name: condition, dtype: int64 Enc    31
CTL    16
Name: condition, dtype: int64
[0.71428571 0.71428571 0.85714286 1.         1.         0.71428571
 0.71428571 0.85714286 0.85714286 0.85714286]
Accuracy: 0.8285714285714286
              precision    recall  f1-score   support

         CTL       0.79      0.65      0.71        23
         Enc       0.84      0.91      0.88        47

    accuracy                           0.83        70
   macro avg       0.82      0.78      0.80        70
weighted avg       0.83      0.83      0.82        70

accuracy = 0.9361702127659575
            

accuracy = 0.8085106382978723
              precision    recall  f1-score   support

         CTL       0.73      0.69      0.71        16
         Enc       0.84      0.87      0.86        31

    accuracy                           0.81        47
   macro avg       0.79      0.78      0.78        47
weighted avg       0.81      0.81      0.81        47

932933
training: 70 testing: 47
Enc    47
CTL    23
Name: condition, dtype: int64 Enc    31
CTL    16
Name: condition, dtype: int64
[0.71428571 0.85714286 0.71428571 0.57142857 1.         0.85714286
 0.71428571 0.85714286 1.         0.71428571]
Accuracy: 0.8
              precision    recall  f1-score   support

         CTL       0.70      0.70      0.70        23
         Enc       0.85      0.85      0.85        47

    accuracy                           0.80        70
   macro avg       0.77      0.77      0.77        70
weighted avg       0.80      0.80      0.80        70

accuracy = 0.8297872340425532
              precision    

accuracy = 0.8085106382978723
              precision    recall  f1-score   support

         CTL       0.82      0.56      0.67        16
         Enc       0.81      0.94      0.87        31

    accuracy                           0.81        47
   macro avg       0.81      0.75      0.77        47
weighted avg       0.81      0.81      0.80        47

983291
training: 70 testing: 47
Enc    47
CTL    23
Name: condition, dtype: int64 Enc    31
CTL    16
Name: condition, dtype: int64
[0.85714286 1.         1.         0.85714286 0.85714286 0.85714286
 1.         0.57142857 0.85714286 0.71428571]
Accuracy: 0.8571428571428571
              precision    recall  f1-score   support

         CTL       0.88      0.65      0.75        23
         Enc       0.85      0.96      0.90        47

    accuracy                           0.86        70
   macro avg       0.87      0.80      0.82        70
weighted avg       0.86      0.86      0.85        70

accuracy = 0.851063829787234
             

KeyError: 'cognitive_status'

In [None]:
enc_ctl_data.to_csv(pjoin(xpu('~/Documents', 'SVC_withinSub_enc_ctl_wholeBrain.tsv'),
    sep='\t', header=True, index=False)

In [None]:
# HIT VERSUS MISS TRIAL CLASSIFICATION

# build data structure to store accuracy data and coefficients
hit_miss_data = pd.DataFrame()
hit_miss_data.insert(loc = 0, column = 'dccid', value = 'None', allow_duplicates=True)
# hit_miss_data.insert(loc = 1, column = 'diagnosis', value = 'None', allow_duplicates=True)
for i in range(0, 7):
    hit_miss_data.insert(loc = hit_miss_data.shape[1], column = 'CV'+str(i+1)+'_acc', value = NaN, allow_duplicates=True)
hit_miss_data.insert(loc = hit_miss_data.shape[1], column = 'TrainSet_MeanCV_acc', value = 'None', allow_duplicates=True)
hit_miss_data.insert(loc = hit_miss_data.shape[1], column = 'TestSet_acc', value = 'None', allow_duplicates=True)

for sub in hm_subs:
    print(sub)
    s_data = [sub]
    # load subject's beta maps (one per trial)
    betas = image.load_img(img=pjoin(beta_dir, str(sub), 'TrialContrasts/betas_sub'+str(sub)+'*.nii'),
                           wildcards=True)
    # initialize NiftiLabelMasker object    
    sub_mask = nb.load(pjoin(mask_dir, 'func_sub'+str(sub)+'_mask_stereonl.nii'))
    sub_masker = NiftiMasker(mask_img=sub_mask, standardize=True)
    # transform subject's beta maps into vector of network means per trial
    X_hit_miss_ctl = sub_masker.fit_transform(betas)   
    
    # load subject's trial labels
    labels_file = pjoin(label_dir, 'sub-'+str(sub)+'_ctl_miss_hit.tsv')
    y_hit_miss_ctl = pd.read_csv(labels_file, sep='\t')
    y_hit_miss_ctl_labels = y_hit_miss_ctl['ctl_miss_hit']
    # mask X and y data to exclude trials of no interest
    hit_miss_mask = y_hit_miss_ctl_labels.isin(['hit', 'missed'])
    y_hit_miss = y_hit_miss_ctl_labels[hit_miss_mask]      
    X_hit_miss  = X_hit_miss_ctl[hit_miss_mask]
    
    # Split trials into a training and a test set
    X_train, X_test, y_train, y_test = train_test_split(
        X_hit_miss, # x
        y_hit_miss, # y
        test_size = 0.4, # 60%/40% split
        shuffle = True, # shuffle dataset before splitting
        stratify = y_hit_miss, # keep distribution of conditions consistent betw. train & test sets
        #random_state = 123  # if set number, same shuffle each time, otherwise randomization algo
        ) 
    print('training:', len(X_train), 'testing:', len(X_test))
    print(y_train.value_counts(), y_test.value_counts())
    
    # define the model
    sub_svc = SVC(kernel='linear', class_weight='balanced')
    
    # do cross-validation to evaluate model performance
    # within 10 folds of training set
    # predict
    y_pred = cross_val_predict(sub_svc, X_train, y_train,
                               groups=y_train, cv=7)
    # scores
    cv_acc = cross_val_score(sub_svc, X_train, y_train,
                         groups=y_train, cv=7)
    print(cv_acc)
    
    for i in range(0, len(cv_acc)):
        s_data.append(cv_acc[i])
        
    # evaluate overall model performance on training data
    overall_acc = accuracy_score(y_pred = y_pred, y_true = y_train)
    overall_cr = classification_report(y_pred = y_pred, y_true = y_train)
    print('Accuracy:',overall_acc)
    print(overall_cr)
    
    s_data.append(overall_acc)

    # Test model on unseen data from the test set
    sub_svc.fit(X_train, y_train)
    y_pred = sub_svc.predict(X_test) # classify age class using testing data
    acc = sub_svc.score(X_test, y_test) # get accuracy

    cr = classification_report(y_pred=y_pred, y_true=y_test) # get prec., recall & f1
    # print results
    print('accuracy =', acc)
    print(cr)  
    
    s_data.append(acc)
    
    # get map of coefficients    
    # coef_ = sub_svc.coef_
    # print(coef_.shape)
    #Return voxel weights into a nifti image using the NiftiMasker
    # coef_img = sub_masker.inverse_transform(coef_)
    #Save .nii to file
    # coef_img.to_filename(pjoin(output_dir, 'Coef_maps', 'SVC_coeff_hit_miss_sub-'+str(sub)+'.nii'))

    hit_miss_data = hit_miss_data.append(pd.Series(s_data, index=hit_miss_data.columns), ignore_index=True)

demo_data = hm_data.copy()
demo_data.reset_index(level=None, drop=False, inplace=True)

hit_miss_data.insert(loc = 1, column = 'cognitive_status', value = demo_data['cognitive_status'], allow_duplicates=True)
hit_miss_data.insert(loc = 2, column = 'total_scrubbed_frames', value = demo_data['total_scrubbed_frames'], allow_duplicates=True)
hit_miss_data.insert(loc = 3, column = 'mean_FD', value = demo_data['mean_FD'], allow_duplicates=True)
hit_miss_data.insert(loc = 4, column = 'hits', value = demo_data['hits'], allow_duplicates=True)
hit_miss_data.insert(loc = 5, column = 'miss', value = demo_data['miss'], allow_duplicates=True)
hit_miss_data.insert(loc = 6, column = 'correct_source', value = demo_data['correct_source'], allow_duplicates=True)
hit_miss_data.insert(loc = 7, column = 'wrong_source', value = demo_data['wrong_source'], allow_duplicates=True)
hit_miss_data.insert(loc = 8, column = 'dprime', value = demo_data['dprime'], allow_duplicates=True)
hit_miss_data.insert(loc = 9, column = 'associative_memScore', value = demo_data['associative_memScore'], allow_duplicates=True)    

hit_miss_data.to_csv(pjoin(output_dir, 'SVC_withinSub_hit_miss_wholeBrain.tsv'),
    sep='\t', header=True, index=False)


In [17]:
# CORRECT SOURCE VERSUS WRONG SOURCE TRIAL CLASSIFICATION

# build data structure to store accuracy data and coefficients
cs_ws_data = pd.DataFrame()
cs_ws_data.insert(loc = 0, column = 'dccid', value = 'None', allow_duplicates=True)
# cs_ws_data.insert(loc = 1, column = 'diagnosis', value = 'None', allow_duplicates=True)
for i in range(0, 7):
    cs_ws_data.insert(loc = cs_ws_data.shape[1], column = 'CV'+str(i+1)+'_acc', value = NaN, allow_duplicates=True)
cs_ws_data.insert(loc = cs_ws_data.shape[1], column = 'TrainSet_MeanCV_acc', value = 'None', allow_duplicates=True)
cs_ws_data.insert(loc = cs_ws_data.shape[1], column = 'TestSet_acc', value = 'None', allow_duplicates=True)

for sub in cw_subs:
    print(sub)
    s_data = [sub]
    # load subject's beta maps (one per trial)
    betas = image.load_img(img=pjoin(beta_dir, str(sub), 'TrialContrasts/betas_sub'+str(sub)+'*.nii'),
                           wildcards=True)
    # initialize NiftiLabelMasker object    
    sub_mask = nb.load(pjoin(mask_dir, 'func_sub'+str(sub)+'_mask_stereonl.nii'))
    sub_masker = NiftiMasker(mask_img=sub_mask, standardize=True)
                   
    # transform subject's beta maps into vector of network means per trial
    X_cs_ws_miss_ctl = sub_masker.fit_transform(betas)  
    
    # load subject's trial labels
    labels_file = pjoin(label_dir, 'sub-'+str(sub)+'_ctl_miss_ws_cs.tsv')
    y_cs_ws_miss_ctl = pd.read_csv(labels_file, sep='\t')
    y_cs_ws_miss_ctl_labels = y_cs_ws_miss_ctl['ctl_miss_ws_cs']
    # mask X and y data to exclude trials of no interest
    cs_ws_mask = y_cs_ws_miss_ctl_labels.isin(['correctsource', 'wrongsource'])
    y_cs_ws = y_cs_ws_miss_ctl_labels[cs_ws_mask]      
    X_cs_ws  = X_cs_ws_miss_ctl[cs_ws_mask]
    
    # Split trials into a training and a test set
    X_train, X_test, y_train, y_test = train_test_split(
        X_cs_ws, # x
        y_cs_ws, # y
        test_size = 0.4, # 60%/40% split
        shuffle = True, # shuffle dataset before splitting
        stratify = y_cs_ws, # keep distribution of conditions consistent betw. train & test sets
        #random_state = 123  # if set number, same shuffle each time, otherwise randomization algo
        ) 
    print('training:', len(X_train), 'testing:', len(X_test))
    print(y_train.value_counts(), y_test.value_counts())
    
    # define the model
    sub_svc = SVC(kernel='linear', class_weight='balanced')
    
    # do cross-validation to evaluate model performance
    # within 10 folds of training set
    # predict
    y_pred = cross_val_predict(sub_svc, X_train, y_train,
                               groups=y_train, cv=7)
    # scores
    cv_acc = cross_val_score(sub_svc, X_train, y_train,
                         groups=y_train, cv=7)
    print(cv_acc)
    
    for i in range(0, len(cv_acc)):
        s_data.append(cv_acc[i])
        
    # evaluate overall model performance on training data
    overall_acc = accuracy_score(y_pred = y_pred, y_true = y_train)
    overall_cr = classification_report(y_pred = y_pred, y_true = y_train)
    print('Accuracy:',overall_acc)
    print(overall_cr)
    
    s_data.append(overall_acc)

    # Test model on unseen data from the test set
    sub_svc.fit(X_train, y_train)
    y_pred = sub_svc.predict(X_test) # classify age class using testing data
    acc = sub_svc.score(X_test, y_test) # get accuracy

    cr = classification_report(y_pred=y_pred, y_true=y_test) # get prec., recall & f1
    # print results
    print('accuracy =', acc)
    print(cr)  
    
    s_data.append(acc)
    
    # get map of coefficients    
    # coef_ = sub_svc.coef_
    # print(coef_.shape)
    #Return voxel weights into a nifti image using the NiftiMasker
    # coef_img = sub_masker.inverse_transform(coef_)
    #Save .nii to file
    # coef_img.to_filename(pjoin(output_dir, 'Coef_maps', 'SVC_coeff_cs_ws_sub-'+str(sub)+'.nii'))
    
    cs_ws_data = cs_ws_data.append(pd.Series(s_data, index=cs_ws_data.columns), ignore_index=True)

demo_data = cw_data.copy()
demo_data.reset_index(level=None, drop=False, inplace=True)

cs_ws_data.insert(loc = 1, column = 'cognitive_status', value = demo_data['cognitive_status'], allow_duplicates=True)
cs_ws_data.insert(loc = 2, column = 'total_scrubbed_frames', value = demo_data['total_scrubbed_frames'], allow_duplicates=True)
cs_ws_data.insert(loc = 3, column = 'mean_FD', value = demo_data['mean_FD'], allow_duplicates=True)
cs_ws_data.insert(loc = 4, column = 'hits', value = demo_data['hits'], allow_duplicates=True)
cs_ws_data.insert(loc = 5, column = 'miss', value = demo_data['miss'], allow_duplicates=True)
cs_ws_data.insert(loc = 6, column = 'correct_source', value = demo_data['correct_source'], allow_duplicates=True)
cs_ws_data.insert(loc = 7, column = 'wrong_source', value = demo_data['wrong_source'], allow_duplicates=True)
cs_ws_data.insert(loc = 8, column = 'dprime', value = demo_data['dprime'], allow_duplicates=True)
cs_ws_data.insert(loc = 9, column = 'associative_memScore', value = demo_data['associative_memScore'], allow_duplicates=True)    

cs_ws_data.to_csv(pjoin(output_dir, 'SVC_withinSub_cs_ws_wholeBrain.tsv'),
    sep='\t', header=True, index=False)


108391
training: 37 testing: 26
correctsource    25
wrongsource      12
Name: ctl_miss_ws_cs, dtype: int64 correctsource    18
wrongsource       8
Name: ctl_miss_ws_cs, dtype: int64
[0.66666667 0.33333333 0.66666667 0.66666667 0.6        1.
 0.5       ]
Accuracy: 0.6216216216216216
               precision    recall  f1-score   support

correctsource       0.68      0.84      0.75        25
  wrongsource       0.33      0.17      0.22        12

    micro avg       0.62      0.62      0.62        37
    macro avg       0.51      0.50      0.49        37
 weighted avg       0.57      0.62      0.58        37

accuracy = 0.6538461538461539
               precision    recall  f1-score   support

correctsource       0.76      0.72      0.74        18
  wrongsource       0.44      0.50      0.47         8

    micro avg       0.65      0.65      0.65        26
    macro avg       0.60      0.61      0.61        26
 weighted avg       0.67      0.65      0.66        26

122922
training: 33 t

accuracy = 0.6296296296296297
               precision    recall  f1-score   support

correctsource       0.75      0.75      0.75        20
  wrongsource       0.29      0.29      0.29         7

    micro avg       0.63      0.63      0.63        27
    macro avg       0.52      0.52      0.52        27
 weighted avg       0.63      0.63      0.63        27

258618
training: 30 testing: 21
correctsource    20
wrongsource      10
Name: ctl_miss_ws_cs, dtype: int64 correctsource    14
wrongsource       7
Name: ctl_miss_ws_cs, dtype: int64
[0.4        0.8        0.4        0.5        0.75       0.5
 0.66666667]
Accuracy: 0.5666666666666667
               precision    recall  f1-score   support

correctsource       0.67      0.70      0.68        20
  wrongsource       0.33      0.30      0.32        10

    micro avg       0.57      0.57      0.57        30
    macro avg       0.50      0.50      0.50        30
 weighted avg       0.56      0.57      0.56        30

accuracy = 0.6190476

[0.66666667 0.66666667 0.6        0.8        1.         0.25
 0.5       ]
Accuracy: 0.6470588235294118
               precision    recall  f1-score   support

correctsource       0.74      0.80      0.77        25
  wrongsource       0.29      0.22      0.25         9

    micro avg       0.65      0.65      0.65        34
    macro avg       0.51      0.51      0.51        34
 weighted avg       0.62      0.65      0.63        34

accuracy = 0.6666666666666666
               precision    recall  f1-score   support

correctsource       0.71      0.88      0.79        17
  wrongsource       0.33      0.14      0.20         7

    micro avg       0.67      0.67      0.67        24
    macro avg       0.52      0.51      0.49        24
 weighted avg       0.60      0.67      0.62        24

427357
training: 45 testing: 30
wrongsource      26
correctsource    19
Name: ctl_miss_ws_cs, dtype: int64 wrongsource      17
correctsource    13
Name: ctl_miss_ws_cs, dtype: int64
[0.42857143 0.57142

accuracy = 0.5555555555555556
               precision    recall  f1-score   support

correctsource       0.64      0.56      0.60        16
  wrongsource       0.46      0.55      0.50        11

    micro avg       0.56      0.56      0.56        27
    macro avg       0.55      0.55      0.55        27
 weighted avg       0.57      0.56      0.56        27

567214
training: 34 testing: 24
correctsource    17
wrongsource      17
Name: ctl_miss_ws_cs, dtype: int64 correctsource    12
wrongsource      12
Name: ctl_miss_ws_cs, dtype: int64
[0.5        0.83333333 0.5        0.75       0.75       0.5
 0.5       ]
Accuracy: 0.6176470588235294
               precision    recall  f1-score   support

correctsource       0.62      0.59      0.61        17
  wrongsource       0.61      0.65      0.63        17

    micro avg       0.62      0.62      0.62        34
    macro avg       0.62      0.62      0.62        34
 weighted avg       0.62      0.62      0.62        34

accuracy = 0.5
     

[0.5        0.5        0.33333333 0.66666667 0.5        0.5
 1.        ]
Accuracy: 0.55
               precision    recall  f1-score   support

correctsource       0.50      0.44      0.47         9
  wrongsource       0.58      0.64      0.61        11

    micro avg       0.55      0.55      0.55        20
    macro avg       0.54      0.54      0.54        20
 weighted avg       0.55      0.55      0.55        20

accuracy = 0.5714285714285714
               precision    recall  f1-score   support

correctsource       0.60      0.43      0.50         7
  wrongsource       0.56      0.71      0.63         7

    micro avg       0.57      0.57      0.57        14
    macro avg       0.58      0.57      0.56        14
 weighted avg       0.58      0.57      0.56        14

778749
training: 27 testing: 19
correctsource    14
wrongsource      13
Name: ctl_miss_ws_cs, dtype: int64 wrongsource      10
correctsource     9
Name: ctl_miss_ws_cs, dtype: int64
[0.25       0.25       0.5        

training: 40 testing: 27
correctsource    23
wrongsource      17
Name: ctl_miss_ws_cs, dtype: int64 correctsource    15
wrongsource      12
Name: ctl_miss_ws_cs, dtype: int64
[0.28571429 0.57142857 0.5        0.6        0.2        0.4
 0.4       ]
Accuracy: 0.425
               precision    recall  f1-score   support

correctsource       0.50      0.43      0.47        23
  wrongsource       0.35      0.41      0.38        17

    micro avg       0.42      0.42      0.42        40
    macro avg       0.42      0.42      0.42        40
 weighted avg       0.44      0.42      0.43        40

accuracy = 0.5185185185185185
               precision    recall  f1-score   support

correctsource       0.55      0.73      0.63        15
  wrongsource       0.43      0.25      0.32        12

    micro avg       0.52      0.52      0.52        27
    macro avg       0.49      0.49      0.47        27
 weighted avg       0.50      0.52      0.49        27

936730
training: 27 testing: 19
corrects

In [18]:
# CORRECT SOURCE VERSUS MISSED TRIAL CLASSIFICATION

# build data structure to store accuracy data and coefficients
cs_miss_data = pd.DataFrame()
cs_miss_data.insert(loc = 0, column = 'dccid', value = 'None', allow_duplicates=True)
# cs_miss_data.insert(loc = 1, column = 'diagnosis', value = 'None', allow_duplicates=True)
for i in range(0, 7):
    cs_miss_data.insert(loc = cs_miss_data.shape[1], column = 'CV'+str(i+1)+'_acc', value = NaN, allow_duplicates=True)
cs_miss_data.insert(loc = cs_miss_data.shape[1], column = 'TrainSet_MeanCV_acc', value = 'None', allow_duplicates=True)
cs_miss_data.insert(loc = cs_miss_data.shape[1], column = 'TestSet_acc', value = 'None', allow_duplicates=True)

for sub in cmiss_subs:
    print(sub)
    s_data = [sub]
    # load subject's beta maps (one per trial)
    betas = image.load_img(img=pjoin(beta_dir, str(sub), 'TrialContrasts/betas_sub'+str(sub)+'*.nii'),
                           wildcards=True)
    # initialize NiftiLabelMasker object    
    sub_mask = nb.load(pjoin(mask_dir, 'func_sub'+str(sub)+'_mask_stereonl.nii'))
    sub_masker = NiftiMasker(mask_img=sub_mask, standardize=True)
                   
    # transform subject's beta maps into vector of network means per trial
    X_cs_ws_miss_ctl = sub_masker.fit_transform(betas) 
    
    # load subject's trial labels
    labels_file = pjoin(label_dir, 'sub-'+str(sub)+'_ctl_miss_ws_cs.tsv')
    y_cs_ws_miss_ctl = pd.read_csv(labels_file, sep='\t')
    y_cs_ws_miss_ctl_labels = y_cs_ws_miss_ctl['ctl_miss_ws_cs']
    # mask X and y data to exclude trials of no interest
    cs_miss_mask = y_cs_ws_miss_ctl_labels.isin(['correctsource', 'missed'])
    y_cs_miss = y_cs_ws_miss_ctl_labels[cs_miss_mask]      
    X_cs_miss  = X_cs_ws_miss_ctl[cs_miss_mask]
    
    # Split trials into a training and a test set
    X_train, X_test, y_train, y_test = train_test_split(
        X_cs_miss, # x
        y_cs_miss, # y
        test_size = 0.4, # 60%/40% split
        shuffle = True, # shuffle dataset before splitting
        stratify = y_cs_miss, # keep distribution of conditions consistent betw. train & test sets
        #random_state = 123  # if set number, same shuffle each time, otherwise randomization algo
        ) 
    print('training:', len(X_train), 'testing:', len(X_test))
    print(y_train.value_counts(), y_test.value_counts())
    
    # define the model
    sub_svc = SVC(kernel='linear', class_weight='balanced')
    
    # do cross-validation to evaluate model performance
    # within 10 folds of training set
    # predict
    y_pred = cross_val_predict(sub_svc, X_train, y_train,
                               groups=y_train, cv=7)
    # scores
    cv_acc = cross_val_score(sub_svc, X_train, y_train,
                         groups=y_train, cv=7)
    print(cv_acc)
    
    for i in range(0, len(cv_acc)):
        s_data.append(cv_acc[i])
        
    # evaluate overall model performance on training data
    overall_acc = accuracy_score(y_pred = y_pred, y_true = y_train)
    overall_cr = classification_report(y_pred = y_pred, y_true = y_train)
    print('Accuracy:',overall_acc)
    print(overall_cr)
    
    s_data.append(overall_acc)

    # Test model on unseen data from the test set
    sub_svc.fit(X_train, y_train)
    y_pred = sub_svc.predict(X_test) # classify age class using testing data
    acc = sub_svc.score(X_test, y_test) # get accuracy

    cr = classification_report(y_pred=y_pred, y_true=y_test) # get prec., recall & f1
    # print results
    print('accuracy =', acc)
    print(cr)  
    
    s_data.append(acc)
    
    # get map of coefficients    
    # coef_ = sub_svc.coef_
    # print(coef_.shape)
    #Return voxel weights into a nifti image using the NiftiMasker
    # coef_img = sub_masker.inverse_transform(coef_)
    #Save .nii to file
    # coef_img.to_filename(pjoin(output_dir, 'Coef_maps', 'SVC_coeff_cs_ws_sub-'+str(sub)+'.nii'))
        
    cs_miss_data = cs_miss_data.append(pd.Series(s_data, index=cs_miss_data.columns), ignore_index=True)

demo_data = cmiss_data.copy()
demo_data.reset_index(level=None, drop=False, inplace=True)

cs_miss_data.insert(loc = 1, column = 'cognitive_status', value = demo_data['cognitive_status'], allow_duplicates=True)
cs_miss_data.insert(loc = 2, column = 'total_scrubbed_frames', value = demo_data['total_scrubbed_frames'], allow_duplicates=True)
cs_miss_data.insert(loc = 3, column = 'mean_FD', value = demo_data['mean_FD'], allow_duplicates=True)
cs_miss_data.insert(loc = 4, column = 'hits', value = demo_data['hits'], allow_duplicates=True)
cs_miss_data.insert(loc = 5, column = 'miss', value = demo_data['miss'], allow_duplicates=True)
cs_miss_data.insert(loc = 6, column = 'correct_source', value = demo_data['correct_source'], allow_duplicates=True)
cs_miss_data.insert(loc = 7, column = 'wrong_source', value = demo_data['wrong_source'], allow_duplicates=True)
cs_miss_data.insert(loc = 8, column = 'dprime', value = demo_data['dprime'], allow_duplicates=True)
cs_miss_data.insert(loc = 9, column = 'associative_memScore', value = demo_data['associative_memScore'], allow_duplicates=True)    

cs_miss_data.to_csv(pjoin(output_dir, 'SVC_withinSub_cs_miss_wholeBrain.tsv'),
                    sep='\t', header=True, index=False)


108391
training: 34 testing: 24
correctsource    25
missed            9
Name: ctl_miss_ws_cs, dtype: int64 correctsource    18
missed            6
Name: ctl_miss_ws_cs, dtype: int64
[0.66666667 0.66666667 0.6        0.4        0.75       1.
 0.5       ]
Accuracy: 0.6470588235294118
               precision    recall  f1-score   support

correctsource       0.72      0.84      0.78        25
       missed       0.20      0.11      0.14         9

    micro avg       0.65      0.65      0.65        34
    macro avg       0.46      0.48      0.46        34
 weighted avg       0.59      0.65      0.61        34

accuracy = 0.625
               precision    recall  f1-score   support

correctsource       0.76      0.72      0.74        18
       missed       0.29      0.33      0.31         6

    micro avg       0.62      0.62      0.62        24
    macro avg       0.53      0.53      0.53        24
 weighted avg       0.64      0.62      0.63        24

122922
training: 28 testing: 20
co

accuracy = 0.64
               precision    recall  f1-score   support

correctsource       0.71      0.75      0.73        16
       missed       0.50      0.44      0.47         9

    micro avg       0.64      0.64      0.64        25
    macro avg       0.60      0.60      0.60        25
 weighted avg       0.63      0.64      0.63        25

314409
training: 40 testing: 28
correctsource    21
missed           19
Name: ctl_miss_ws_cs, dtype: int64 correctsource    14
missed           14
Name: ctl_miss_ws_cs, dtype: int64
[0.5        0.16666667 0.83333333 0.83333333 0.5        0.6
 0.4       ]
Accuracy: 0.55
               precision    recall  f1-score   support

correctsource       0.58      0.52      0.55        21
       missed       0.52      0.58      0.55        19

    micro avg       0.55      0.55      0.55        40
    macro avg       0.55      0.55      0.55        40
 weighted avg       0.55      0.55      0.55        40

accuracy = 0.6428571428571429
               pre

accuracy = 0.8
               precision    recall  f1-score   support

correctsource       0.81      0.96      0.88        23
       missed       0.67      0.29      0.40         7

    micro avg       0.80      0.80      0.80        30
    macro avg       0.74      0.62      0.64        30
 weighted avg       0.78      0.80      0.77        30

458807
training: 36 testing: 24
correctsource    21
missed           15
Name: ctl_miss_ws_cs, dtype: int64 correctsource    14
missed           10
Name: ctl_miss_ws_cs, dtype: int64
[0.5 0.6 0.8 0.4 0.6 0.6 0.6]
Accuracy: 0.5833333333333334
               precision    recall  f1-score   support

correctsource       0.62      0.76      0.68        21
       missed       0.50      0.33      0.40        15

    micro avg       0.58      0.58      0.58        36
    macro avg       0.56      0.55      0.54        36
 weighted avg       0.57      0.58      0.56        36

accuracy = 0.5833333333333334
               precision    recall  f1-score   s

accuracy = 0.44
               precision    recall  f1-score   support

correctsource       0.20      0.09      0.13        11
       missed       0.50      0.71      0.59        14

    micro avg       0.44      0.44      0.44        25
    macro avg       0.35      0.40      0.36        25
 weighted avg       0.37      0.44      0.38        25

729722
training: 40 testing: 28
correctsource    26
missed           14
Name: ctl_miss_ws_cs, dtype: int64 correctsource    18
missed           10
Name: ctl_miss_ws_cs, dtype: int64
[0.5        0.5        0.66666667 0.33333333 0.66666667 0.6
 0.2       ]
Accuracy: 0.5
               precision    recall  f1-score   support

correctsource       0.59      0.73      0.66        26
       missed       0.12      0.07      0.09        14

    micro avg       0.50      0.50      0.50        40
    macro avg       0.36      0.40      0.37        40
 weighted avg       0.43      0.50      0.46        40

accuracy = 0.6071428571428571
               prec

[0.66666667 0.5        0.66666667 0.5        0.4        0.6
 0.75      ]
Accuracy: 0.5789473684210527
               precision    recall  f1-score   support

correctsource       0.57      0.44      0.50        18
       missed       0.58      0.70      0.64        20

    micro avg       0.58      0.58      0.58        38
    macro avg       0.58      0.57      0.57        38
 weighted avg       0.58      0.58      0.57        38

accuracy = 0.6153846153846154
               precision    recall  f1-score   support

correctsource       0.58      0.85      0.69        13
       missed       0.71      0.38      0.50        13

    micro avg       0.62      0.62      0.62        26
    macro avg       0.65      0.62      0.59        26
 weighted avg       0.65      0.62      0.59        26

983291
training: 40 testing: 28
correctsource    29
missed           11
Name: ctl_miss_ws_cs, dtype: int64 correctsource    21
missed            7
Name: ctl_miss_ws_cs, dtype: int64
[0.71428571 0.333333