In [5]:
from sklearn.model_selection import cross_validate
import numpy as np
import os
import sys
import pandas as pd
#import other python scripts for further anlaysis
import reshape
from sklearn.linear_model import RidgeClassifier
#import results
import warnings
warnings.filterwarnings("ignore")
# Initialization of directory information:
thisDir = os.path.expanduser('~/Desktop/MSC_Alexis/analysis/')
dataDir = thisDir + 'data/mvpa_data/'
def classifyCV(sub, task):
    """
    Classifying same subjects (CV) along the same task

    Parameters
    -------------

    Returns
    -------------
    dfCV : DataFrame
        Dataframe consisting of average accuracy across all subjects

    """
    clf=RidgeClassifier()

    taskFC=reshape.matFiles(dataDir+task+'/'+sub+'_parcel_corrmat.mat')
    restFC=reshape.matFiles(dataDir+'rest/'+sub+'_parcel_corrmat.mat')
    folds=taskFC.shape[0]
    x_train, y_train=reshape.concateFC(taskFC, restFC)
    output = cross_validate(clf, x_train, y_train, cv=folds, scoring = 'accuracy', return_estimator =True)
    return output


In [6]:
output=classifyCV('MSC01','mem')

In [8]:
for idx,estimator in enumerate(output['estimator']):
    print("Features sorted by their score for estimator {}:".format(idx))
    feature_importances = pd.DataFrame(estimator.feature_importances_,                     
                                        columns=['importance']).sort_values('importance', ascending=False)
    print(feature_importances)

Features sorted by their score for estimator 0:


AttributeError: 'RidgeClassifier' object has no attribute 'feature_importances_'

[RidgeClassifier(),
 RidgeClassifier(),
 RidgeClassifier(),
 RidgeClassifier(),
 RidgeClassifier(),
 RidgeClassifier(),
 RidgeClassifier(),
 RidgeClassifier(),
 RidgeClassifier(),
 RidgeClassifier()]

In [62]:
from sklearn.model_selection import LeaveOneOut
from sklearn.model_selection import cross_validate
import numpy as np
import os
import sys
import pandas as pd
#import other python scripts for further anlaysis
import reshape
from sklearn.linear_model import RidgeClassifier
#import results
import warnings
warnings.filterwarnings("ignore")
# Initialization of directory information:
thisDir = os.path.expanduser('~/Desktop/MSC_Alexis/analysis/')
dataDir = thisDir + 'data/mvpa_data/'
splitDict=dict([('MSC01',10),('MSC02',10),('MSC03',8),('MSC04',10),('MSC05',10),('MSC06',9),('MSC07',9),('MSC10',10)])

def modelAll(train_sub):
    """
    Preparing machine learning model with appropriate data

    Parameters
    -------------
    train_sub : str
            Subject name for training
    test_sub : str
            Subject name for testing

    Returns
    -------------
    total_score : float
            Average accuracy of all folds

    """
    session=splitDict[train_sub]
    split=np.empty((session, 55278))
    count=0
    clf=RidgeClassifier()
    df=pd.DataFrame()
    #train sub
    memFC=reshape.permROI(dataDir+'mem/'+train_sub+'_parcel_corrmat.mat')
    semFC=reshape.permROI(dataDir+'semantic/'+train_sub+'_parcel_corrmat.mat')
    glassFC=reshape.permROI(dataDir+'glass/'+train_sub+'_parcel_corrmat.mat')
    motFC=reshape.permROI(dataDir+'motor/'+train_sub+'_parcel_corrmat.mat')
    restFC=reshape.permROI(dataDir+'rest/corrmats_timesplit/fourths/'+train_sub+'_parcel_corrmat.mat') #keep tasks seperated in order to collect the right amount of days
    restFC=np.reshape(restFC,(10,4,55278)) #reshape to gather correct days
    loo = LeaveOneOut()
    fw=np.empty([memFC.shape[0],55278])
    for train_index, test_index in loo.split(split):
        memtrain=memFC[train_index]
        semtrain=semFC[train_index]
        mottrain=motFC[train_index]
        glatrain=glassFC[train_index]
        Xtrain_task=np.concatenate((memtrain,semtrain,mottrain,glatrain))
        Xtrain_rest=restFC[train_index,:,:]
        Xtrain_rest=np.reshape(Xtrain_rest,(-1,55278))
        ytrain_task = np.ones(Xtrain_task.shape[0], dtype = int)
        ytrain_rest=np.zeros(Xtrain_rest.shape[0], dtype=int)
        X_tr=np.concatenate((Xtrain_task, Xtrain_rest))
        y_tr = np.concatenate((ytrain_task,ytrain_rest))
        clf.fit(X_tr,y_tr)
        features = clf.coef_[0]
        fw[count]=features
        count=count+1
    fwAve=fw.mean(axis=0)
    results=np.empty((333))
    for rowID, null in enumerate(results):
        indices=reshape.getIndices()
        index=indices.index
        condition=indices['level_1']==rowID
        ROI=index[condition]
        ROI_list=ROI.tolist()
        tmp=fwAve[ROI_list]
        row=np.sum(np.abs(tmp))
        results[rowID]=row
    return fw, results

 

In [63]:
subList=['MSC01','MSC02','MSC03','MSC04','MSC05','MSC06','MSC07','MSC10']
Parcel_params = reshape.loadParcelParams('Gordon333','/Users/Alexis/Desktop/MSC_Alexis/analysis/data/Parcel_info/')
roi_sort = np.squeeze(Parcel_params['roi_sort'])
for train_sub in subList:
    fw, results=modelAll(train_sub)
    data={'acc':results,'roi':roi_sort}
    df=pd.DataFrame(data)
    df.sort_values(by='roi',inplace=True) 
    array=df['acc'].to_numpy()#convert back to numpy array for saving to make plots 
    array.tofile('/Users/Alexis/Desktop/MSC_Alexis/analysis/output/results/acc/ALL/subs/'+train_sub+'.csv', sep = ',')

In [65]:
fw, results=modelAll('MSC05')


In [69]:
fwSize=fw.shape[0]
Parcel_params = reshape.loadParcelParams('Gordon333','/Users/Alexis/Desktop/MSC_Alexis/analysis/data/Parcel_info/')
roi_sort = np.squeeze(Parcel_params['roi_sort'])
for i in range(fwSize):
    fold=fw[i]
    results=np.empty((333))
    for rowID, null in enumerate(results):
        indices=reshape.getIndices()
        index=indices.index
        condition=indices['level_1']==rowID
        ROI=index[condition]
        ROI_list=ROI.tolist()
        tmp=fold[ROI_list]
        row=np.sum(np.abs(tmp))
        results[rowID]=row
    data={'acc':results,'roi':roi_sort}
    df=pd.DataFrame(data)
    df.sort_values(by='roi',inplace=True) 
    array=df['acc'].to_numpy()#convert back to numpy array for saving to make plots 
    array.tofile('/Users/Alexis/Desktop/MSC_Alexis/analysis/output/results/acc/ALL/foldMSC05/fold'+str(i)+'.csv', sep = ',')

In [61]:
motFC.shape

(9, 55278)

In [50]:
row

0.08944552486542898

In [35]:
data={'acc':fwAve,'roi':roi_sort}
df=pd.DataFrame(data)
df.sort_values(by='roi',inplace=True) #take the sum of absolute value per row 

ValueError: arrays must all be same length

In [40]:
roi_sort.shape

(333,)

In [1]:
import quest_nullNet as qn
qn.run()

Finished with 14808
Finished with 10824
Finished with 8736
Finished with 4620
Finished with 5264
Finished with 3151
Finished with 494
Finished with 4060
Finished with 2375
Finished with 316
Finished with 564
Finished with 45
Finished with 21


In [6]:
import pandas as pd
import numpy as np
import featSelection as fs
SS_df=pd.DataFrame()
import os
import sys
import reshape
from statistics import mean
#import other python scripts for further anlaysis
# Initialization of directory information:
#thisDir = os.path.expanduser('~/Desktop/MSC_Alexis/analysis/')
thisDir = os.path.expanduser('~/Desktop/MSC_Alexis/analysis/')
dataDir = thisDir + 'data/mvpa_data/'
outDir = thisDir + 'output/results/subNetwork/'
netRoi=dict([('unassign',14808),('default', 10824),('visual',8736),('fp', 4620),('dan',5264),('van',3151),('salience', 494),('co', 4060),('sm', 2375),('sm-lat', 316),('auditory', 564),('pmn',45),('pon',21)])


for i in netRoi:
    #generate a new index
    idx=netRoi[i]
    SS=fs.modelAll(i)
    SS['feature']=idx
    SS['Network']=i
    SS_df=pd.concat([SS_df,SS])
    print('Finished with '+i)
SS_df.to_csv(outDir+'ALL/acc.csv', index=False)

Finished with unassign
Finished with default
Finished with visual
Finished with fp
Finished with dan
Finished with van
Finished with salience
Finished with co
Finished with sm
Finished with sm-lat
Finished with auditory
Finished with pmn
Finished with pon
