In [4]:
import classification
from sklearn.model_selection import LeaveOneOut
from sklearn.model_selection import KFold
from sklearn.svm import LinearSVC
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import RidgeClassifier
import numpy as np
import os
import sys
import pandas as pd
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import cross_validate
from sklearn import metrics
import itertools
import classification
#import other python scripts for further analysis
#import reshape
#import results
import warnings
warnings.filterwarnings("ignore")
# Directory layout: all inputs and outputs live under one analysis folder
# in the user's home directory.
thisDir = os.path.expanduser('~/Desktop/MSC_Alexis/analysis/')
dataDir = thisDir + 'data/mvpa_data/'
outDir = thisDir + 'output/mL/results/ridge/permutation/'

# Tasks and subjects included in the analysis.
taskList = ['mixed', 'motor', 'mem']
# Alternative task set:
#taskList=['glass','semantic', 'motor','mem']
subList = ['MSC01', 'MSC02', 'MSC03', 'MSC04', 'MSC05', 'MSC06', 'MSC07', 'MSC10']
# Alternative (smaller) subject set:
#subList=['MSC05','MSC06','MSC07']

# Ordered (train, test) pairs of distinct subjects and of distinct tasks.
subsComb = list(itertools.permutations(subList, 2))
tasksComb = list(itertools.permutations(taskList, 2))

# DS: every subject pair crossed with every single task.
DSvars = list(itertools.product(subsComb, taskList))
# SS: every single subject crossed with every task pair.
SSvars = list(itertools.product(subList, tasksComb))
# BS: every subject pair crossed with every task pair.
BSvars = list(itertools.product(subsComb, tasksComb))

    
def classifyDS():
    """
    Different-subject (DS) analysis: train on one subject and test on
    another subject, using the same task for both.

    Takes no arguments. Every entry of the module-level ``DSvars`` list
    (an ordered subject pair plus a task) is passed to ``model`` once.

    Returns
    -------------
    dfDS : DataFrame
        One row per combination, with columns ``train_sub``, ``test_sub``,
        ``task`` and ``acc`` (the score returned by ``model``).

    """
    combos = pd.DataFrame(DSvars, columns=['sub', 'task'])
    dfDS = pd.DataFrame()
    # Split the (train, test) subject tuples into two separate columns.
    dfDS[['train_sub', 'test_sub']] = pd.DataFrame(combos['sub'].tolist())
    dfDS['task'] = combos['task']
    # Same task on both sides; only the subject changes between train and test.
    dfDS['acc'] = [
        model('DS', train_sub=tr_sub, test_sub=te_sub, train_task=task, test_task=task)
        for tr_sub, te_sub, task in zip(dfDS['train_sub'], dfDS['test_sub'], dfDS['task'])
    ]
    return dfDS
    
    
def classifySS():
    """
    Same-subject (SS) analysis: train on one task and test on a different
    task, within the same subject.

    Takes no arguments. Every entry of the module-level ``SSvars`` list
    (a subject plus an ordered task pair) is passed to ``model`` once.

    Returns
    -------------
    dfSS : DataFrame
        One row per combination, with columns ``train_task``, ``test_task``,
        ``sub`` and ``acc`` (the score returned by ``model``).

    """
    combos = pd.DataFrame(SSvars, columns=['sub', 'task'])
    dfSS = pd.DataFrame()
    # Split the (train, test) task tuples into two separate columns.
    dfSS[['train_task', 'test_task']] = pd.DataFrame(combos['task'].tolist())
    dfSS['sub'] = combos['sub']
    # Same subject on both sides; only the task changes between train and test.
    dfSS['acc'] = [
        model('SS', train_sub=sub, test_sub=sub, train_task=tr_task, test_task=te_task)
        for sub, tr_task, te_task in zip(dfSS['sub'], dfSS['train_task'], dfSS['test_task'])
    ]
    return dfSS
def classifyBS():
    """
    Between-subject (BS) analysis: train on one subject and task, test on a
    different subject and a different task.

    Takes no arguments. Every entry of the module-level ``BSvars`` list
    (an ordered subject pair plus an ordered task pair) is passed to
    ``model`` once.

    Returns
    -------------
    dfBS : DataFrame
        One row per combination, with columns ``train_task``, ``test_task``,
        ``train_sub``, ``test_sub`` and ``acc`` (the score returned by
        ``model``).

    """
    combos = pd.DataFrame(BSvars, columns=['sub', 'task'])
    dfBS = pd.DataFrame()
    # Unpack the task tuples and the subject tuples into separate columns.
    dfBS[['train_task', 'test_task']] = pd.DataFrame(combos['task'].tolist())
    dfBS[['train_sub', 'test_sub']] = pd.DataFrame(combos['sub'].tolist())
    dfBS['acc'] = [
        model('BS', train_sub=tr_sub, test_sub=te_sub, train_task=tr_task, test_task=te_task)
        for tr_sub, te_sub, tr_task, te_task in zip(
            dfBS['train_sub'], dfBS['test_sub'], dfBS['train_task'], dfBS['test_task']
        )
    ]
    return dfBS

def classifyCV(n_perms=1000):
    """
    Cross-validated (CV) analysis: same subject and same task, scored with
    shuffled labels.

    For every task in ``taskList`` and every subject in ``subList`` the task
    and rest data are read with ``classification.matFiles`` and combined with
    ``classification.concateFC``. The labels are then shuffled ``n_perms``
    times; after each shuffle a ``RidgeClassifier`` is scored with
    ``cross_val_score`` and the mean fold accuracy is recorded.

    Parameters
    -------------
    n_perms : int, optional
        Number of label shuffles per subject and task (default 1000).

    Returns
    -------------
    dfCV : DataFrame
        One column per task; each column holds the mean CV accuracies of all
        subjects stacked one after another (``n_perms`` values per subject).
        The same table is written to ``outDir + 'CV/acc.csv'``.
    """

    clf = RidgeClassifier()
    per_task_frames = []
    for task in taskList:
        acc_scores = []
        for sub in subList:
            taskFC = classification.matFiles(dataDir+task+'/'+sub+'_parcel_corrmat.mat')
            restFC = classification.matFiles(dataDir+'rest/'+sub+'_parcel_corrmat.mat')
            # One CV fold per row of the task data.
            folds = taskFC.shape[0]
            x_train, y_train = classification.concateFC(taskFC, restFC)
            for _ in range(n_perms):
                # Each shuffle is applied on top of the previous one.
                y_train = np.random.permutation(y_train)
                CVscores = cross_val_score(clf, x_train, y_train, cv=folds)
                acc_scores.append(CVscores.mean())
        per_task_frames.append(pd.DataFrame({task: acc_scores}))
    dfCV = pd.concat(per_task_frames, axis=1)
    dfCV.to_csv(outDir+'CV/acc.csv', index=False)
    # Return the table as well as saving it, so callers can inspect it directly.
    return dfCV
    

def model(analysis, train_sub, test_sub, train_task, test_task):
    """
    Load the data for one train/test combination and score it with CV_folds.

    Parameters
    -------------
    analysis : str
            The type of analysis to be conducted; forwarded to ``CV_folds``
    train_sub : str
            Subject name for training
    test_sub : str
            Subject name for testing
    train_task : str
            Task name for training
    test_task : str
            Task name for testing

    Returns
    -------------
    total_score : float
            The score returned by ``CV_folds`` for this combination

    """

    clf = RidgeClassifier()
    train_taskFC = classification.matFiles(dataDir+train_task+'/'+train_sub+'_parcel_corrmat.mat')
    train_restFC = classification.matFiles(dataDir+'rest/'+train_sub+'_parcel_corrmat.mat')
    test_taskFC = classification.matFiles(dataDir+test_task+'/'+test_sub+'_parcel_corrmat.mat')
    if train_sub != test_sub:
        # Different test subject: its own rest data is loaded for testing.
        test_restFC = classification.matFiles(dataDir+'rest/'+test_sub+'_parcel_corrmat.mat')
    else:
        # Same subject: the training rest data is reused on the test side.
        test_restFC = train_restFC
    return CV_folds(clf, analysis, train_taskFC, train_restFC, test_taskFC, test_restFC)



def CV_folds(clf, analysis, taskFC, restFC, test_taskFC, test_restFC):
    """
    Nested leave-one-out scoring with shuffled training labels.

    Outer loop: leave one row of ``taskFC`` out, build a training set from the
    remaining task rows and the rest rows at the same indices, shuffle the
    training labels, and fit ``clf``.
    Inner loop: for every row of ``test_taskFC``, score the fitted classifier
    on a two-sample test set (one task sample labelled 1, one rest sample
    labelled 0).

    The only difference between analyses is where the rest test sample comes
    from:

    * ``analysis == 'SS'``: the row of ``restFC`` held out by the outer fold.
    * any other value: the row of ``test_restFC`` at the inner-loop index.

    Parameters
    -----------
    clf : obj
        Classifier providing ``fit`` and ``score``
    analysis : str
        Analysis type; only the value ``'SS'`` changes behavior
    taskFC, restFC, test_taskFC, test_restFC : array_like
        Training and testing arrays of task and rest FC. ``restFC`` is indexed
        with indices generated from ``taskFC``, and ``test_restFC`` with
        indices generated from ``test_taskFC``, so each rest array needs at
        least as many rows as its task counterpart.

    Returns
    -----------
    total_score : float
        Mean over outer folds of the mean inner-fold accuracy
    """

    loo = LeaveOneOut()
    task_labels = np.ones(taskFC.shape[0], dtype=int)
    rest_labels = np.zeros(restFC.shape[0], dtype=int)
    # Every test set is one task sample followed by one rest sample.
    y_te = np.array([1, 0])
    same_subject = analysis == 'SS'

    outer_scores = []
    for train_index, test_index in loo.split(taskFC):
        X_tr = np.concatenate((taskFC[train_index], restFC[train_index]))
        y_tr = np.concatenate((task_labels[train_index], rest_labels[train_index]))
        # Shuffle the labels so the fitted model reflects a permuted labelling.
        y_tr = np.random.permutation(y_tr)
        clf.fit(X_tr, y_tr)

        inner_scores = []
        for _, te_index in loo.split(test_taskFC):
            if same_subject:
                # Rest sample held out of training by the outer fold.
                Xtest_rest = restFC[test_index]
            else:
                Xtest_rest = test_restFC[te_index]
            X_te = np.concatenate((test_taskFC[te_index], Xtest_rest))
            inner_scores.append(clf.score(X_te, y_te))
        # Means are taken with pandas, as in the original implementation.
        outer_scores.append(pd.Series(inner_scores).mean())

    total_score = pd.Series(outer_scores).mean()
    return total_score


In [2]:
# DS permutation distribution: 1000 independent runs of classifyDS(),
# stacked into one table and written to disk.
# Create the output folder first so a long run is not lost to a missing directory.
os.makedirs(outDir+'DS/', exist_ok=True)
DS_runs=[]
for i in range(1000):
    DS=classifyDS()
    DS_runs.append(DS)
# Concatenate once at the end instead of re-copying the growing table every iteration.
DS_perms=pd.concat(DS_runs)
DS_perms.to_csv(outDir+'DS/acc.csv',index=False)

Unnamed: 0,train_task,test_task,sub,acc
0,mixed,motor,MSC01,"(0.9949999999999999, [0.95, 1.0, 1.0, 1.0, 1.0..."
1,mixed,mem,MSC01,"(0.9349999999999999, [0.8, 0.95, 0.95, 0.95, 0..."
2,motor,mixed,MSC01,"(0.8149999999999998, [0.8, 0.8, 0.8, 0.8, 0.85..."
3,motor,mem,MSC01,"(0.6500000000000001, [0.65, 0.65, 0.65, 0.65, ..."
4,mem,mixed,MSC01,"(0.945, [0.95, 0.95, 0.9, 0.95, 0.95, 0.95, 0...."
5,mem,motor,MSC01,"(1.0, [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,..."
6,mixed,motor,MSC02,"(0.75, [0.75, 0.75, 0.6, 0.85, 0.8, 0.7, 0.8, ..."
7,mixed,mem,MSC02,"(0.8299999999999998, [0.85, 0.75, 0.8, 0.85, 0..."
8,motor,mixed,MSC02,"(0.5, [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5,..."
9,motor,mem,MSC02,"(0.5, [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5,..."


In [2]:
# Single run of the same-subject analysis; the resulting table is kept in SS.
# NOTE(review): the permutation cell further down reassigns SS on every
# iteration - confirm this standalone run is still needed.
SS=classifySS()

In [5]:
#run this one first
# SS permutation distribution: 1000 independent runs of classifySS(),
# stacked into one table and written to disk.
# Create the output folder first so a long run is not lost to a missing directory.
os.makedirs(outDir+'SS/', exist_ok=True)
SS_runs=[]
for i in range(1000):
    SS=classifySS()
    SS_runs.append(SS)
# Concatenate once at the end instead of re-copying the growing table every iteration.
SS_perms=pd.concat(SS_runs)
SS_perms.to_csv(outDir+'SS/acc.csv',index=False)

In [9]:
#then run this one
# BS permutation distribution: 1000 independent runs of classifyBS(),
# stacked into one table and written to disk.
# Create the output folder first so a long run is not lost to a missing directory.
os.makedirs(outDir+'BS/', exist_ok=True)
BS_runs=[]
for i in range(1000):
    BS=classifyBS()
    BS_runs.append(BS)
# Concatenate once at the end instead of re-copying the growing table every iteration.
BS_perms=pd.concat(BS_runs)
BS_perms.to_csv(outDir+'BS/acc.csv',index=False)