In [3]:
import classification
from sklearn.model_selection import LeaveOneOut
from sklearn.model_selection import KFold
from sklearn.svm import LinearSVC
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import RidgeClassifier
import numpy as np
import os
import sys
import pandas as pd
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import cross_validate
from sklearn import metrics
import itertools
import classification
#import other python scripts for further analysis
#import reshape
#import results
import warnings
warnings.filterwarnings("ignore")
# ---- Directory layout -------------------------------------------------
# Everything is resolved relative to the analysis root in the home folder.
thisDir = os.path.expanduser('~/Desktop/MSC_Alexis/analysis/')
# Using the less conservative FC matrices.
dataDir = thisDir + 'data/mvpa_data/'
# Frame-limited matrices live in a sub-folder of the data directory.
framesDir = dataDir + 'tmask_frames/'
outDir = thisDir + 'output/mLmin/results/ridge/frames/'

# Alternative locations kept for reference:
#dataDir = thisDir + 'data/mvpa_data/'
#outDir = thisDir + 'output/mL/'

# ---- Subjects and tasks -----------------------------------------------
taskList = ['mixed', 'motor', 'mem']
# Alternative task set:
#taskList=['glass','semantic', 'motor','mem']
# Full subject set:
#subList=['MSC01','MSC02','MSC03','MSC04','MSC05','MSC06','MSC07','MSC10']
subList = ['MSC05', 'MSC06', 'MSC07']

# Every ordered (first, second) pairing of subjects and of tasks.
subsComb = list(itertools.permutations(subList, 2))
tasksComb = list(itertools.permutations(taskList, 2))

# DS: (subject pair, single task)
DSvars = list(itertools.product(subsComb, taskList))
# SS: (single subject, task pair)
SSvars = list(itertools.product(subList, tasksComb))
# BS: (subject pair, task pair)
BSvars = list(itertools.product(subsComb, tasksComb))

# Only training on memory.
# Frame counts swept by the analysis.
frameList = [
    5, 10, 15, 20, 25, 30,
    40, 50, 60, 70, 80, 90, 100,
    125, 150, 175, 200, 225, 250, 275, 300, 325, 350,
]

def calc_frames():
    """
    Run all four analyses (CV, SS, DS, BS) for every frame count in
    ``frameList`` and write the accuracies to ``outDir + 'allFrames.csv'``.

    The CSV is in long format with the columns ``Frames``, ``Acc`` and
    ``Analysis``; rows are grouped by analysis in the order CV, SS, DS, BS.

    Returns
    -------------
    None
        The result is only written to disk.

    """
    # Create the destination before the sweep: the CSV is written only at
    # the very end, so a missing folder would otherwise discard every
    # classifier run that preceded it.
    os.makedirs(outDir, exist_ok=True)

    # (label, function) pairs; the order fixes both the per-frame call order
    # and the row order of the final table.
    analyses = (
        ('CV', classifyCV),
        ('SS', classifySS),
        ('DS', classifyDS),
        ('BS', classifyBS),
    )
    acc_by_analysis = {label: [] for label, _ in analyses}
    for f in frameList:
        for label, classify in analyses:
            acc_by_analysis[label].append(classify(f))

    tables = [
        pd.DataFrame({'Frames': frameList,
                      'Acc': acc_by_analysis[label],
                      'Analysis': label})
        for label, _ in analyses
    ]
    allFrames = pd.concat(tables)
    allFrames.to_csv(outDir + 'allFrames.csv')
    
def classifyDS(frames):
    """
    Different-subject (DS) analysis: train on one subject, test on another.

    One model is fit per entry of ``DSvars`` (ordered subject pair x task).
    The training task passed to ``model`` is always 'mem'; the task of each
    entry is used as the test task.
    NOTE(review): "same task" therefore only holds literally for the 'mem'
    entries -- confirm this is the intended design.

    Parameters
    -------------
    frames : int
            Frame count; forwarded to ``model``, which uses it to locate the
            training matrices

    Returns
    -------------
    DS_acc : float
        Mean of the 'acc' column over all DS entries, as computed by
        ``statsACC``

    """
    combos = pd.DataFrame(DSvars, columns=['sub', 'task'])
    # Split the (train, test) subject tuples into two named columns.
    table = pd.DataFrame(combos['sub'].tolist(),
                         columns=['train_sub', 'test_sub'])
    table['task'] = combos['task']
    table['acc'] = [
        model(frames, 'DS', train_sub=trainer, test_sub=tester,
              train_task='mem', test_task=task)
        for trainer, tester, task in zip(table['train_sub'],
                                         table['test_sub'],
                                         table['task'])
    ]
    return statsACC(table, 'DS')
    
    
def classifySS(frames):
    """
    Same-subject (SS) analysis: train and test within one subject, testing
    on the second task of each task pair.

    One model is fit per entry of ``SSvars`` (subject x ordered task pair).
    NOTE(review): the 'train_task' column is never read -- ``model`` is
    always called with train_task='mem', so 'mem' also occurs as a test task
    and each test task is evaluated more than once per subject. Confirm this
    is intended.

    Parameters
    -------------
    frames : int
            Frame count; forwarded to ``model``, which uses it to locate the
            training matrices

    Returns
    -------------
    SS_acc : float
        Mean of the 'acc' column over all SS entries, as computed by
        ``statsACC``

    """
    combos = pd.DataFrame(SSvars, columns=['sub', 'task'])
    # Split the (train, test) task tuples into two named columns.
    table = pd.DataFrame(combos['task'].tolist(),
                         columns=['train_task', 'test_task'])
    table['sub'] = combos['sub']
    table['acc'] = [
        model(frames, 'SS', train_sub=subject, test_sub=subject,
              train_task='mem', test_task=target)
        for subject, target in zip(table['sub'], table['test_task'])
    ]
    return statsACC(table, 'SS')
def classifyBS(frames):
    """
    Different-subject, different-task (BS) analysis.

    One model is fit per entry of ``BSvars`` (ordered subject pair x ordered
    task pair): trained on the first subject, tested on the second subject
    with the second task of the pair.
    NOTE(review): the 'train_task' column is never read -- ``model`` is
    always called with train_task='mem', so 'mem' also occurs as a test
    task. Confirm this is intended.

    Parameters
    -------------
    frames : int
            Frame count; forwarded to ``model``, which uses it to locate the
            training matrices

    Returns
    -------------
    BS_acc : float
        Mean of the 'acc' column over all BS entries, as computed by
        ``statsACC``

    """
    combos = pd.DataFrame(BSvars, columns=['sub', 'task'])
    # Unpack the task tuples first, then the subject tuples, into columns.
    table = pd.DataFrame(combos['task'].tolist(),
                         columns=['train_task', 'test_task'])
    subjects = pd.DataFrame(combos['sub'].tolist(),
                            columns=['train_sub', 'test_sub'])
    table['train_sub'] = subjects['train_sub']
    table['test_sub'] = subjects['test_sub']
    table['acc'] = [
        model(frames, 'BS', train_sub=trainer, test_sub=tester,
              train_task='mem', test_task=target)
        for trainer, tester, target in zip(table['train_sub'],
                                           table['test_sub'],
                                           table['test_task'])
    ]
    return statsACC(table, 'BS')


def classifyCV(frames):
    """
    Within-subject cross validation (CV) on the 'mem' task versus rest.

    For every subject in ``subList`` the frame-limited 'mem' matrices and the
    rest matrices are loaded, combined with ``classification.concateFC`` and
    scored with ``cross_val_score`` using a ``RidgeClassifier``. The number
    of folds equals the first dimension of the loaded task matrix.

    Parameters
    -------------
    frames : int
            Frame count; selects the sub-folder of ``framesDir`` that the
            task matrices are read from

    Returns
    -------------
    CV_acc
        Result of ``statsACC`` applied to the per-subject mean fold
        accuracies (a table indexed by 'sub' with one 'acc' column)

    """
    suffix = '_parcel_corrmat.mat'
    ridge = RidgeClassifier()
    per_subject = []
    for subject in subList:
        task_fc = classification.matFiles(
            framesDir + 'mem/' + str(frames) + '/' + subject + suffix)
        rest_fc = classification.matFiles(dataDir + 'rest/' + subject + suffix)
        features, labels = classification.concateFC(task_fc, rest_fc)
        # One fold per row of the task matrix.
        fold_scores = cross_val_score(ridge, features, labels,
                                      cv=task_fc.shape[0])
        per_subject.append(fold_scores.mean())
    summary = pd.DataFrame({'sub': subList, 'acc': per_subject})
    return statsACC(summary.set_index('sub'), 'CV')
    
    
    
def statsACC(df, analysis):
    """
    Print a banner for the analysis and return its mean accuracy.

    Parameters
    -------------
    df : DataFrame
            Table with an 'acc' column holding one accuracy per model
    analysis : str
            One of 'CV', 'SS', 'DS' or 'BS'

    Returns
    -------------
    mu : float or None
        Mean of ``df['acc']``. ``None`` when ``analysis`` is not recognised;
        in that case 'skipping stats' is printed and nothing is computed.

    """
    banners = {
        'CV': 'cross validation stats',
        'SS': 'same sub stats',
        'DS': 'diff sub stats',
        'BS': 'diff sub diff task stats',
    }
    if analysis not in banners:
        # Previously this path fell through to ``return mu`` with ``mu``
        # unbound and raised UnboundLocalError instead of skipping.
        print('skipping stats')
        return None
    print(banners[analysis])
    # Always reduce the 'acc' column so every analysis yields a scalar;
    # 'CV' used ``df.mean()``, which returns a one-element Series instead.
    return df['acc'].mean()





def model(frames,analysis, train_sub, test_sub, train_task, test_task):
    """
    Load training and test matrices and score a ridge classifier on them.

    Training task matrices are read from the frame-limited folder
    (``framesDir``); rest matrices and all test matrices are read from
    ``dataDir``. When the training and test subject are the same, the
    training rest matrix is reused as the test rest matrix.

    Parameters
    -------------
    frames : int
            Frame count; selects the sub-folder of ``framesDir`` that the
            training task matrix is read from
    analysis : string
            The type of analysis to be conducted; passed through to
            ``classification.CV_folds``
    train_sub : str
            Subject name for training
    test_sub : str
            Subject name for testing
    train_task : str
            Task name for training
    test_task : str
            Task name for testing

    Returns
    -------------
    total_score : float
            First value returned by ``classification.CV_folds``
            (presumably the average accuracy of all folds -- verify against
            that helper)

    """
    suffix = '_parcel_corrmat.mat'
    ridge = RidgeClassifier()

    # Training data: frame-limited task matrix plus the subject's rest matrix.
    train_task_fc = classification.matFiles(
        framesDir + train_task + '/' + str(frames) + '/' + train_sub + suffix)
    train_rest_fc = classification.matFiles(
        dataDir + 'rest/' + train_sub + suffix)

    # Test data: task matrix of the test subject.
    held_out_task_fc = classification.matFiles(
        dataDir + test_task + '/' + test_sub + suffix)
    if train_sub == test_sub:
        # Same subject: the rest matrix is already loaded.
        held_out_rest_fc = train_rest_fc
    else:
        held_out_rest_fc = classification.matFiles(
            dataDir + 'rest/' + test_sub + suffix)

    total_score, _ = classification.CV_folds(
        ridge, analysis, train_task_fc, train_rest_fc,
        held_out_task_fc, held_out_rest_fc)
    return total_score

In [4]:
calc_frames()

cross validation stats
same sub stats
diff sub stats
diff sub diff task stats
cross validation stats
same sub stats
diff sub stats
diff sub diff task stats
cross validation stats
same sub stats
diff sub stats
diff sub diff task stats
cross validation stats
same sub stats
diff sub stats
diff sub diff task stats
cross validation stats
same sub stats
diff sub stats
diff sub diff task stats
cross validation stats
same sub stats
diff sub stats
diff sub diff task stats
cross validation stats
same sub stats
diff sub stats
diff sub diff task stats
cross validation stats
same sub stats
diff sub stats
diff sub diff task stats
cross validation stats
same sub stats
diff sub stats
diff sub diff task stats
cross validation stats
same sub stats
diff sub stats
diff sub diff task stats
cross validation stats
same sub stats
diff sub stats
diff sub diff task stats
cross validation stats
same sub stats
diff sub stats
diff sub diff task stats
cross validation stats
same sub stats
diff sub stats
diff sub di