In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import cross_val_score, LeaveOneOut
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
import random, sys
sys.path.insert(0, '../scripts')

from lib import make_corrfig, linearize, get_flat_inds_for_net, triangularizeweights
from data_loader import data_loader
import warnings
warnings.simplefilter('ignore')

In [5]:
# Load demographic data and subject lists
demo = pd.read_csv('../data/demo.csv', names=['VC', 'Age', 'Group'])

# Split the demographics by group label. `where(...)` blanks out every row that is
# not in the group and `dropna()` then removes those rows (along with any row that
# has a missing value), so only fully populated rows of the group remain.
demo_ts = demo.where(demo.Group=='TS').dropna().reset_index(drop=True)
demo_hc = demo.where(demo.Group=='TFC').dropna().reset_index(drop=True)

# The custom data loader reads in connectivity data for each group separately
ts_con = data_loader(demo_ts)
hc_con = data_loader(demo_hc)

# Connectivity matrices are symmetric and square. We need just the flattened upper
# or lower triangle of each matrix to create a new design matrix.
# NOTE(review): assumes `linearize` returns one row per subject -- confirm in lib.
ts_con_flat = linearize(ts_con)
hc_con_flat = linearize(hc_con)

# Create the feature matrix: HC rows first, then TS rows
X = np.vstack((hc_con_flat, ts_con_flat))

# Create the label vector: 1 for HC, -1 for TS.
# Group sizes are taken from the loaded data instead of a hard-coded 99 so the
# labels stay aligned with the rows of X if the subject lists change.
n_hc, n_ts = len(hc_con_flat), len(ts_con_flat)
y = np.concatenate((np.repeat(1, n_hc), np.repeat(-1, n_ts)))
assert X.shape[0] == y.shape[0], "feature rows and labels are misaligned"

# NOTE(review): the scaler is fit on all subjects at once, so any later
# cross-validation on X_scale sees statistics computed with the held-out subject.
scaler = StandardScaler()
X_scale = scaler.fit_transform(X)

### Random Forests prototyping

In [43]:
# No-op self-assignments: both names are rebound to the values they already hold.
# NOTE(review): presumably a leftover hook for sub-setting the data while
# prototyping -- confirm, then delete this cell.
X_scale = X_scale
y = y

In [45]:
# Leave-one-out permutation importance per network.
# For every LOOCV fold a random forest is trained on the remaining subjects; then,
# for each network, the held-out subject's features belonging to that network are
# replaced by values drawn from random training subjects and the prediction is
# scored. `scores` ends up as a flat list ordered fold -> network -> permutation,
# each entry being 1.0 (correct) or 0.0 (incorrect).
cv = LeaveOneOut()
scores = []
netlist = ['Auditory','CingOperc','CingPar','Default','DorsalAtt','FrontoPar','None', 'RetroTemp','Salience','SMhand','SMmouth','VentralAtt','Visual','Subcort']

permutations = 50
n_estimators = 500

# Fixed seed so that both the forest and the permutation draws are reproducible.
SEED = 0
rng = np.random.RandomState(SEED)

# The feature indices of a network do not depend on the fold, so look them up once.
# NOTE(review): assumes get_flat_inds_for_net returns a flat sequence of integer
# column indices into X_scale and has no side effects -- confirm in lib.
inds_by_net = {net: np.asarray(get_flat_inds_for_net(net), dtype=np.intp) for net in netlist}

# Loop over the LOOCV split indices
for train_ix, test_ix in cv.split(X_scale):

    # For each split, create the respective training and test set
    X_train, X_test = X_scale[train_ix, :], X_scale[test_ix, :]
    y_train, y_test = y[train_ix], y[test_ix]

    # Train the model
    clf = RandomForestClassifier(n_estimators=n_estimators, random_state=SEED)
    clf.fit(X_train, y_train)

    n_train = X_train.shape[0]

    for net in netlist:

        network_inds = inds_by_net[net]

        # One copy of the held-out subject per permutation
        permuted = np.repeat(X_test, permutations, axis=0)

        # Permute the test subject's network connections individually: every
        # (permutation, feature) pair gets its value from an independently drawn
        # training subject. RandomState.randint excludes the upper bound.
        donors = rng.randint(0, n_train, size=(permutations, len(network_inds)))
        permuted[:, network_inds] = X_train[donors, network_inds]

        # Test the model on all permuted copies in a single call; with one held-out
        # subject the accuracy of each copy is simply 1.0 or 0.0.
        correct = clf.predict(permuted) == y_test[0]

        # Keep track of the accuracy of the LOOCV with this network permuted
        scores.extend(correct.astype(float).tolist())

# Optionally persist the raw scores:
# with open('results.txt', 'w') as f:
#     for item in scores:
#         f.write("%s\n" % item)

KeyboardInterrupt: 

In [None]:
# Collapse the raw LOOCV scores (ordered fold -> network -> permutation) into one
# mean accuracy per network. On exit `scores` holds one value per entry of `netlist`.
# Because `scores` is overwritten, the LOOCV cell must be re-run before re-running
# this cell.
permutations = 50
num_nets = len(netlist)

# Keep the raw per-permutation results before `scores` is rebound below
raw_scores = list(scores)

# Average each consecutive run of `permutations` scores (one run per fold/network pair)
mean_score_over_permutations = [
    np.mean(raw_scores[i:i+permutations])
    for i in range(0, len(raw_scores), permutations)
]

# Group the per-network means by fold
mean_score_chunked_by_net = [
    mean_score_over_permutations[i:i+num_nets]
    for i in range(0, len(mean_score_over_permutations), num_nets)
]

# An interrupted run leaves a trailing fold with fewer than `num_nets` entries;
# drop it so that every network is averaged over the same folds.
mean_score_chunked_by_net = [fold for fold in mean_score_chunked_by_net if len(fold) == num_nets]

if not mean_score_chunked_by_net:
    raise ValueError("`scores` contains no complete LOOCV fold; re-run the LOOCV cell first")

# Mean accuracy of each network across folds, in `netlist` order
scores = [
    np.mean([fold[i] for fold in mean_score_chunked_by_net])
    for i in range(0, num_nets)
]

In [None]:
# Display the current contents of `scores` as the cell output
scores

In [None]:
def reorder_scores(scores, nets = 14, permutations = 20):
    """Reduce a flat list of LOOCV scores to one mean score per network.

    `scores` is read as consecutive runs of `permutations` values, one run per
    (fold, network) pair, with the networks of a fold stored back to back.

    Parameters
    ----------
    scores : sequence of float
        Flat scores ordered fold -> network -> permutation.
    nets : int, default 14
        Number of networks per fold.
    permutations : int, default 20
        Number of scores recorded per (fold, network) pair. Previously fixed
        at 20 inside the function; the default keeps that behaviour.

    Returns
    -------
    list of float
        `nets` values; entry ``i`` is the mean over folds of the mean score of
        network ``i``. All entries are NaN when no complete fold is available.
    """
    # Mean of every run of `permutations` scores
    perm_means = [np.mean(scores[i:i+permutations]) for i in range(0, len(scores), permutations)]

    # Group the per-network means by fold
    folds = [perm_means[i:i+nets] for i in range(0, len(perm_means), nets)]

    # A truncated run leaves a final fold with fewer than `nets` entries, which
    # used to raise IndexError below; ignore such a fold instead.
    folds = [fold for fold in folds if len(fold) == nets]

    if not folds:
        return [float('nan')] * max(nets, 0)

    return [np.mean([fold[i] for fold in folds]) for i in range(0, nets)]