# NMI Permutation Tests

## What is NMI? 
The normalized mutual information score. Its a measure of how similar two sets of numbers are. We will use this to quantify how similar community assignments are between our groups

## Why Permutation Tests? 
BY iterating over the NMI test in random groups, we can generate a null distribution for how similar the samples are in random data. This give us something to compare to. 

### Imports & Global Functions

In [34]:
# Imports 
import glob
import os
import networkx as nx
import numpy as np
import pandas as pd
import bz2
import pickle
import community
import statistics
import pdb
from scipy import stats
import scipy
from sklearn.metrics.cluster import normalized_mutual_info_score
import random

In [35]:
def create_corr_network_5(G, corr_direction, min_correlation):

    ##Creates a copy of the graph
    H = G.copy()
    
    ##Checks all the edges and removes some based on corr_direction
    for stock1, stock2, weight in list(G.edges(data=True)):
        ##if we only want to see the positive correlations we then delete the edges with weight smaller than 0        
        if corr_direction == "positive":
            ####it adds a minimum value for correlation. 
            ####If correlation weaker than the min, then it deletes the edge
            if weight["weight"] <0 or weight["weight"] < min_correlation:
                H.remove_edge(stock1, stock2)
        ##this part runs if the corr_direction is negative and removes edges with weights equal or largen than 0
        else:
            ####it adds a minimum value for correlation. 
            ####If correlation weaker than the min, then it deletes the edge
            if weight["weight"] >=0 or weight["weight"] > min_correlation:
                H.remove_edge(stock1, stock2)
    return(H)

In [36]:
def make_graphs(list_o_data, direction, min_cor):
    edge_dict={}
    cor_dict={}
    FC_dict={}
    sd_dict={}
    graph_dict={}
    partition_dict={}
    for key, values in list_o_data.items():
            #i=i.set_index(labels.ID)
            #i.rename(columns=labels.ID, inplace=True)
            ########################################
            edge_dict.setdefault(key, []).append(values)
            ########################################
            cor_matrix = np.asmatrix(values)
            x=abs(cor_matrix)
            mu=x.mean()
            sd=x.std()
            ########################################
            cor_dict.setdefault(key, []).append(x)
            ########################################
            FC_dict.setdefault(key, []).append(mu)
            sd_dict.setdefault(key, []).append(sd)
            ########################################
            G = nx.from_numpy_matrix(cor_matrix)
            #for i, nlrow in labels.iterrows():
                #G.node[i].update(nlrow[0:].to_dict())
            ########################################
            graph_dict.setdefault(key, []).append(G)
            ########################################
            partition = community.best_partition(create_corr_network_5(G, direction,min_cor))
            ########################################
            partition_dict.setdefault(key, []).append(partition)
            ########################################
    return({'edges':edge_dict, 'correlations':cor_dict, 'mean_FC':FC_dict, 'stdev':sd_dict, 'graphs':graph_dict,'modules':partition_dict})

In [37]:
#make the function to combine
def make_total_graphs(dict_o_data):
    mylist=[]
    for key, val_list in dict_o_data.items():
        for i in val_list:
            cor_matrix = np.asarray(i)
            mylist.append(cor_matrix)
    x=np.stack(mylist, axis=2)
    mu=np.mean(x, axis=(2))
    return(mu)

In [38]:
def threshold(G, corr_direction, min_correlation):

    ##Creates a copy of the graph
    H = G.copy()
    
    ##Checks all the edges and removes some based on corr_direction
    for stock1, stock2, weight in list(G.edges(data=True)):
        ##if we only want to see the positive correlations we then delete the edges with weight smaller than 0        
        if corr_direction == "positive":
            ####it adds a minimum value for correlation. 
            ####If correlation weaker than the min, then it deletes the edge
            if weight["weight"] <0 or weight["weight"] < min_correlation:
                H.remove_edge(stock1, stock2)
        ##this part runs if the corr_direction is negative and removes edges with weights equal or largen than 0
        else:
            ####it adds a minimum value for correlation. 
            ####If correlation weaker than the min, then it deletes the edge
            if weight["weight"] >=0 or weight["weight"] > min_correlation:
                H.remove_edge(stock1, stock2)
    return(H)

### Load in the Data & Generate Graphs

In [66]:
basepath = '/Users/jennygilbert/Documents/'
#Find the path to data
file_list = glob.glob(os.path.join(basepath,'betaseries_bevel/4_combine_timeseries/punishment/*.txt'))

#Setting the ditionary
my_dict={}
for item in file_list:
    name=item.split('/')[7].split('.')[0]
    #print(name)
    my_dict.setdefault(name, []).append(item)
    
#Setting the data dictionary
data_dict={}
for key, value in my_dict.items():
    for i in value:
        data_dict.setdefault(key, []).append(pd.read_csv(i, sep='\t' ,header=None,index_col=False))

#Setting up the correlation dictionary
cor_dict={}

for key, value in data_dict.items():
    value[0]
    #pdb.set_trace()
    cor_matrix = value[0].corr()
    cor_dict[key] = cor_matrix   
    
#This points to a txt file with the ROI names in a list separated by commas
path = os.path.join(basepath,'betaseries_bevel/5_analysis/labels.txt')
df_label = pd.read_csv(path, sep=',')

labels_dict = {}
n=0
for item in df_label:
    labels_dict[n]=item
    n=n+1
    
GRAPHS = make_graphs(cor_dict, "positive", 0)

### Make an ID List, Shuffle, and Split

In [67]:
id_list = ['sub-001_punish','sub-002_punish','sub-004_punish','sub-005_punish','sub-006_punish','sub-007_punish','sub-009_punish','sub-011_punish','sub-014_punish','sub-015_punish','sub-016_punish','sub-017_punish','sub-018_punish','sub-019_punish','sub-021_punish','sub-024_punish','sub-025_punish','sub-026_punish','sub-029_punish','sub-030_punish','sub-031_punish','sub-032_punish','sub-036_punish','sub-038_punish','sub-040_punish','sub-045_punish','sub-047_punish','sub-048_punish','sub-050_punish','sub-052_punish','sub-053_punish','sub-054_punish','sub-055_punish','sub-056_punish','sub-058_punish','sub-059_punish','sub-060_punish','sub-061_punish','sub-062_punish','sub-066_punish','sub-068_punish','sub-069_punish','sub-070_punish','sub-072_punish','sub-073_punish','sub-074_punish','sub-075_punish','sub-082_punish','sub-084_punish','sub-085_punish','sub-087_punish','sub-088_punish','sub-003_punish','sub-010_punish','sub-020_punish','sub-022_punish','sub-044_punish','sub-067_punish','sub-083_punish','sub-012_punish','sub-013_punish','sub-027_punish','sub-028_punish','sub-033_punish','sub-034_punish','sub-035_punish','sub-037_punish','sub-039_punish','sub-041_punish','sub-042_punish','sub-043_punish','sub-046_punish','sub-057_punish','sub-063_punish','sub-064_punish','sub-071_punish','sub-076_punish','sub-077_punish','sub-078_punish','sub-079_punish','sub-080_punish','sub-081_punish','sub-086_punish','sub-089_punish','sub-090_punish']

In [74]:
#Make a blank list for the NMI output
NMI_list = []

In [75]:
i = 0

for i in range(0, 500): 
    random.shuffle(id_list)
    a_list = id_list[0:52]
    b_list = id_list[53:85]

    a_corr_dict = { key:value for key,value in GRAPHS['correlations'].items() if key in a_list}
    b_corr_dict = { key:value for key,value in GRAPHS['correlations'].items() if key in b_list}

    mean_a_graph = make_total_graphs(a_corr_dict)
    mean_b_graph = make_total_graphs(b_corr_dict)

    mean_a_graph_mat = np.matrix(mean_a_graph)
    mean_b_graph_mat = np.matrix(mean_b_graph)

    mean_a_G = nx.from_numpy_matrix(mean_a_graph_mat)
    mean_b_G = nx.from_numpy_matrix(mean_b_graph_mat)

    partition_a = community.best_partition(mean_a_G)
    nx.set_node_attributes(mean_a_G, partition_a, 'modules')
    nx.set_node_attributes(mean_a_G, labels_dict, 'ROIs')

    partition_b = community.best_partition(mean_b_G)
    nx.set_node_attributes(mean_b_G, partition_b, 'modules')
    nx.set_node_attributes(mean_b_G, labels_dict, 'ROIs')

    thresh_a_G = threshold(mean_a_G, 'positive', 0.3)
    thresh_b_G = threshold(mean_b_G, 'positive', 0.3)

    partition_a_list = []
    for value in partition_a.values():
        partition_a_list.append(partition_a[value])

    partition_b_list = []
    for value in partition_b.values():
        partition_b_list.append(partition_b[value])
    
    NMIx = normalized_mutual_info_score(partition_a_list, partition_b_list)
    NMI_list.append(NMIx)
    NMI_list
    
    i=i+1

In [76]:
NMI_list

[0.7382295552951933,
 0.7536712341307843,
 0.7536712341307843,
 0.8358726441483001,
 0.7714836723402282,
 0.5106919015521377,
 1.0,
 0.7714836723402282,
 0.7536712341307841,
 0.8278661696132787,
 0.8237182337920514,
 0.4747324717405492,
 0.7536712341307841,
 0.7536712341307843,
 0.6513267719393331,
 0.6691061868254723,
 0.6571482921870538,
 0.6848308134347534,
 0.8358726441483001,
 0.7714836723402282,
 0.7536712341307841,
 0.7714836723402282,
 0.7735250658957237,
 1.0,
 0.5599304092029925,
 0.6491735941224952,
 0.8358726441483001,
 0.5545735253539358,
 0.7714836723402282,
 0.7714836723402282,
 0.689977793653148,
 0.7714836723402282,
 0.5599304092029925,
 0.7714836723402282,
 0.7536712341307843,
 0.6691061868254723,
 0.7536712341307841,
 0.5106919015521377,
 0.7536712341307841,
 0.7714836723402282,
 0.7714836723402282,
 0.5276863828958336,
 0.8358726441483001,
 0.7536712341307843,
 0.7536712341307843,
 0.6663612550812636,
 0.7714836723402282,
 0.7536712341307841,
 0.7536712341307843,
 0