# Network Analysis for BeveL Betaseries 

Inputs: betaseries files for BeveL participants (n=85) drawn from 4 conditions: choice, reward taste, punishment taste, neutral rinse

The analysis workflow is modeled on this paper: https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5429248/

## Most print/check statements are commented out to save time; uncomment them if you want to inspect intermediate results.

In [1]:
import glob
import os
import networkx as nx
import numpy as np
import pandas as pd
import bz2
import pickle
import community
import statistics
import pdb
from scipy import stats
import matplotlib
matplotlib.use("Qt5Agg")
import matplotlib.pyplot as plt

## Load in the data

### Find the path to the data

In [2]:
#Find the path to data
# glob returns its matches in arbitrary order; sorting makes the participant
# order (and therefore the row order of every table built from it) the same
# on every run.
file_list = sorted(glob.glob('/Users/jennygilbert/Documents/betaseries_bevel/4_combine_timeseries/punishment/*.txt'))

In [3]:
#Check the files found
#print(file_list)

In [4]:
#Check to see how many participants 
#len(file_list)

### Make a dictionary to read in the files to pandas

In [5]:
# Build a dictionary mapping each file's base name (e.g. "sub-001_punish")
# to the list of paths that carry that name.
my_dict={}
for item in file_list:
    # Take the file name itself instead of a fixed position in the split path,
    # so this keeps working if the data directory is moved or nested differently.
    name=os.path.basename(item).split('.')[0]
    print(name)
    my_dict.setdefault(name, []).append(item)

sub-001_punish
sub-002_punish
sub-003_punish
sub-004_punish
sub-005_punish
sub-006_punish
sub-007_punish
sub-009_punish
sub-010_punish
sub-011_punish
sub-012_punish
sub-013_punish
sub-014_punish
sub-015_punish
sub-016_punish
sub-017_punish
sub-018_punish
sub-019_punish
sub-020_punish
sub-021_punish
sub-022_punish
sub-024_punish
sub-025_punish
sub-026_punish
sub-027_punish
sub-028_punish
sub-029_punish
sub-030_punish
sub-031_punish
sub-032_punish
sub-033_punish
sub-034_punish
sub-035_punish
sub-036_punish
sub-037_punish
sub-038_punish
sub-039_punish
sub-040_punish
sub-041_punish
sub-042_punish
sub-043_punish
sub-044_punish
sub-045_punish
sub-046_punish
sub-047_punish
sub-048_punish
sub-050_punish
sub-052_punish
sub-053_punish
sub-054_punish
sub-055_punish
sub-056_punish
sub-057_punish
sub-058_punish
sub-059_punish
sub-060_punish
sub-061_punish
sub-062_punish
sub-063_punish
sub-064_punish
sub-066_punish
sub-067_punish
sub-068_punish
sub-069_punish
sub-070_punish
sub-071_punish
sub-072_pu

In [6]:
# Checking to make sure its populated
#my_dict['sub-024_reward']

### Read the data from the dictionary into pandas

In [7]:
# Read every file listed in my_dict into a pandas DataFrame.
# The files are tab-separated with no header row; each name keeps a list of
# the frames loaded for it.
data_dict={}
for subject, txt_paths in my_dict.items():
    for txt_path in txt_paths:
        frame = pd.read_csv(txt_path, sep='\t', header=None, index_col=False)
        data_dict.setdefault(subject, []).append(frame)

In [8]:
#Check the dictionary
data_dict.keys()

dict_keys(['sub-001_punish', 'sub-002_punish', 'sub-003_punish', 'sub-004_punish', 'sub-005_punish', 'sub-006_punish', 'sub-007_punish', 'sub-009_punish', 'sub-010_punish', 'sub-011_punish', 'sub-012_punish', 'sub-013_punish', 'sub-014_punish', 'sub-015_punish', 'sub-016_punish', 'sub-017_punish', 'sub-018_punish', 'sub-019_punish', 'sub-020_punish', 'sub-021_punish', 'sub-022_punish', 'sub-024_punish', 'sub-025_punish', 'sub-026_punish', 'sub-027_punish', 'sub-028_punish', 'sub-029_punish', 'sub-030_punish', 'sub-031_punish', 'sub-032_punish', 'sub-033_punish', 'sub-034_punish', 'sub-035_punish', 'sub-036_punish', 'sub-037_punish', 'sub-038_punish', 'sub-039_punish', 'sub-040_punish', 'sub-041_punish', 'sub-042_punish', 'sub-043_punish', 'sub-044_punish', 'sub-045_punish', 'sub-046_punish', 'sub-047_punish', 'sub-048_punish', 'sub-050_punish', 'sub-052_punish', 'sub-053_punish', 'sub-054_punish', 'sub-055_punish', 'sub-056_punish', 'sub-057_punish', 'sub-058_punish', 'sub-059_punish',

In [9]:
#Check for the dataframe
#data_dict['sub-058_reward']

### Create a new dictionary with correlation matrix

In [10]:
# Build the correlation dictionary: for each name, the column-by-column
# correlation matrix (pandas' default DataFrame.corr()) of the first frame
# loaded for it.
cor_dict = {
    subject: frames[0].corr()
    for subject, frames in data_dict.items()
}
    

In [11]:
#check the dictionary
#list(cor_dict.values())[3]

### Make a dictionary of labels for the nodes

In [12]:
#This points to a txt file with the ROI names in a list separated by commas
path = '/Users/jennygilbert/Documents/betaseries_bevel/5_analysis/labels.txt'
df_label = pd.read_csv(path, sep=',')

#df_label.head()

In [13]:
# Map position -> label. Iterating a DataFrame yields its column labels, so
# the names come from the header row of the labels file, numbered from 0.
labels_dict = dict(enumerate(df_label))

In [14]:
#print(labels_dict)

### Function to create a graph with positive or negative values and minimum correlation value

In [15]:
def create_corr_network_5(G, corr_direction, min_correlation):
    """Return a pruned copy of ``G``; ``G`` itself is left untouched.

    G: graph whose edges carry a ``"weight"`` attribute.
    corr_direction: ``"positive"`` drops every edge whose weight is below 0
        or below ``min_correlation``. Any other value selects the negative
        branch, which drops every edge whose weight is 0 or more, or is
        greater than ``min_correlation``.
    min_correlation: cut-off compared against each edge weight.
    """
    keep_positive = corr_direction == "positive"
    pruned = G.copy()

    ## Walk a snapshot of the original edges and remove the rejected ones from the copy
    for node_a, node_b, attrs in list(G.edges(data=True)):
        value = attrs["weight"]
        if keep_positive:
            ## positive direction: negative or too-weak correlations are rejected
            drop = value < 0 or value < min_correlation
        else:
            ## negative direction: weights equal to or larger than 0, or above the cut-off, are rejected
            drop = value >= 0 or value > min_correlation
        if drop:
            pruned.remove_edge(node_a, node_b)
    return pruned

### Function to make a graph object BY SUBJECT
This will return:
- The edges (normalized R correlation matrix, in a pandas dataframe)
- The correlations (absolute value of the edges, as a numpy matrix)
- The mean_FC (the mean functional connectivity per subject/run)
- The graphs (the raw graph object G for each subject)
- The modules (the partition values from the modularity calculation, i.e. the communities in the network at the participant level)

In [16]:
def make_graphs(list_o_data, direction, min_cor):
    """Build per-subject graphs, summary values and module partitions.

    Parameters
    ----------
    list_o_data : dict
        Maps a subject key to that subject's square correlation matrix
        (anything ``np.asmatrix`` accepts, e.g. a pandas DataFrame).
    direction, min_cor
        Passed straight to ``create_corr_network_5`` to prune the graph
        before community detection.

    Returns
    -------
    dict
        Keys ``'edges'``, ``'correlations'``, ``'mean_FC'``, ``'graphs'`` and
        ``'modules'``. Each value is a dict mapping the subject key to a list
        to which one entry is appended per subject:
        the input matrix, its element-wise absolute value, the mean of that
        absolute matrix, the graph built from the signed matrix, and the
        result of ``community.best_partition`` on the pruned graph.
    """
    edge_dict={}
    cor_dict={}
    FC_dict={}
    graph_dict={}
    partition_dict={}
    for key, values in list_o_data.items():
        edge_dict.setdefault(key, []).append(values)

        # Kept as np.matrix so the objects stored under 'correlations' have
        # the same type as before.
        cor_matrix = np.asmatrix(values)
        x=abs(cor_matrix)
        # NOTE(review): the mean is taken over every entry of the matrix,
        # diagonal included - confirm that is the intended mean FC.
        mu=x.mean()
        cor_dict.setdefault(key, []).append(x)
        FC_dict.setdefault(key, []).append(mu)

        # nx.from_numpy_matrix was removed in NetworkX 3.0; from_numpy_array
        # builds the same weighted graph from a plain ndarray.
        G = nx.from_numpy_array(np.asarray(cor_matrix))
        graph_dict.setdefault(key, []).append(G)

        # Community detection runs on the pruned copy; the stored graph G
        # keeps all of its edges.
        partition = community.best_partition(create_corr_network_5(G, direction,min_cor))
        partition_dict.setdefault(key, []).append(partition)
    return({'edges':edge_dict, 'correlations':cor_dict, 'mean_FC':FC_dict, 'graphs':graph_dict, 'modules':partition_dict})

### Apply the function to correlations & check output

In [17]:
# Apply function
GRAPHS = make_graphs(cor_dict, "positive", 0)

In [18]:
# Check the keys for the dictionary
#GRAPHS.keys()

In [19]:
# Check modules for one subject
#GRAPHS['modules']['sub-001_reward']

In [20]:
#Check to make sure graphs are filled
#Test = GRAPHS['graphs']['sub-001_reward'][0]
#Test.edges(data=True)

In [21]:
#GRAPHS['mean_FC']

### Get the standard deviation of the mean FC

In [22]:
# Flatten the mean-FC dictionary into a plain list (first entry of each
# subject's list) so that statistics.stdev can be applied to it.
o = [fc_values[0] for fc_values in GRAPHS['mean_FC'].values()]

In [23]:
statistics.stdev(o)

0.06342245078069726

If this value is low (<0.1) then you don't need to threshold a graph

### Test Modularity

In [24]:
#Goal: use the modularity function from the community package to identify the module structure that
#emerges within the sample during the condition loaded above (the punishment files in this run)

In [25]:
# Pull out the module structure for each participant: the first entry stored
# under that participant's key in GRAPHS['modules'].
modules = [partitions[0] for partitions in GRAPHS['modules'].values()]

In [26]:
#print it to make sure it works
#modules

In [27]:
#made a df of the modules
df = pd.DataFrame.from_dict(modules)

In [28]:
#df

In [29]:
#label the modules
# NOTE: the result of rename() is not assigned and inplace=True is not used,
# so this only changes what the cell displays. df itself keeps its integer
# column labels, which the later median / mod_dict steps index by.
df.rename(columns={0:"Amygdala_L",1:"Amygdala_R", 2:"Dorsal_striatum_L", 3:"Dorsal_striatum_R", 4:"Fusiform_gyrus_L", 5:"Fusiform_gyrus_R", 6:"Hippocampus_L", 7:"Hippocampus_R", 8:"Insula_L",
          9:"Insula_R", 10:"Intracalcarine_cortex_L", 11:"Intracalcarine_cortex_R", 12:"lOFC_L", 13: "lOFC_R", 14: "mOFC_L", 15:"mOFC_R", 16:"Oral_somatosensory_cortex_L", 17:"Oral_somatosensory_cortex_R", 18:"Precuneus_L", 
          19:"Precuneus_R", 20:"Ventral_striatum_L", 21:"Ventral_striatum_R", 22:"vlPFC_L", 23:"vlPFC_R", 24:"vlThalamus_L" , 25:"vlThalamus_R", 26: "vmPFC_L", 27: "vmPFC_R"})

Unnamed: 0,Amygdala_L,Amygdala_R,Dorsal_striatum_L,Dorsal_striatum_R,Fusiform_gyrus_L,Fusiform_gyrus_R,Hippocampus_L,Hippocampus_R,Insula_L,Insula_R,...,Precuneus_L,Precuneus_R,Ventral_striatum_L,Ventral_striatum_R,vlPFC_L,vlPFC_R,vlThalamus_L,vlThalamus_R,vmPFC_L,vmPFC_R
0,0,1,0,0,2,3,0,0,1,1,...,5,5,6,4,1,1,1,1,6,0
1,0,0,1,1,2,2,0,0,1,1,...,4,1,1,1,4,4,2,2,1,1
2,0,0,1,1,2,2,0,0,3,3,...,2,5,0,0,5,5,4,4,2,1
3,0,0,1,1,2,2,0,0,3,3,...,0,4,1,1,5,5,3,3,1,1
4,0,0,1,1,0,2,0,0,1,3,...,0,4,4,4,5,5,2,2,1,5
5,0,1,0,0,2,3,2,2,0,0,...,3,3,0,0,1,1,0,0,2,2
6,0,0,1,1,2,3,0,0,4,4,...,3,1,0,0,5,5,4,0,2,5
7,0,0,1,1,2,2,0,0,3,3,...,5,5,6,6,7,7,3,3,6,6
8,0,1,2,2,1,0,1,1,3,3,...,5,1,0,0,4,4,5,5,4,0
9,0,0,1,1,0,1,0,0,1,1,...,0,1,2,0,3,0,0,0,0,1


In [30]:
#find the mean over the columns
#df.mean(axis = 0)

In [31]:
# Check the standard deviation over the columns
#df.std(axis = 0)

In [32]:
# Take the median of each column of df (one value per node) and wrap the
# result in a one-column DataFrame.
# NOTE(review): the module ids come from a separate community.best_partition
# run per participant, so the same integer may not denote the same community
# across participants - confirm that a column-wise median is meaningful.
partition_median = df.median()
df_median = partition_median.to_frame()

In [33]:
# View the median module for each node
df_median

Unnamed: 0,0
0,0.0
1,0.0
2,1.0
3,1.0
4,2.0
5,2.0
6,0.0
7,0.0
8,3.0
9,2.0


### Make a Dictionary with the modules

In [34]:
# Convert the median modules to a dictionary: row index -> value in column 0
mod_dict = {index: row[0] for index, row in df_median.iterrows()}

#mod_dict

### Combine participant correlation matrices into one mean correlation matrix

In [35]:
#make the function to combine
def make_total_graphs(dict_o_data):
    mylist=[]
    for key, val_list in dict_o_data.items():
        for i in val_list:
            cor_matrix = np.asarray(i)
            mylist.append(cor_matrix)
    x=np.stack(mylist, axis=2)
    mu=np.mean(x, axis=(2))
    return(mu)

In [36]:
#Make the mean graph with correlations
mean_graph = make_total_graphs(GRAPHS['correlations'])

In [37]:
#Check to make sure this worked 
mean_graph.shape

#Convert the graph to a numpy matrix so it can be recognized by networkX
mean_graph_mat = np.matrix(mean_graph)

#Check the mean correlation to use to threshold later
mean_graph_mat.mean()

0.3047133910764402

In [38]:
# nx.from_numpy_matrix was removed in NetworkX 3.0; from_numpy_array builds
# the same weighted graph from a plain ndarray.
meanG = nx.from_numpy_array(np.asarray(mean_graph_mat))

In [39]:
#Add the modules and ROI labels to the graph
nx.set_node_attributes(meanG, mod_dict, 'modules')
nx.set_node_attributes(meanG, labels_dict, 'ROIs')

In [40]:
# Check to make sure this worked
ROIs=nx.get_node_attributes(meanG,'ROIs')
ROIs

{0: 'Amygdala_L',
 1: 'Amygdala_R',
 2: 'Dorsal_striatum_L',
 3: 'Dorsal_striatum_R',
 4: 'Fusiform_gyrus_L',
 5: 'Fusiform_gyrus_R',
 6: 'Hippocampus_L',
 7: 'Hippocampus_R',
 8: 'Insula_L',
 9: 'Insula_R',
 10: 'Intracalcarine_cortex_L',
 11: 'Intracalcarine_cortex_R',
 12: 'lOFC_L',
 13: 'lOFC_R',
 14: 'mOFC_L',
 15: 'mOFC_R',
 16: 'Oral_somatosensory_cortex_L',
 17: 'Oral_somatosensory_cortex_',
 18: 'Precuneus_L',
 19: 'Precuneus_R',
 20: 'Ventral_striatum_L',
 21: 'Ventral_striatum_R',
 22: 'vlPFC_L',
 23: 'vlPFC_R',
 24: 'vlThalamus_L',
 25: 'vlThalamus_R',
 26: 'vmPFC_L',
 27: 'vmPFC_R'}

### Function to make a thresholded graph

In [41]:
def threshold(G, corr_direction, min_correlation):
    """Return a thresholded copy of ``G`` without modifying ``G``.

    With ``corr_direction == "positive"`` an edge is removed when its
    ``"weight"`` is below 0 or below ``min_correlation``. With any other
    ``corr_direction`` an edge is removed when its weight is 0 or more, or is
    greater than ``min_correlation``.
    """
    if corr_direction == "positive":
        ## only positive correlations at least as strong as the minimum survive
        def is_rejected(w):
            return w < 0 or w < min_correlation
    else:
        ## negative direction: weights equal to or larger than 0, or above the minimum, are removed
        def is_rejected(w):
            return w >= 0 or w > min_correlation

    ## Decide on the original graph's edges, then delete them from the copy
    rejected = [
        (first, second)
        for first, second, data in G.edges(data=True)
        if is_rejected(data["weight"])
    ]
    H = G.copy()
    for first, second in rejected:
        H.remove_edge(first, second)
    return H

In [42]:
threshG = threshold(meanG, 'positive', .3)

### Function to visualize thresholded graph with modules in colors

In [60]:
def jenny_graph(graph, out_path="/Users/jennygilbert/Documents/betaseries_bevel/5_analysis/modularity_circle_punishment.png"):
    """Draw ``graph`` on a circular layout and save the figure as a PNG.

    Nodes are coloured by their 'modules' attribute and labelled with their
    'ROIs' attribute; edges are coloured and sized by their 'weight'
    attribute (scaled by 5).

    Parameters
    ----------
    graph : networkx graph
        Must have a 'weight' attribute on its edges and a 'modules' attribute
        on its nodes. A graph with no such edges or nodes makes the
        ``zip(*...)`` unpacking below raise ``ValueError``.
    out_path : str, optional
        Where the PNG is written. Defaults to the path originally hard-coded
        here.
    """
    edges,weights = zip(*nx.get_edge_attributes(graph, 'weight').items())
    nodes, color = zip(*nx.get_node_attributes(graph,'modules').items()) #if your modules are named different change here
    # Label nodes from the graph that is being drawn rather than from a
    # module-level variable; fall back to the default labels if there are none.
    labels = nx.get_node_attributes(graph,'ROIs') or None
    positions=nx.circular_layout(graph) #this defines a circular graph, if you want a different one change the layout function on this line

    #Figure size
    plt.figure(figsize=(40,25))

    #draws nodes
    color = np.array(color)
    nColormap=plt.cm.Spectral #check here if you want different colors https://matplotlib.org/3.1.1/gallery/color/colormap_reference.html
    cM=color.max()
    cm=color.min()
    # nodelist is passed explicitly so each colour stays paired with the node
    # it was read from.
    y=nx.draw_networkx_nodes(graph,positions,
                           nodelist=list(nodes),
                           node_color=color,
                           node_size=1000,
                           alpha=0.8,
                           cmap= nColormap,
                           vmin=cm ,vmax=cM)

    #Styling for labels (the keyword is font_weight, not fontweight)
    nx.draw_networkx_labels(graph, positions, labels=labels, font_size=15,
                            font_family='sans-serif', font_weight = 'bold')

    #draw edges
    weights=np.array(weights)
    eColormap=plt.cm.bwr #check here if you want different colors https://matplotlib.org/3.1.1/gallery/color/colormap_reference.html
    wt=weights*5
    M=wt.max()
    m=wt.min()
    # The keywords are edgelist and edge_cmap; the misspelled edge_list / cmap
    # were either ignored or rejected depending on the NetworkX version.
    x=nx.draw_networkx_edges(graph, positions, edgelist=list(edges), style='solid', width = wt, edge_color = wt,
                           edge_cmap=eColormap,
                           edge_vmin=m,
                           edge_vmax=M)

    #format the colorbar
    node_bar=plt.colorbar(y)
    edge_bar=plt.colorbar(x)

    node_bar.set_label('Modularity',fontsize = 25)
    edge_bar.set_label('Strength of edge weight',fontsize = 25)

    plt.axis('off')
    plt.title("Modularity and Edge Weights of Average Graph", fontsize = 30)
    plt.savefig(out_path, format="PNG")
    #plt.show()

In [61]:
jenny_graph(threshG)

### Make Module Graph

Make a new graph with modules from whole group analysis 

In [47]:
modg = community.induced_graph(mod_dict, threshG)

In [48]:
#list(modg.nodes)

### Function to make module structure graph

In [64]:
def module_fig(G, out_path="/Users/jennygilbert/Documents/betaseries_bevel/5_analysis/modularity_edges_punishment.png"):
    """Draw the module-level graph ``G`` on a circular layout and save it as a PNG.

    Nodes are coloured by their own node value and edges are coloured and
    sized by their 'weight' attribute.

    Parameters
    ----------
    G : networkx graph
        Must have numeric node identifiers and a 'weight' attribute on its
        edges. A graph with no weighted edges makes the ``zip(*...)``
        unpacking below raise ``ValueError``.
    out_path : str, optional
        Where the PNG is written. Defaults to the path originally hard-coded
        here.
    """
    edges,weights = zip(*nx.get_edge_attributes(G,'weight').items())

    positions=nx.circular_layout(G)
    plt.figure(figsize=(15,10))

    color = np.array(list(G.nodes))
    nColormap=plt.cm.Spectral #check here if you want different colors https://matplotlib.org/3.1.1/gallery/color/colormap_reference.html
    cM=color.max()
    cm=color.min()
    nx.draw_networkx_nodes(G,positions,
                           node_color=color,
                           node_size=1000,
                           alpha=0.8,
                           cmap= nColormap,
                           vmin=cm,vmax=cM )

    #Styling for labels
    nx.draw_networkx_labels(G, positions, font_size=8, font_family='sans-serif')

    # The keyword is edgelist; the misspelled edge_list was either ignored or
    # rejected depending on the NetworkX version.
    x=nx.draw_networkx_edges(G, positions, edgelist=list(edges),style='solid', width = weights, edge_color = weights)

    edge_bar=plt.colorbar(x)
    edge_bar.set_label('Strength of edge weight',fontsize = 25)

    plt.title("Module Connectivity Weights", fontsize = 30)
    # Hide the axes before saving so the saved file matches what is shown.
    plt.axis('off')
    plt.savefig(out_path, format="PNG")
    #plt.show()

In [65]:
module_fig(modg)

### Calculate Nodal Metrics
- Number of triangles each node takes part in (unweighted)
- Weighted clustering coefficient in the thresholded graph
- Degree
- betweenness centrality

In [51]:
# All four nodal metrics are computed on the thresholded mean graph.
# NOTE(review): if threshG still carries self-loop edges (the diagonal of a
# correlation matrix turns into self-loops when it is converted to a graph),
# they count towards degree centrality - confirm whether they should be
# removed before these metrics are reported.
degree = nx.degree_centrality(threshG)
betweenness_centrality = nx.betweenness_centrality(threshG)
cluster_triangles = nx.triangles(threshG)
cluster_coeff = nx.clustering(threshG, weight='weight')

### Combine into one dataframe

In [52]:
node_df = pd.DataFrame([labels_dict, mod_dict,cluster_triangles,cluster_coeff,degree, betweenness_centrality]).T

In [53]:
node_df.rename(columns={0:"ROI",1: "module",2: "number of triangles", 3:"weighted clustering coefficient", 4:"degree centrality", 5:"betweenness centrality"}, inplace = True)
#node_df

In [54]:
#Save this to a csv file
node_df.to_csv('/Users/jennygilbert/Documents/betaseries_bevel/5_analysis/node_metrics_punishment.csv', header = True, index = None)

## Save GRAPHS dictionary in a pickle file in case of crash

In [55]:
# Write through a context manager so the file is flushed and closed even if
# pickling fails part-way.
with open('/Users/jennygilbert/Documents/betaseries_bevel/tmp/Graphs', 'wb') as pickle_out:
    pickle.dump(GRAPHS, pickle_out, protocol=4)

In [None]:
# Restore GRAPHS from the pickle file written above.
# Objects are read until the end of the file, so GRAPHS ends up holding the
# last object stored in it (and is left unchanged if the file is empty).
# Only load pickle files you created yourself: unpickling can run arbitrary code.
with open('/Users/jennygilbert/Documents/betaseries_bevel/tmp/Graphs', 'rb') as pickle_file:
    while True:
        try:
            GRAPHS = pickle.load(pickle_file)
        except EOFError:
            break