### Fig 6 E, F: Network metric comparison graphs

In [None]:
import pandas as pd
import numpy as np
from scipy.stats import zscore
import seaborn as sns
import matplotlib.pyplot as plt
from pylab import *
from scipy import stats
import os

# For graph title
import re as re_title

# For converting y axis ticks to integers only
# https://stackoverflow.com/questions/12050393/how-to-force-the-y-axis-to-only-use-integers-in-matplotlib
from matplotlib.ticker import MaxNLocator

In [None]:
# Move the working directory up one level; the relative paths used later
# ('Network_Analysis/...', 'Figures/...') are resolved from there.
# The target is computed only once per kernel session, so re-running this cell
# does not climb one directory higher each time.
if 'PROJECT_ROOT' not in globals():
    PROJECT_ROOT = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
os.chdir(PROJECT_ROOT)

In [None]:
# Toggle: when True, the figure cell writes the plot to Figures/Fig6_EF.pdf
save = True

In [None]:
def graph_title(networkmetric):
    """
    Split a CamelCase network-metric name into space-separated words.

    Parameters
    __________
    networkmetric : str
        Network metric of interest

    Returns
    _______
    graph_title : str
        Network metric with words separated for visualization
        (e.g. 'ClusteringCoefficient' -> 'Clustering Coefficient').
        Empty string if the name contains no uppercase letter.
    """
    # Each word starts at an uppercase letter and runs up to the next one;
    # any characters before the first uppercase letter are not matched.
    word_list = re_title.findall('[A-Z][^A-Z]*', networkmetric)

    # Join by position. Comparing each word's *value* to the last word would
    # drop the separator whenever an earlier word equals the final one.
    return ' '.join(word_list)

Read in node_tables files for each network at UCSF and in UCDDP

In [None]:
race_ethnicities = ['Asian', 'Black', 'Latine', 'White']
AD_status = ['AD', 'con']

# One node table per (race/ethnicity, AD status, institution) network,
# keyed as '<group>_<status>_UCSF' or '<group>_<status>_UCDDP'.
node_tables = {}

for group in race_ethnicities:
    for status in AD_status:
        prefix = f'{group}_{status}'
        node_tables[f'{prefix}_UCSF'] = pd.read_csv(
            f'Network_Analysis/ADCon_phe/node_tables/{prefix}_5_node_table.csv'
        )
        node_tables[f'{prefix}_UCDDP'] = pd.read_csv(
            f'Network_Analysis/UCDDP/ADCon_phe/node_tables/{prefix}_UCDDP_5_node_table.csv'
        )

Specify network metrics measured

In [None]:
# Node-table columns that are averaged per network further down
network_metrics = [
    'AverageShortestPathLength',
    'ClusteringCoefficient',
    'ClosenessCentrality',
    'Eccentricity',
    'Stress',
    'Degree',
    'BetweennessCentrality',
    'NeighborhoodConnectivity',
    'Radiality',
    'TopologicalCoefficient',
]

Order of networks. Used downstream to check that the below for loop is working as expected

In [None]:
# Order check (to make sure metrics are being attributed to correct networks).
# The expected order is spelled out here independently of how node_tables was filled:
# race/ethnicity group first, then AD before control within each group.
_order_groups = ('Asian', 'Black', 'Latine', 'White')
_order_statuses = ('AD', 'con')

order_UCSF = [
    f'{group}_{status}_UCSF'
    for group in _order_groups
    for status in _order_statuses
]

order_UCDDP = [
    f'{group}_{status}_UCDDP'
    for group in _order_groups
    for status in _order_statuses
]

Make dictionary of metrics, with each metric key corresponding to a list containing the average metric value for each network

In [None]:
# metric name -> list of per-network mean values, ordered as order_UCSF / order_UCDDP
UCSF_metrics = dict()
UCDDP_metrics = dict()
# Should be the same as network metrics
metric_list = list()

# Fail fast if an expected network was not loaded; otherwise the per-metric lists
# would come out short and only break later when the dataframes are built.
missing_networks = [key for key in order_UCSF + order_UCDDP if key not in node_tables]
if missing_networks:
    raise KeyError('node_tables is missing expected networks: {}'.format(missing_networks))

# Tables that appear in neither order list are not averaged; report them so they
# are not silently ignored.
expected_networks = set(order_UCSF) | set(order_UCDDP)
for key in node_tables:
    if key not in expected_networks:
        print('Unexpected network in node_tables (ignored), please check: {}'.format(key))

for metric in network_metrics:
    print(metric)
    metric_list.append(metric)

    # Look each table up by the explicit order lists, so every value is attributed
    # to the intended network regardless of the insertion order of node_tables.
    UCSF_metrics[metric] = [node_tables[key][metric].mean() for key in order_UCSF]
    UCDDP_metrics[metric] = [node_tables[key][metric].mean() for key in order_UCDDP]

In [None]:
# Sanity check: evaluates to True when the loop visited every metric in network_metrics, in order
metric_list == network_metrics

Make dataframes of metrics, one for UCSF and the other for UCDDP

In [None]:
# Rows are labelled with the expected network order; columns are the metrics
UCSF_metrics_df = pd.DataFrame(data=UCSF_metrics, index=order_UCSF)
UCDDP_metrics_df = pd.DataFrame(data=UCDDP_metrics, index=order_UCDDP)

In [None]:
# Inspect the per-network mean Radiality for the UCSF networks
UCSF_metrics_df['Radiality']

In [None]:
# Inspect the per-network mean Radiality for the UCDDP networks
UCDDP_metrics_df['Radiality']

Calculate z score to normalize

In [None]:
# z-score every metric column across the networks of one institution
# (scipy's zscore is called with its default ddof, which is zero)
UCSF_metrics_df_z, UCDDP_metrics_df_z = (
    metrics_df.apply(zscore, axis=0)
    for metrics_df in (UCSF_metrics_df, UCDDP_metrics_df)
)

In [None]:
# Display the z-scored UCSF table
UCSF_metrics_df_z

Scatterplot

In [None]:
# Quick look: all z-scored metric values, UCSF on the x axis against UCDDP on the y axis
plt.scatter(UCSF_metrics_df_z, UCDDP_metrics_df_z)
plt.show()

Make scatterplot using seaborn

In [None]:
# Make index equal between UCSF and UCDDP so that the two dataframes can be merged.
# Only a real '_UCSF' suffix is stripped, so re-running this cell after the index
# has already been relabelled leaves the names untouched instead of truncating them.
ucsf_suffix = '_UCSF'
new_index = [
    network[:-len(ucsf_suffix)] if network.endswith(ucsf_suffix) else network
    for network in UCSF_metrics_df_z.index
]

print(new_index)

In [None]:
# Relabel both z-scored tables with the institution-free names so their rows share keys.
# Labels are assigned by position: this assumes the UCDDP rows are in the same network
# order as the UCSF rows (order_UCSF and order_UCDDP list the networks in parallel).
UCSF_metrics_df_z.index = new_index
UCDDP_metrics_df_z.index = new_index

In [None]:
# Merge the two z-scored tables on their network labels. Metric columns get an
# institution suffix; the key column produced by the merge ('key_0') becomes the
# index and is named 'Network'.
combined = (
    UCSF_metrics_df_z
    .merge(UCDDP_metrics_df_z,
           left_on=UCSF_metrics_df_z.index,
           right_on=UCDDP_metrics_df_z.index,
           suffixes=['_UCSF', '_UCDDP'])
    .set_index('key_0')
    .rename_axis('Network')
)
combined

Obtain colors that will be used to identify networks in scatterplot

In [None]:
# Get hex codes for Set2 (AD networks), Pastel2 (control networks) and Dark2 (metric)
# https://stackoverflow.com/questions/33596491/extract-matplotlib-colormap-in-hex-format

cmaps = ['Set2', 'Pastel2', 'Dark2']

# colormap name -> list of hex colour strings, in colormap order
cmap_colors = dict()

for colormap in cmaps:
    print('Colors for: {}'.format(colormap))
    # plt.get_cmap is used instead of cm.get_cmap, which newer Matplotlib releases removed
    cmap = plt.get_cmap(colormap)

    # list of colors for specific colormap (rgb2hex accepts rgb or rgba)
    color_list = [matplotlib.colors.rgb2hex(cmap(idx)) for idx in range(cmap.N)]
    for hex_code in color_list:
        print(hex_code)

    cmap_colors[colormap] = color_list

    print('\n')

In [None]:
# Interleave the first four Set2 and Pastel2 colours (Set2, Pastel2, Set2, ...),
# giving one colour pair per group; used to colour points by network
network_colors = [
    cmap_colors[palette][slot]
    for slot in range(4)
    for palette in ('Set2', 'Pastel2')
]

print(network_colors)

In [None]:
# Show the network labels (row order) of the merged table
combined.index

In [None]:
# Human-readable legend label for each network; passed as `hue` for the rows of
# `combined`, so the order here has to follow that table's row order
combined_index_fig = [
    f'{group}-identified {cohort}'
    for group in ('Asian', 'Black', 'Latine', 'White')
    for cohort in ('patients with AD', 'control patients')
]

In [None]:
# Two panels sharing the y axis: points coloured by network (left) and by metric (right).
# Label was useful parameter for making legend
# https://matplotlib.org/3.5.0/api/_as_gen/matplotlib.pyplot.legend.html
fig, (ax1, ax2) = plt.subplots(1,2, figsize=(15,3.75), sharey=True)

#https://stackoverflow.com/questions/21321670/how-to-change-fonts-in-matplotlib-python
kwargs = {'fontname' : 'Arial'}

plt.rcParams.update({'font.family':'sans-serif'})
plt.rcParams.update({'font.sans-serif':'Arial'})

for position, metric in enumerate(metric_list):
    x_col = metric+'_UCSF'
    y_col = metric+'_UCDDP'

    # network-labeled graph: every metric is drawn with the same network colours,
    # so the legend is generated for the first metric only
    network_kwargs = dict(data=combined,
                          x=x_col,
                          y=y_col,
                          hue=combined_index_fig,
                          palette=network_colors,
                          ax=ax1)
    if position > 0:
        network_kwargs['legend'] = False
    sns.scatterplot(**network_kwargs)

    # metric-labeled graph: one legend entry per metric
    sns.scatterplot(data=combined,
                    x=x_col,
                    y=y_col,
                    label=graph_title(metric),
                    ax=ax2)

ax1.set_xlabel('UCSF Normalized Units', size=14, **kwargs)
ax2.set_xlabel('UCSF Normalized Units', size=14, **kwargs)
ax1.set_ylabel('UC-wide Normalized Units', size=14, **kwargs)

# tick_params replaces the per-tick `tick.label` access, which newer Matplotlib
# releases no longer provide; it also keeps the size if the ticks are regenerated
ax1.tick_params(axis='both', labelsize=12)
ax2.tick_params(axis='x', labelsize=12)
ax1.yaxis.set_major_locator(MaxNLocator(integer=True))

ax1.legend(bbox_to_anchor=(-0.17,1))
ax2.legend(bbox_to_anchor=(1.05,1.03))

# The right panel shares its y axis with the left one, so its own axis is hidden
ax2.yaxis.set_visible(False)
plt.tight_layout()

if save:
    figures_dir = os.path.join(os.getcwd(), 'Figures')
    # Create the output folder if needed so saving does not fail on a fresh checkout
    os.makedirs(figures_dir, exist_ok=True)
    plt.savefig(os.path.join(figures_dir, 'Fig6_EF.pdf'), bbox_inches='tight')

plt.show()

Obtain correlation

In [None]:
# Flatten each z-scored table to 1-D so the values can be correlated as paired samples.
# NOTE(review): the pairing relies on both tables having identical row and column order — confirm.
UCSF_metrics_array_z = UCSF_metrics_df_z.to_numpy().flatten()
UCDDP_metrics_array_z = UCDDP_metrics_df_z.to_numpy().flatten()

In [None]:
# Spearman rank correlation between the pooled UCSF and UCDDP z-scores (all metrics together)
stats.spearmanr(UCSF_metrics_array_z, UCDDP_metrics_array_z)

In [None]:
# Display the z-scored UCSF table (its index now carries the institution-free network labels)
UCSF_metrics_df_z