purpose: run network propagation for a given dataset, in this case used for locomotor activity and externalizing.

In [5]:
import os
import pandas as pd
import ndex2
import networkx as nx
from netcoloc import netprop_zscore
from netcoloc import netprop
from netcoloc import network_colocalization
import sys
import random

In [6]:
os.chdir('/tscc/projects/ps-palmer/brittany/rare_common_alcohol/rare_common_alcohol_comparison/notebooks/')

In [7]:
from rca_functions import *

In [8]:
os.chdir('/tscc/projects/ps-palmer/brittany/SUD_cross_species/')

In [9]:
random_seed=random.seed(211)

In [10]:
save_file=True

In [11]:
UUIDs={
    'PCNet2.0':'d73d6357-e87b-11ee-9621-005056ae23aa',
    'PCNet2.1':'e9c574f4-e87a-11ee-9621-005056ae23aa',
    'PCNet2.2':'8b4b54fa-e87d-11ee-9621-005056ae23aa'
}

# functions

In [8]:
def import_seedgenes(path,pcol='P',gene_col='GENE NAME',delim='comma', cutoff=None):
    if delim=='comma':
        df=pd.read_csv(path,sep=',')
    else:
        df=pd.read_csv(path,sep='\t')
    if pcol==None:
        print('pvalue column not specified- all genes will be used')
        cutoff=None
    if cutoff=='bonferroni':
        df=df[df[pcol]<0.05/len(df)]
    elif cutoff=='FDR_05':
        df=df[df[pcol]<0.05]
    else:
        print('cutoff not defined/custom- using all genes ')
        df=df
    print(df.head())
    return(df)

In [12]:
def import_interactome(interactome_name=None, UUIDs=UUIDs,ndex_user=None, ndex_password=None,UUID=None):
    """
    Imports a gene interactome from the NDEx database and returns it as a NetworkX graph object. Optionally,
    the function allows for importing using a unique identifier (UUID) or by an interactome name.

    The function checks if the interactome name provided corresponds to a predefined dictionary of UUIDs. If it does, it
    retrieves the network using the specified credentials. If an interactome name is not provided but a UUID is,
    it retrieves the network using the provided UUID. The nodes of pcnet_v14 are relabelled by their gene name rather than ID number.

    Parameters:
    - interactome_name (str, optional): The name of the interactome as defined in the UUIDs dictionary. If not provided
      but a UUID is, the interactome associated with the UUID is imported instead.
    - ndex_user (str, optional): The NDEx account username for accessing private networks.
    - ndex_password (str, optional): The NDEx account password for accessing private networks.
    - UUID (str, optional): A specific UUID to directly download an interactome from NDEx if the interactome name is not used.

    Returns:
    networkx.Graph: A graph object representing the interactome. Nodes and edges represent genes and their interactions, respectively.

    Notes:
    - The function uses the NDEx2 Python client and requires Internet access to NDEx's servers.
    - Depending on the access rights of the NDEx account, private or public interactomes can be retrieved.
    - The function prints the number of nodes and edges of the imported graph for diagnostic purposes.

    Raises:
    - ValueError: If neither an interactome name nor a UUID is provided.
    """    
    interactome_uuid=UUIDs[interactome_name]
    print(interactome_name)
    ndex_server='public.ndexbio.org'
    #import network based on provided interactome key
    if (interactome_name in UUIDs.keys()):
        graph = ndex2.create_nice_cx_from_server(
                    ndex_server, 
                    username=ndex_user, 
                    password=ndex_password, 
                    uuid=interactome_uuid
                ).to_networkx()
        if (interactome_name=='pcnet_v14'):
            graph=nx.relabel_nodes(graph, nx.get_node_attributes(graph, 'HGNC Symbol'))
        # print out interactome num nodes and edges for diagnostic purposes
        print('number of nodes:')
        print(len(graph.nodes))
        print('\nnumber of edges:')
        print(len(graph.edges))
        return(graph)
    elif(interactome_name==None & UUID!=None):
        print('using novel UUID. For UUIDs used in this study, see UUID_dict')
        graph = ndex2.create_nice_cx_from_server(
            ndex_server, 
            username=ndex_user, 
            password=ndex_password, 
            uuid=UUID
        ).to_networkx()
        # print out interactome num nodes and edges for diagnostic purposes
        print('number of nodes:')
        print(len(graph.nodes))
        print('\nnumber of edges:')
        print(len(graph.edges))
        return(graph)
    else:
        print('UUID/interactome name not provided- please provide either to import interactome.')

In [13]:
#from rat bmi notebooks not netcoloc
def calculate_heat_zscores_with_sampling(data, nodes, individual_heats, G_PC, trait="BMI", max_genes=500, num_samples=100,
                                        nominal_sig=0.05, num_reps=1000, out_path="", minimum_bin_size=10,outfile='sample.tsv'):
    """Takes a set of summary statistics and a molecular interaction and performs sampling of the significant genes.
    For each sample a random selection of seed genes is chosen, weighted by the p-value of each gene in the summary
    statistics. Network propagation with zscore calculation is performed for each sample to generate a distribution
    of z-scores for each gene in the seed_gene set.

    Args:
        data (pd.DataFrame): Gene level summary statistics
        nodes (list): list of nodes in the interaction network
        individual_heats (np.array): Heat matrix calculated by `netprop_zscore.get_individual_heats_matrix()`
        G_PC (nx.Graph): molecular interaction network
        trait (str, optional): name of trait being investigated. Defaults to "BMI".
        max_genes (int, optional): Maximum number of seed genes to include in each sample (maximum=500). Defaults to 500.
        num_samples (int, optional): Number of times to perform sampling. Defaults to 100.
        nominal_sig (float, optional): Significance cutoff for keeping genes in data (Note: this value will be Bonferroni corrected). Defaults to 0.05.
        num_reps (int, optional): Number of repetitions of randomization for generating null distribution for z_scores. Defaults to 1000.
        out_path (str, optional): File path prefix for saving results of sampling. Defaults to "".
        minimum_bin_size (int, optional): minimum number of genes that should be in each degree matching bin. Defaults to 10.

    Returns:
        pd.DataFrame: Gene x sampling run dataframe of sampled z-scores
    """
    assert max_genes <= 500, "NetColoc is only valid for sets of 500 or less genes so maximum number of genes for sampling must be <= 500"
    #outfile = out_path + trait + "sampling_" + str(max_genes) + "_" + str(num_samples) + ".tsv"
    data = data.loc[data.gene_symbol.isin(nodes)]  # subset to genes in interaction network
    all_seeds = data.loc[data.pvalue <= nominal_sig / len(data)]  # Bonferroni correction
    all_seeds = all_seeds.assign(log10p=-1 * np.log10(all_seeds.pvalue))  # get -log10p for weighted sampling
    sampling_results = []
    for i in tqdm(range(num_samples)):
        # perform propagation for sample
        sample_seeds = random.choices(population=all_seeds.gene_symbol.values, weights=all_seeds.log10p.values, k=max_genes)
        sample_results = netprop_zscore.calculate_heat_zscores(individual_heats, nodes=list(G_PC.nodes), degrees=dict(G_PC.degree),
                                                seed_genes=sample_seeds, num_reps=num_reps,
                                                minimum_bin_size=minimum_bin_size, random_seed=i)[0]
        sample_z = pd.DataFrame(sample_results, columns=["z" + str(i)])
        # save running results of sampling
        if i == 0:
            sample_z.to_csv(outfile, sep="\t")
        else:
            existing = pd.read_csv(outfile, sep="\t", index_col=0)
            existing = existing.join(sample_z)
            existing.to_csv(outfile, sep="\t")
        sampling_results.append(sample_z)
    return pd.concat(sampling_results, axis=1)

# Interactome Set-up

pcnet2- versions 
from wright et al. 2024 preprint:
PCNet 2.0= best-performing ranked composite (top 15 interactomes, 3.85M interactions)
PCNet 2.1= top 8 interactomes, 1.75M interactions
PCNet 2.2= top 10 co-citation-free interactomes, 3.32M interactions 

In [14]:
interactome_name='PCNet2.2'

In [15]:
graph=import_interactome(interactome_name)

PCNet2.2
number of nodes:
18558

number of edges:
3323928


In [16]:
all_nodes=list(graph.nodes())

In [17]:
# pre calculate the matricies used for network propagation
print('\ncalculating w_prime')
w_prime = netprop.get_normalized_adjacency_matrix(graph, conserve_heat=True)

print('\ncalculating w_double_prime')
w_double_prime = netprop.get_individual_heats_matrix(w_prime, .5)


calculating w_prime

calculating w_double_prime


# calculate gwas NPS

In [18]:
os.getcwd()

'/tscc/projects/ps-palmer/brittany/SUD_cross_species'

In [19]:
#use for GWAS- actual traits of interest
mag_dir='magma/seed_genes/'
file_dict={
    'loco':mag_dir+'loco_win10_annot.tsv',
    'loco_gsem':mag_dir+'loco_gsem_annot.tsv',
    'ext_munged':mag_dir+'ext_munged_annot.tsv',
    'ext':mag_dir+'ext_orig_annot.tsv',
    'ext_st22':mag_dir+'all_tests_ext1_st22_genes.csv',
    'loco_mega_fus_naac':'loco_twas_dan/loco_fusion_NACC_seed.tsv',
    'ext_fus_naac':'ext_FUSION/ext_fusion_NACC_seed.tsv'
}

In [20]:
#dictionary of human control traits
ctrl_dict={}
ctrl_traits=['facial_hair', 'age_smkinit', 'antisoc', 'friend_sat', 'hr', 'infant_bw', 'LDL', 'maternal_smok', 'townsend', 'age_menarche', 'neurot']
for t in ctrl_traits:
    ctrl_dict[t]=pd.read_csv('gwas_ctrl_hm/magma/seed_genes/'+t+'_annot.tsv',sep='\t')

In [21]:
seed_dict={}

In [22]:
#locomotor mega analysis
loco=pd.read_csv(file_dict['loco'],sep='\t')
loco_bonf_cut=2.6389402016150313e-06 #calculated in define_seed_genes_orthologs- from raw magma results before adding in ortholog info
seed_dict['loco_FDR']=(set(loco[loco['Q']<0.05]['HM_ORTHO']))
seed_dict['loco_bonf']=(set(loco[loco['P']<loco_bonf_cut]['HM_ORTHO']))

In [23]:
#locomotor- gsem common factor
loco_gsem=pd.read_csv(file_dict['loco_gsem'],sep='\t')
loco_gsem_bonf_cut=2.650129856362962e-06 #calculated in define_seed_genes_orthologs- from raw magma results before adding in ortholog info
seed_dict['loco_gsem_FDR']=(set(loco_gsem[loco_gsem['Q']<0.05]['HM_ORTHO']))
seed_dict['loco_gsem_bonf']=(set(loco_gsem[loco_gsem['P']<loco_gsem_bonf_cut]['HM_ORTHO']))

In [24]:
#locomotor- mega FUSION
loco_fus=pd.read_csv(file_dict['loco_mega_fus_naac'],sep='\t')
loco_fus_bonf_cut=9.338812103100487e-06 #calculated in define_seed_genes_orthologs- from raw magma results before adding in ortholog info
seed_dict['loco_mega_fus_naac_FDR']=(set(loco_fus[loco_fus['Q']<0.05]['human_ortholog']))
seed_dict['loco_mega_fus_naac_bonf']=(set(loco_fus[loco_fus['TWAS.P']<loco_fus_bonf_cut]['human_ortholog']))

In [25]:
#externalizing- FUSION
ext=pd.read_csv(file_dict['ext_fus_naac'],sep='\t')
ext_bonf_cut=0.05/len(ext) # no ortholog adding, so can calculate from this table
seed_dict['ext_fus_naac_FDR']=(set(ext[ext['Q']<0.05]['ID']))
seed_dict['ext_fus_naac_bonf']=(set(ext[ext['TWAS.P']<ext_bonf_cut]['ID']))
seed_dict['ext_fus_naac_top500']=set(ext[(ext['ID'].isin(all_nodes))].nsmallest(500,'TWAS.P')['ID'])

In [26]:
ext=pd.read_csv(file_dict['ext'],sep='\t')
ext_bonf_cut=0.05/len(ext) # no ortholog adding, so can calculate from this table
seed_dict['ext_FDR']=(set(ext[ext['Q']<0.05]['GENE']))
seed_dict['ext_bonf']=(set(ext[ext['P']<ext_bonf_cut]['GENE']))
seed_dict['ext_top500']=set(ext[(ext['GENE'].isin(all_nodes))].nsmallest(500,'P')['GENE'])
#the genes from ext1.0 supplemental table 22
ext_st22=pd.read_csv(file_dict['ext_st22'],sep='\t')
seed_dict['ext_st22']=(set(ext_st22['GENE NAME']))

In [27]:
for t in ctrl_traits:
    seed_dict[t+'_FDR']=(set(ctrl_dict[t][ctrl_dict[t]['Q']<0.05]['GENE']))
    seed_dict[t+'_bonf']=(set(ctrl_dict[t][ctrl_dict[t]['P']<0.05/len(ctrl_dict[t])]['GENE']))
    seed_dict[t+'_top500']=set(ctrl_dict[t][(ctrl_dict[t]['GENE'].isin(all_nodes))].nsmallest(500,'P')['GENE'])

In [28]:
max(ext[ext.P<1.556900e-08].P)

1.5538e-08

In [30]:
seed_dict.keys()

dict_keys(['loco_FDR', 'loco_bonf', 'loco_gsem_FDR', 'loco_gsem_bonf', 'loco_mega_fus_naac_FDR', 'loco_mega_fus_naac_bonf', 'ext_fus_naac_FDR', 'ext_fus_naac_bonf', 'ext_fus_naac_top500', 'ext_FDR', 'ext_bonf', 'ext_top500', 'ext_st22', 'facial_hair_FDR', 'facial_hair_bonf', 'facial_hair_top500', 'age_smkinit_FDR', 'age_smkinit_bonf', 'age_smkinit_top500', 'antisoc_FDR', 'antisoc_bonf', 'antisoc_top500', 'friend_sat_FDR', 'friend_sat_bonf', 'friend_sat_top500', 'hr_FDR', 'hr_bonf', 'hr_top500', 'infant_bw_FDR', 'infant_bw_bonf', 'infant_bw_top500', 'LDL_FDR', 'LDL_bonf', 'LDL_top500', 'maternal_smok_FDR', 'maternal_smok_bonf', 'maternal_smok_top500', 'townsend_FDR', 'townsend_bonf', 'townsend_top500', 'age_menarche_FDR', 'age_menarche_bonf', 'age_menarche_top500', 'neurot_FDR', 'neurot_bonf', 'neurot_top500'])

In [34]:
overwrite=False

# run straight propagation- no sampling

In [35]:
#loop for all seed genes
for k in seed_dict.keys():  
    seed_genes = list(seed_dict[k].intersection(graph.nodes()))
    
    NPSc, Fnew_score, Fnew_rand_score = netprop_zscore.calculate_heat_zscores(
        w_double_prime,  
        list(graph.nodes()),
        dict(graph.degree), 
        seed_genes, num_reps=1000,
        minimum_bin_size=100,
        random_seed=random_seed)
    print(NPSc.head())
    if save_file:
        file_path='network_scores/'+k+'_'+interactome_name+'_zscore.tsv'
        print(file_path)
        if ((overwrite==False)&(os.path.exists(file_path))):
            print('File already exists. If you would like to overwrite this file, set overwrite=True, and rerun')
        else:
            NPSc.to_csv(file_path,sep='\t',header=False)

  0%|          | 0/1000 [00:00<?, ?it/s]

A2M     -0.134343
APP     -0.651827
LRP1    -0.862658
APOA1    0.028221
HP      -0.318967
dtype: float64
network_scores/loco_FDR_PCNet2.2_zscore.tsv
File already exists. If you would like to overwrite this file, set overwrite=True, and rerun


  0%|          | 0/1000 [00:00<?, ?it/s]

A2M      1.065815
APP     -0.724583
LRP1    -0.158108
APOA1    1.475580
HP       2.686806
dtype: float64
network_scores/loco_bonf_PCNet2.2_zscore.tsv
File already exists. If you would like to overwrite this file, set overwrite=True, and rerun


  0%|          | 0/1000 [00:00<?, ?it/s]

A2M      0.924831
APP     -0.305334
LRP1    -0.132837
APOA1    0.009959
HP       0.510004
dtype: float64
network_scores/loco_gsem_FDR_PCNet2.2_zscore.tsv
File already exists. If you would like to overwrite this file, set overwrite=True, and rerun


  0%|          | 0/1000 [00:00<?, ?it/s]

A2M      0.069125
APP      0.431477
LRP1    -0.795020
APOA1    1.367644
HP       1.317385
dtype: float64
network_scores/loco_gsem_bonf_PCNet2.2_zscore.tsv
File already exists. If you would like to overwrite this file, set overwrite=True, and rerun


  0%|          | 0/1000 [00:00<?, ?it/s]

A2M     -1.758014
APP      0.267926
LRP1    -0.317058
APOA1   -0.895896
HP       0.014102
dtype: float64
network_scores/loco_mega_fus_naac_FDR_PCNet2.2_zscore.tsv


  0%|          | 0/1000 [00:00<?, ?it/s]

A2M      0.416191
APP     -0.358251
LRP1     1.729933
APOA1   -0.340899
HP      -0.260175
dtype: float64
network_scores/loco_mega_fus_naac_bonf_PCNet2.2_zscore.tsv


  0%|          | 0/1000 [00:00<?, ?it/s]

A2M     -1.642801
APP     -1.530574
LRP1    -0.228867
APOA1   -0.033457
HP      -1.417251
dtype: float64
network_scores/ext_fus_naac_FDR_PCNet2.2_zscore.tsv


  0%|          | 0/1000 [00:00<?, ?it/s]

A2M     -0.950643
APP     -1.425211
LRP1    -1.094331
APOA1   -0.258070
HP      -1.227223
dtype: float64
network_scores/ext_fus_naac_bonf_PCNet2.2_zscore.tsv


  0%|          | 0/1000 [00:00<?, ?it/s]

A2M     -1.296115
APP     -0.343061
LRP1    -1.766188
APOA1   -1.168037
HP      -0.805858
dtype: float64
network_scores/ext_fus_naac_top500_PCNet2.2_zscore.tsv


  0%|          | 0/1000 [00:00<?, ?it/s]

A2M      10.051053
APP      -0.500312
LRP1      0.064689
APOA1     3.474628
HP       14.244613
dtype: float64
network_scores/ext_FDR_PCNet2.2_zscore.tsv
File already exists. If you would like to overwrite this file, set overwrite=True, and rerun


  0%|          | 0/1000 [00:00<?, ?it/s]

A2M       0.491128
APP      -0.125826
LRP1      0.269395
APOA1     0.260098
HP       11.680703
dtype: float64
network_scores/ext_bonf_PCNet2.2_zscore.tsv
File already exists. If you would like to overwrite this file, set overwrite=True, and rerun


  0%|          | 0/1000 [00:00<?, ?it/s]

A2M      -0.187555
APP       0.859860
LRP1      0.361742
APOA1     0.830798
HP       10.402558
dtype: float64
network_scores/ext_top500_PCNet2.2_zscore.tsv


  0%|          | 0/1000 [00:00<?, ?it/s]

A2M     -0.734289
APP      1.730562
LRP1     0.000709
APOA1   -1.045921
HP      -0.091437
dtype: float64
network_scores/ext_st22_PCNet2.2_zscore.tsv


  0%|          | 0/1000 [00:00<?, ?it/s]

A2M     -0.389475
APP     -0.895909
LRP1    -0.648955
APOA1    0.431824
HP       0.876112
dtype: float64
network_scores/facial_hair_FDR_PCNet2.2_zscore.tsv


  0%|          | 0/1000 [00:00<?, ?it/s]

A2M      0.780800
APP     -0.652169
LRP1    -0.940712
APOA1    1.351508
HP       0.979285
dtype: float64
network_scores/facial_hair_bonf_PCNet2.2_zscore.tsv


  0%|          | 0/1000 [00:00<?, ?it/s]

A2M      0.236452
APP     -0.857820
LRP1    -0.569149
APOA1    1.100226
HP       0.775385
dtype: float64
network_scores/facial_hair_top500_PCNet2.2_zscore.tsv


  0%|          | 0/1000 [00:00<?, ?it/s]

A2M     -0.497332
APP     -0.470064
LRP1     0.557742
APOA1   -1.384027
HP      -1.131338
dtype: float64
network_scores/age_smkinit_FDR_PCNet2.2_zscore.tsv


  0%|          | 0/1000 [00:00<?, ?it/s]

A2M     -0.591193
APP      0.317278
LRP1     1.610996
APOA1   -0.721908
HP      -0.717898
dtype: float64
network_scores/age_smkinit_bonf_PCNet2.2_zscore.tsv


  0%|          | 0/1000 [00:00<?, ?it/s]

A2M      1.808617
APP      0.948145
LRP1     0.757831
APOA1    0.453737
HP      -0.481454
dtype: float64
network_scores/age_smkinit_top500_PCNet2.2_zscore.tsv


  F /= len(seed_genes)


  0%|          | 0/1000 [00:00<?, ?it/s]

  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(

A2M     NaN
APP     NaN
LRP1    NaN
APOA1   NaN
HP      NaN
dtype: float64
network_scores/antisoc_FDR_PCNet2.2_zscore.tsv


  F /= len(seed_genes)


  0%|          | 0/1000 [00:00<?, ?it/s]

  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(

A2M     NaN
APP     NaN
LRP1    NaN
APOA1   NaN
HP      NaN
dtype: float64
network_scores/antisoc_bonf_PCNet2.2_zscore.tsv


  0%|          | 0/1000 [00:00<?, ?it/s]

A2M      1.684422
APP      2.658398
LRP1    -0.467265
APOA1    0.574012
HP       1.177526
dtype: float64
network_scores/antisoc_top500_PCNet2.2_zscore.tsv


  0%|          | 0/1000 [00:00<?, ?it/s]

A2M     -0.207737
APP     -0.052107
LRP1    -0.746534
APOA1   -0.418000
HP      -0.210539
dtype: float64
network_scores/friend_sat_FDR_PCNet2.2_zscore.tsv


  0%|          | 0/1000 [00:00<?, ?it/s]

A2M      0.513500
APP      0.880783
LRP1     0.211676
APOA1   -0.056750
HP       0.322764
dtype: float64
network_scores/friend_sat_bonf_PCNet2.2_zscore.tsv


  0%|          | 0/1000 [00:00<?, ?it/s]

A2M     -2.717756
APP      1.686913
LRP1    -0.114849
APOA1   -0.407208
HP      -1.646550
dtype: float64
network_scores/friend_sat_top500_PCNet2.2_zscore.tsv


  0%|          | 0/1000 [00:00<?, ?it/s]

A2M      -1.050320
APP       0.525275
LRP1      0.338783
APOA1    -0.347268
HP       12.180185
dtype: float64
network_scores/hr_FDR_PCNet2.2_zscore.tsv


  0%|          | 0/1000 [00:00<?, ?it/s]

A2M     -0.742919
APP      2.206056
LRP1     1.051196
APOA1   -0.247525
HP      -1.990716
dtype: float64
network_scores/hr_bonf_PCNet2.2_zscore.tsv


  0%|          | 0/1000 [00:00<?, ?it/s]

A2M     -0.900523
APP      2.236085
LRP1     1.117819
APOA1   -0.442916
HP      -1.891946
dtype: float64
network_scores/hr_top500_PCNet2.2_zscore.tsv


  0%|          | 0/1000 [00:00<?, ?it/s]

A2M       1.314897
APP      -0.415746
LRP1     -0.779621
APOA1     0.259532
HP       12.063880
dtype: float64
network_scores/infant_bw_FDR_PCNet2.2_zscore.tsv


  0%|          | 0/1000 [00:00<?, ?it/s]

A2M     -0.818466
APP     -0.690369
LRP1     0.206481
APOA1    0.552147
HP       0.225739
dtype: float64
network_scores/infant_bw_bonf_PCNet2.2_zscore.tsv


  0%|          | 0/1000 [00:00<?, ?it/s]

A2M      0.091980
APP     -1.196437
LRP1     0.086283
APOA1   -0.113798
HP       0.616711
dtype: float64
network_scores/infant_bw_top500_PCNet2.2_zscore.tsv


  0%|          | 0/1000 [00:00<?, ?it/s]

A2M      2.935341
APP     -1.037783
LRP1     1.401090
APOA1    2.639134
HP       9.052389
dtype: float64
network_scores/LDL_FDR_PCNet2.2_zscore.tsv


  0%|          | 0/1000 [00:00<?, ?it/s]

A2M     -0.074677
APP     -0.779616
LRP1    -0.512343
APOA1    2.555159
HP       7.840743
dtype: float64
network_scores/LDL_bonf_PCNet2.2_zscore.tsv


  0%|          | 0/1000 [00:00<?, ?it/s]

A2M       2.076254
APP      -1.376105
LRP1      0.953513
APOA1     6.527171
HP       10.169190
dtype: float64
network_scores/LDL_top500_PCNet2.2_zscore.tsv


  0%|          | 0/1000 [00:00<?, ?it/s]

A2M      0.588458
APP     -0.728430
LRP1     0.510486
APOA1    0.358485
HP      -1.011622
dtype: float64
network_scores/maternal_smok_FDR_PCNet2.2_zscore.tsv


  0%|          | 0/1000 [00:00<?, ?it/s]

A2M      3.019980
APP     -1.112749
LRP1    -0.637090
APOA1    1.857977
HP      -0.737146
dtype: float64
network_scores/maternal_smok_bonf_PCNet2.2_zscore.tsv


  0%|          | 0/1000 [00:00<?, ?it/s]

A2M     -0.848566
APP     -1.282425
LRP1     1.831907
APOA1   -0.424422
HP      -0.506722
dtype: float64
network_scores/maternal_smok_top500_PCNet2.2_zscore.tsv


  0%|          | 0/1000 [00:00<?, ?it/s]

A2M     -1.259249
APP     -0.275438
LRP1    -0.666443
APOA1    0.563426
HP      -0.423157
dtype: float64
network_scores/townsend_FDR_PCNet2.2_zscore.tsv


  0%|          | 0/1000 [00:00<?, ?it/s]

A2M      0.346535
APP     -0.221312
LRP1     0.680180
APOA1    3.278910
HP       1.375181
dtype: float64
network_scores/townsend_bonf_PCNet2.2_zscore.tsv


  0%|          | 0/1000 [00:00<?, ?it/s]

A2M     -0.427975
APP     -1.080647
LRP1    -0.470492
APOA1    0.922520
HP       0.798802
dtype: float64
network_scores/townsend_top500_PCNet2.2_zscore.tsv


  0%|          | 0/1000 [00:00<?, ?it/s]

A2M     -0.826131
APP      0.999746
LRP1     1.288902
APOA1   -0.048446
HP       1.131369
dtype: float64
network_scores/age_menarche_FDR_PCNet2.2_zscore.tsv


  0%|          | 0/1000 [00:00<?, ?it/s]

A2M     -0.399001
APP     -0.232180
LRP1     0.049802
APOA1    0.500152
HP       0.867432
dtype: float64
network_scores/age_menarche_bonf_PCNet2.2_zscore.tsv


  0%|          | 0/1000 [00:00<?, ?it/s]

A2M     -0.633093
APP     -0.110638
LRP1    -0.196687
APOA1    0.357202
HP       0.671908
dtype: float64
network_scores/age_menarche_top500_PCNet2.2_zscore.tsv


  F /= len(seed_genes)


  0%|          | 0/1000 [00:00<?, ?it/s]

  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(

A2M     NaN
APP     NaN
LRP1    NaN
APOA1   NaN
HP      NaN
dtype: float64
network_scores/neurot_FDR_PCNet2.2_zscore.tsv


  F /= len(seed_genes)


  0%|          | 0/1000 [00:00<?, ?it/s]

  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(seed_genes)
  F /= len(

A2M     NaN
APP     NaN
LRP1    NaN
APOA1   NaN
HP      NaN
dtype: float64
network_scores/neurot_bonf_PCNet2.2_zscore.tsv


  0%|          | 0/1000 [00:00<?, ?it/s]

A2M      10.177219
APP      -1.146705
LRP1      0.313084
APOA1    -0.172331
HP       -0.198504
dtype: float64
network_scores/neurot_top500_PCNet2.2_zscore.tsv


# run for an individual seed gene set

In [34]:
seed_dict.keys()

dict_keys(['loco_gsem_FDR', 'loco_gsem_bonf'])

In [34]:
overwrite=False

In [43]:
k='loco_gsem_FDR'
print(len(seed_dict[k]))
seed_genes = list(seed_dict[k].intersection(graph.nodes()))

NPSc, Fnew_score, Fnew_rand_score = netprop_zscore.calculate_heat_zscores(
    w_double_prime,  
    list(graph.nodes()),
    dict(graph.degree), 
    seed_genes, num_reps=1000,
    minimum_bin_size=100,
    random_seed=random_seed)
print(NPSc.head())
if save_file:
    file_path='network_scores/'+k+'_'+interactome_name+'_zscore.tsv'
    print(file_path)
    if ((overwrite==False)&(os.path.exists(file_path))):
        print('File already exists. If you would like to overwrite this file, set overwrite=True, and rerun')
    else:
        NPSc.to_csv(file_path,sep='\t',header=False)

458


  0%|          | 0/1000 [00:00<?, ?it/s]

A2M      0.863378
APP     -0.220132
LRP1    -0.149449
APOA1   -0.046087
HP       0.485143
dtype: float64
network_scores/loco_gsem_FDR_PCNet2.2_zscore.tsv
