# purpose and use notes

Purpose: generates network propagation scores (NPS) for the traits used in this paper. If `save_file` is set to True, the existing network propagation score files will be overwritten with the newly generated scores. This will affect all downstream analyses.

Runs network propagation (typically in PCNet v1.4) from seed genes saved in a file whose path is read from the metadata CSV. If rerun==TRUE, the network propagation scores will be recalculated.

# setup

In [7]:
#read in libraries
from rca_functions import *
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scipy.stats as stats
import matplotlib
from matplotlib_venn import venn2 
from scipy.stats import hypergeom
import statsmodels.stats.multitest
import rca_functions
import ndex2
import networkx as nx
from netcoloc import netprop_zscore
from netcoloc import netprop
from netcoloc import network_colocalization
import sys

In [2]:
# Move the working directory up one level so the relative paths used in the
# cells below resolve against it.
# NOTE: this is relative to the current directory, so re-running this cell
# moves up one more level each time.
os.chdir('../')

In [14]:
# Display the current working directory to confirm where relative paths resolve.
os.getcwd()

'/Users/brittanyleger/Documents/GitHub/rare_common_alcohol_comparison'

In [4]:
# When True, the loops below write each dataset's scores to
# calculated_values/network_scores/ (replacing any existing file of the same
# name); when False the scores are only computed and previewed.
save_file=False

# Interactome Set-up

In [5]:
# Name of the interactome that is passed to import_interactome below.
interactome_name='pcnet_v14'

In [9]:
# Load the interactome; `graph` is used below for its nodes and node degrees.
# NOTE(review): import_interactome is presumably provided by the star import
# of rca_functions — confirm.
graph=import_interactome(interactome_name)

pcnet_v14
number of nodes:
18630

number of edges:
2687393


In [10]:
# Pre-calculate the matrices used for network propagation. They are built
# once here and reused for every dataset processed below.
print('\ncalculating w_prime')
w_prime = netprop.get_normalized_adjacency_matrix(
    graph,
    conserve_heat=True,
)

# The individual heats matrix is derived from w_prime.
print('\ncalculating w_double_prime')
w_double_prime = netprop.get_individual_heats_matrix(w_prime, 0.5)


calculating w_prime

calculating w_double_prime


In [23]:
# Load the metadata tables describing each dataset (seed file path,
# delimiter, seed gene column, label, ...). Both files are comma-separated,
# which is the read_csv default.
common_datasets = pd.read_csv('common_datasets_prepub.csv')
rare_datasets = pd.read_csv('rare_datasets_prepub.csv')

# rare gene data analysis

In [24]:
# Display the rare-variant dataset metadata table.
rare_datasets

Unnamed: 0,label,cutoff used,seed_path,delim,zscore_file,zscore_path,seed_gene_name,phenotype_group
0,alcoholintake_FDR_25,all tests FDR <0.25,input_files/rare_variant_genebass/alcohol_inta...,comma,alcoholintake_fdr_25_zscore.tsv,calculated_values/network_scores/alcoholintake...,Gene Name,alcohol
1,rare_neale_20153_irnt_FDR_25,all tests FDR <0.25,input_files/rare_variant_genebass/20153_irnt/2...,tab,rare_neale_20153_irnt_fdr_25_zscore.tsv,calculated_values/network_scores/rare_neale_20...,Gene Name,control
2,rare_neale_20016_FDR_25,all tests FDR <0.25,input_files/rare_variant_genebass/20016/20016_...,tab,rare_neale_20016_fdr_25_zscore.tsv,calculated_values/network_scores/rare_neale_20...,Gene Name,control
3,rare_neale_4194_FDR_25,all tests FDR <0.25,input_files/rare_variant_genebass/4194/4194_25...,tab,rare_neale_4194_fdr_25_zscore.tsv,calculated_values/network_scores/rare_neale_41...,Gene Name,control
4,rare_neale_78_FDR_25,all tests FDR <0.25,input_files/rare_variant_genebass/78/78_25FDR.tsv,tab,rare_neale_78_fdr_25_zscore.tsv,calculated_values/network_scores/rare_neale_78...,Gene Name,control
5,rare_neale_C50_FDR_25,all tests FDR <0.25,input_files/rare_variant_genebass/C50/C50_25FD...,tab,rare_neale_c50_fdr_25_zscore.tsv,calculated_values/network_scores/rare_neale_c5...,Gene Name,control
6,rare_neale_C44_FDR_25,all tests FDR <0.25,input_files/rare_variant_genebass/C44/C44_25FD...,tab,rare_neale_c44_fdr_25_zscore.tsv,calculated_values/network_scores/rare_neale_c4...,Gene Name,control
7,rare_neale_100016_FDR_25,all tests FDR <0.25,input_files/rare_variant_genebass/100016/10001...,tab,rare_neale_100016_fdr_25_zscore.tsv,calculated_values/network_scores/rare_neale_10...,Gene Name,control
8,rare_strin_allcut_alcoholintake,burden bonferroni < 0.05 in the whole table of...,input_files/rare_variant_genebass/alcohol_inta...,comma,rare_strin_allcut_alcoholintake_zscore.tsv,calculated_values/network_scores/rare_strin_al...,gene_symbol,alcohol


In [None]:
# Import the seed genes for every rare-variant dataset, run network
# propagation from them, and (optionally) save the resulting scores.

# These do not depend on the dataset, so build them once instead of on
# every iteration.
interactome_nodes = list(graph.nodes())
node_degrees = dict(graph.degree)

for _, dataset in rare_datasets.iterrows():
    gene_col = dataset['seed_gene_name']
    seed_table = import_seedgenes(dataset['seed_path'],
                                  None,
                                  gene_col,
                                  dataset['delim'])
    # filter for only genes in the interactome
    seed_genes = list(set(seed_table[gene_col]).intersection(graph.nodes()))

    NPSc, Fnew_score, Fnew_rand_score = netprop_zscore.calculate_heat_zscores(
        w_double_prime,
        interactome_nodes,
        node_degrees,
        seed_genes, num_reps=1000,
        minimum_bin_size=100)
    print(NPSc.head())

    # Build the output path once so the path that is printed is exactly the
    # path that is written (file names are stored lowercased).
    zscore_path = ('calculated_values/network_scores/' + dataset['label'] + '_zscore.tsv').lower()
    print(zscore_path)
    if save_file:
        NPSc.to_csv(zscore_path, sep='\t', header=False)

# common gene data analysis

In [46]:
# Display the common-variant dataset metadata table.
common_datasets

Unnamed: 0,group,snp2gene_method,label,phenotype_group,seed_path,delim,zscore_file,zscore_path,seed_p,seed_gene_name,description,cutoff
0,GSCAN_DPW,magma,GSCAN_DPW_magma,alcohol,input_files/GSCAN_DPW/GSCAN_DPW_magma_results.csv,comma,gscan_dpw_magma_zscore.tsv,calculated_values/network_scores/gscan_dpw_mag...,P,GENE_NAME,drinks_per_week_gscan,bonferroni
1,neale_20153_irnt,magma,neale_20153_irnt_magma,control,input_files/neale_ctrl/20153_magma_output.tsv,,neale_20153_magma_zscore.tsv,calculated_values/network_scores/neale_20153_m...,P,SYMBOL,FEV1,bonferroni
2,neale_4104_irnt,magma,neale_4104_irnt_magma,control,input_files/neale_ctrl/4104_magma_output.tsv,tab,neale_4104_irnt_magma_zscore.tsv,calculated_values/network_scores/neale_4104_ir...,P,SYMBOL,heel_bone_density,bonferroni


In [None]:
# Import the seed genes for every common-variant dataset, run network
# propagation from them, and (optionally) save the resulting scores.

# These do not depend on the dataset, so build them once instead of on
# every iteration.
interactome_nodes = list(graph.nodes())
node_degrees = dict(graph.degree)

for _, dataset in common_datasets.iterrows():
    gene_col = dataset['seed_gene_name']
    seed_table = import_seedgenes(path=dataset['seed_path'],
                                  pcol=dataset['seed_p'],
                                  gene_col=gene_col,
                                  delim=dataset['delim'],
                                  cutoff=dataset['cutoff'])
    # filter for only genes in the interactome
    seed_genes = list(set(seed_table[gene_col]).intersection(graph.nodes()))

    NPSc, Fnew_score, Fnew_rand_score = netprop_zscore.calculate_heat_zscores(
        w_double_prime,
        interactome_nodes,
        node_degrees,
        seed_genes, num_reps=1000,
        minimum_bin_size=100)
    print(NPSc.head())

    # Build the output path once and lowercase it, so the file is written to
    # the same path that is printed. This matches how the rare-variant scores
    # are saved and keeps the name stable on case-sensitive file systems.
    # NOTE(review): the path is derived from `label`; for at least one row the
    # metadata `zscore_file` column shows a different name
    # (label neale_20153_irnt_magma vs neale_20153_magma_zscore.tsv) — confirm
    # which one downstream analyses read.
    zscore_path = ('calculated_values/network_scores/' + dataset['label'] + '_zscore.tsv').lower()
    print(zscore_path)
    if save_file:
        NPSc.to_csv(zscore_path, sep='\t', header=False)