Purpose: runs network propagation (typically on PCNet v1.4) from seed genes saved in files whose paths are read from a metadata CSV. If rerun==True, the network propagation scores will be recalculated. Changes will not be saved.

# setup

In [4]:
#read in libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scipy.stats as stats
import matplotlib
from matplotlib_venn import venn2 
from scipy.stats import hypergeom
import statsmodels.stats.multitest
import rca_functions
import ndex2
import networkx as nx
from netcoloc import netprop_zscore
from netcoloc import netprop
from netcoloc import network_colocalization
from rca_functions import *

In [6]:
import os
# Work from the project directory so the relative paths used below
# (metadata CSVs, network_scores/ outputs) resolve.
# NOTE(review): hard-coded absolute, user-specific path; consider a configurable project root.
os.chdir('/tscc/nfs/home/bsleger/bsl/rare_common_alcohol/')
# Echo the resulting working directory as the cell output.
os.getcwd()

'/tscc/projects/ps-palmer/brittany/rare_common_alcohol'

# Interactome Set-up

In [7]:
# Optional local settings module: when environ_ndex_meta.py is absent, leave
# both NDEx credentials unset (None); when present, report it and import
# everything it defines (presumably ndex_user / ndex_password -- verify).
if not os.path.isfile('environ_ndex_meta.py'):
    ndex_user = None
    ndex_password = None
else:
    print('True')
    from environ_ndex_meta import *

True


In [8]:
# Name of the interactome to use; handed to import_interactome and run_net_prop below.
interactome_name = "pcnet_v14"

In [9]:
# Load the interactome graph by name. The helper is not defined in this
# notebook (presumably it comes from the rca_functions star import); it
# prints the network name plus node and edge counts.
G_PC = import_interactome(interactome_name)

pcnet_v14
number of nodes:
18630

number of edges:
2687393


In [10]:
# Pre-calculate the matrices used for network propagation so they are built
# only once. This step takes a few minutes, more for denser interactomes.
print('\ncalculating w_prime')
w_prime = netprop.get_normalized_adjacency_matrix(G_PC, conserve_heat=True)

# Second argument is the value 0.5 passed positionally, as in the original call.
print('\ncalculating w_double_prime')
w_double_prime = netprop.get_individual_heats_matrix(w_prime, 0.5)


calculating w_prime

calculating w_double_prime


# common gene data analysis

In [11]:
# Metadata table describing the common-variant datasets (comma-separated,
# which is the read_csv default).
datasets = pd.read_csv('common_datasets_prepub.csv')

In [12]:
# NOTE(review): this value is rebound by the `for row_common in ...` loop
# further down and is not read before then; it looks like a leftover.
row_common = 30

In [13]:
# Select the rows whose label contains 'GSCAN_DPW_magma', then renumber them
# from 0. reset_index() keeps the previous row labels in an 'index' column.
runsets = datasets.loc[datasets['label'].str.contains('GSCAN_DPW_magma')].reset_index()

In [14]:
# Display the selected dataset rows for a visual check before propagating.
runsets

Unnamed: 0,index,group,snp2gene_method,label,phenotype_group,seed_path,delim,zscore_file,zscore_path,seed_p,seed_gene_name,description,cutoff
0,0,GSCAN_DPW,magma,GSCAN_DPW_magma,alcohol,GSCAN_DPW/GSCAN_DPW_magma_results.csv,comma,gscan_dpw_magma_zscore.tsv,network_scores/gscan_dpw_magma_zscore.tsv,P,GENE_NAME,drinks_per_week_gscan,bonferroni


In [None]:
# Run network propagation once per selected common-variant dataset, taking
# the call arguments from that dataset's metadata row.
# NOTE(review): the original call ended in a bare `w_prime=` (a SyntaxError).
# It is completed here with the pre-calculated w_prime matrix; confirm that
# run_net_prop accepts this keyword, since the rare-variant call omits it.
for row_common in range(len(runsets)):
    print('processing '+runsets['label'][row_common])
    run_net_prop(runsets['seed_path'][row_common], runsets['label'][row_common],
                 runsets['seed_p'][row_common], runsets['seed_gene_name'][row_common],
                 runsets['delim'][row_common], runsets['cutoff'][row_common],
                 interactome_name, w_prime=w_prime)

In [7]:
# Manually specified seed genes and the label used in the output file names.
# Running the notebook top to bottom, the next cell overwrites both values.
data = ['ADH1B', 'ADH1C']
trait_name = 'ADH1BandADH1C'

In [10]:
# Single-gene seed set and the label used in the output file names.
data = ['ADH1C']
trait_name = 'ADH1C'

In [11]:
# Propagate the manually chosen seed genes in `data` and compute network
# z-scores from 1000 randomised repetitions.
# The node list is taken from G_PC directly: `pc_nodes` is never defined in
# this notebook, and the nodes must be in the same order as the matrix that
# was built from G_PC.
z_score, Fnew_score, Fnew_rand_score = netprop_zscore.calculate_heat_zscores(
    w_double_prime,
    list(G_PC.nodes),
    dict(G_PC.degree),
    data,
    num_reps=1000,
    minimum_bin_size=100,
)

# Write the z-scores, the observed heats and the randomised heats as
# tab-separated files named after the trait.
z_score.to_csv('network_scores/'+trait_name+'_zscore.tsv', sep='\t', header=False)
Fnew_score.to_csv('network_scores/'+trait_name+'_heats.tsv', sep='\t', header=False)
pd.DataFrame(Fnew_rand_score, columns=z_score.index).to_csv('network_scores/'+trait_name+'_randheats.tsv', sep='\t')

# Echo the z-score file name and its relative path.
print(trait_name+'_zscore.tsv')
print('network_scores/'+trait_name+'_zscore.tsv')

  0%|          | 0/1000 [00:00<?, ?it/s]

ADH1C_zscore.tsv
network_scores/ADH1C_zscore.tsv


# rare gene data analysis

In [21]:
# Show the current working directory; the rare-variant metadata below is read by relative path.
os.getcwd()

'/tscc/projects/ps-palmer/brittany/rare_common_alcohol'

In [19]:
# Metadata table for the rare-variant datasets; every row is run.
# This rebinds `datasets` and `runsets` from the common-variant section.
# reset_index() keeps the previous row labels in an 'index' column.
datasets = pd.read_csv('rare_datasets_prepub.csv')
runsets = datasets.reset_index()

In [20]:
# Display the rare-variant dataset rows for a visual check before propagating.
runsets

Unnamed: 0,index,label,cutoff used,seed_path,delim,zscore_file,zscore_path,Unnamed: 6,seed_gene_name,phenotype_group
0,0,alcoholintake_FDR_25,all tests FDR <0.25,rare_variant_genebass/alcohol_intake/alcohol_i...,comma,alcoholintake_fdr_25_zscore.tsv,network_scores/alcoholintake_FDR_25_zscore.tsv,network_scores/alcoholintake_fdr_25_zscore.tsv,Gene Name,alcohol
1,1,rare_neale_20153_irnt_FDR_25,all tests FDR <0.25,rare_variant_genebass/20153_irnt/20153_irnt_25...,tab,rare_neale_20153_irnt_fdr_25_zscore.tsv,network_scores/rare_neale_20153_irnt_FDR_25_zs...,network_scores/rare_neale_20153_irnt_fdr_25_zs...,Gene Name,control
2,2,rare_neale_30110_irnt_FDR_25,all tests FDR <0.25,rare_variant_genebass/30110_irnt/30110_irnt_25...,tab,rare_neale_30110_irnt_fdr_25_zscore.tsv,network_scores/rare_neale_30110_irnt_FDR_25_zs...,network_scores/rare_neale_30110_irnt_fdr_25_zs...,Gene Name,control
3,3,rare_neale_20016_FDR_25,all tests FDR <0.25,rare_variant_genebass/20016/20016_25FDR.tsv,tab,rare_neale_20016_fdr_25_zscore.tsv,network_scores/rare_neale_20016_FDR_25_zscore.tsv,network_scores/rare_neale_20016_fdr_25_zscore.tsv,Gene Name,control
4,4,rare_neale_20502_FDR_25,all tests FDR <0.25,rare_variant_genebass/20502/20502_25FDR.tsv,tab,rare_neale_20502_fdr_25_zscore.tsv,network_scores/rare_neale_20502_FDR_25_zscore.tsv,network_scores/rare_neale_20502_fdr_25_zscore.tsv,Gene Name,control
5,5,rare_neale_2443_FDR_25,all tests FDR <0.25,rare_variant_genebass/2443/2443_25FDR.tsv,tab,rare_neale_2443_fdr_25_zscore.tsv,network_scores/rare_neale_2443_FDR_25_zscore.tsv,network_scores/rare_neale_2443_fdr_25_zscore.tsv,Gene Name,control
6,6,rare_neale_4194_FDR_25,all tests FDR <0.25,rare_variant_genebass/4194/4194_25FDR.tsv,tab,rare_neale_4194_fdr_25_zscore.tsv,network_scores/rare_neale_4194_FDR_25_zscore.tsv,network_scores/rare_neale_4194_fdr_25_zscore.tsv,Gene Name,control
7,7,rare_neale_78_FDR_25,all tests FDR <0.25,rare_variant_genebass/78/78_25FDR.tsv,tab,rare_neale_78_fdr_25_zscore.tsv,network_scores/rare_neale_78_FDR_25_zscore.tsv,network_scores/rare_neale_78_fdr_25_zscore.tsv,Gene Name,control
8,8,rare_neale_C50_FDR_25,all tests FDR <0.25,rare_variant_genebass/C50/C50_25FDR.tsv,tab,rare_neale_c50_fdr_25_zscore.tsv,network_scores/rare_neale_C50_FDR_25_zscore.tsv,network_scores/rare_neale_c50_fdr_25_zscore.tsv,Gene Name,control
9,9,rare_neale_C44_FDR_25,all tests FDR <0.25,rare_variant_genebass/C44/C44_25FDR.tsv,tab,rare_neale_c44_fdr_25_zscore.tsv,network_scores/rare_neale_C44_FDR_25_zscore.tsv,network_scores/rare_neale_c44_fdr_25_zscore.tsv,Gene Name,control


In [None]:
# Run network propagation once per rare-variant dataset. The literals '0' and
# 'no_cutoff' sit in the argument positions that the common-variant call
# fills with the seed_p and cutoff metadata columns.
for row in range(len(runsets)):
    print('processing ' + runsets.loc[row, 'label'])
    run_net_prop(
        runsets.loc[row, 'seed_path'],
        runsets.loc[row, 'label'],
        '0',
        runsets.loc[row, 'seed_gene_name'],
        runsets.loc[row, 'delim'],
        'no_cutoff',
        interactome_name,
    )