## Functions

In [1]:
import io
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os, sys
from copy import deepcopy
import time

DATA = "./_Data/"
ORIG = DATA + "prostate_data_results/coef_original/paper/"
RERUN = DATA + "prostate_data_results/coef_original/rerun/"
PERM = DATA + "prostate_data_results/coef_permuted/"

In [2]:
def readData(layer, num=None):
    if num == None:
        df = pd.read_csv(ORIG + "coef_P-net_ALL_layerh" + str(layer) + ".csv")
        df.rename(columns={'Unnamed: 0': 'gene'}, inplace=True)
    else:
        df = pd.read_csv(RERUN + "coef_P-net_" + str(num) + "_layerh" + str(layer) + ".csv")
        df.rename(columns={'Unnamed: 0': 'gene'}, inplace=True)
    df["coef"] = df["coef"].abs()
    return df

def readPermutedData(layer, num):
    df = pd.read_csv(PERM + "coef_P-net_" + str(num) + "_layerh" + str(layer) + ".csv")
    df.rename(columns={'Unnamed: 0': 'gene'}, inplace=True)
    df["coef"] = df["coef"].abs()
    return df
    
def setBase(df):
    df.rename(columns={'coef': "original"}, inplace=True)
    
def merge(base, df, count):
    base[count] = df["coef"]
    
def loadPermutation(layer, count, model_num=None):
    base = readData(layer, model_num)
    setBase(base)
    for i in range(0, count):
        df = readPermutedData(layer, i)
        merge(base, df, i)
    return base

def rowOP(row):
    l = row[list(range(100))].to_numpy()
    if row["original"] == 0:
        row["pval"] = 1
    else:
        row["pval"] = sum(score > row["original"] for score in l) / len(l)
    return row
    
# find p-value of each SNP
def p_values(df):
    df["pval"] = 1
    df = df.apply(rowOP, axis=1)
    return df

# get a list of causal SNPs given p-values and thresholds
def top_importance(df, threshold):
    genes = []
    for i in range(len(df)):
        if df.iloc[i]["pval"] <= threshold:
            genes.append((i, df.iloc[i]["gene"]))
    return genes

## Layer 1

In [11]:
layer0 = loadPermutation(0, 100, 2)
layer0 = p_values(layer0)
l = top_importance(layer0, 0.05)

  base[count] = df["coef"]
  base[count] = df["coef"]
  df["pval"] = 1


In [9]:
layer0["pval"].unique()

array([0.33, 1.  , 0.69, 0.19, 0.55, 0.15, 0.24, 0.47, 0.03, 0.36, 0.42,
       0.46, 0.04, 0.93, 0.17, 0.3 , 0.67, 0.65, 0.99, 0.51, 0.38, 0.86,
       0.5 , 0.97, 0.6 , 0.76, 0.87, 0.71, 0.06, 0.84, 0.  , 0.02, 0.01,
       0.79, 0.9 , 0.12, 0.32, 0.74, 0.52, 0.58, 0.27, 0.18, 0.78, 0.08,
       0.44, 0.21, 0.39, 0.37, 0.09, 0.68, 0.94, 0.29, 0.11, 0.49, 0.77,
       0.98, 0.64, 0.14, 0.41, 0.62, 0.85, 0.23, 0.83, 0.05, 0.91, 0.45,
       0.35, 0.31, 0.13, 0.72, 0.26, 0.25, 0.07, 0.89, 0.4 , 0.95, 0.8 ,
       0.66, 0.43, 0.88, 0.7 , 0.2 , 0.92, 0.34, 0.81, 0.54, 0.63, 0.53,
       0.28, 0.61, 0.48, 0.73, 0.22, 0.1 , 0.16, 0.59, 0.96, 0.56, 0.57,
       0.75, 0.82])

In [15]:
len(l)

467

In [12]:
N = 10
top = np.argpartition(layer0['original'].to_numpy(), -N)[-N:]
layer0.iloc[top][layer0['pval'] < 0.05]

  layer0.iloc[top][layer0['pval'] < 0.05]


Unnamed: 0,gene,original,0,1,2,3,4,5,6,7,...,91,92,93,94,95,96,97,98,99,pval
4441,MAML3,1.962631,0.162878,0.162903,0.523062,0.000299,0.018347,0.007882,0.198334,0.015495,...,0.003947,0.067302,0.003224,0.006894,0.639979,0.109709,6e-06,0.003924,0.001578,0.0
4666,PDGFA,2.07585,9.3e-05,9.3e-05,0.019727,0.008311,0.158209,2.1e-05,0.017917,0.122042,...,2.093165,0.125638,0.395439,0.000147,0.06825,0.002541,0.023328,0.022556,0.016619,0.01
25,NUP98,2.091502,0.038195,0.038345,0.103274,0.187885,0.24158,0.53942,0.44711,1.166268,...,0.125516,0.012765,0.014579,0.128042,0.123236,0.520735,0.17556,3.390204,0.171631,0.03
1845,PSAP,2.142178,0.063155,0.063084,0.052865,0.000374,1.208208,0.191784,0.123581,0.001741,...,0.010719,0.17402,0.098685,0.102883,1.692225,3.160575,0.033314,0.030297,0.011672,0.01
8753,PSMD1,3.44858,0.392537,0.392519,0.356388,0.011416,0.000483,0.881729,1.260622,0.525812,...,0.008497,0.095794,0.234015,0.065487,0.209959,0.536225,0.311554,0.190858,0.663372,0.0
4109,AR,58.45143,7.641133,7.642173,7.018396,0.00122,1.414062,0.505876,0.103445,0.05793,...,0.485454,2.477941,0.645372,7.844319,0.115035,0.258104,0.055252,0.078059,0.123105,0.0
8321,RB1,3.142627,1.174189,1.174075,1.94728,0.001415,0.076219,0.602741,0.264239,0.356399,...,0.003578,0.020616,0.000318,0.012948,2.027823,0.012043,0.002441,2.731956,0.325395,0.01
7077,PTEN,13.080307,1.739618,1.739824,1.870816,5.57883,3.046595,2.090399,7.007099,0.210062,...,11.790235,1.853595,11.322998,0.500341,4.319767,1.367123,2.606936,2.032172,1.643378,0.03
7800,TP53,22.028597,1.44509,1.445375,2.267962,9.703601,4.738791,12.611574,16.472885,18.61997,...,3.060888,0.611779,7.569818,1.961593,2.62208,7.084734,0.490473,25.569157,1.56735,0.02


In [8]:
layer0[layer0['gene'] == 'MDM4']

Unnamed: 0,gene,original,0,1,2,3,4,5,6,7,...,91,92,93,94,95,96,97,98,99,pval
2108,MDM4,1.088148,0.035727,0.035802,0.051863,0.205975,0.460853,0.812725,0.094818,0.00168,...,0.153175,0.085611,0.001929,0.001109,0.050771,0.1131,0.000407,2.152207,0.595239,0.07


In [9]:
layer0[layer0['gene'] == 'FGFR1']

Unnamed: 0,gene,original,0,1,2,3,4,5,6,7,...,91,92,93,94,95,96,97,98,99,pval
3297,FGFR1,1.005486,0.00644,0.006461,0.018611,0.122011,0.003052,1.139678e-13,0.316396,0.004379,...,0.394119,0.453169,1.29563,0.00891,0.310374,0.002976,0.285507,0.15039,0.001468,0.03


In [10]:
layer0[layer0['gene'] == 'NOTCH1']

Unnamed: 0,gene,original,0,1,2,3,4,5,6,7,...,91,92,93,94,95,96,97,98,99,pval
2919,NOTCH1,0.728791,0.001489,0.001494,0.003078,0.020134,0.000193,0.001985,0.000827,0.004102,...,1.5e-05,0.175281,0.00031,0.003097,0.002303,0.006579,0.000631,0.042421,9.039237e-07,0.0


In [11]:
layer0[layer0['gene'] == 'PDGFA']

Unnamed: 0,gene,original,0,1,2,3,4,5,6,7,...,91,92,93,94,95,96,97,98,99,pval
4666,PDGFA,1.980735,9.3e-05,9.3e-05,0.019727,0.008311,0.158209,2.1e-05,0.017917,0.122042,...,2.093165,0.125638,0.395439,0.000147,0.06825,0.002541,0.023328,0.022556,0.016619,0.01


In [7]:
layer0[layer0['gene'] == 'MDM2']

Unnamed: 0,gene,original,0,1,2,3,4,5,6,7,...,91,92,93,94,95,96,97,98,99,pval
2107,MDM2,1.732088,0.03361,0.033558,0.009178,0.057742,0.031447,0.136337,0.641645,1.490247,...,0.16218,0.037927,0.020893,0.515455,0.445116,9.4e-05,0.104957,0.025457,0.130012,0.02


In [14]:
np.mean(layer0['original'])

0.03704294342290399

## Layer 2

In [32]:
layer1 = loadPermutation(1, 100)
layer1 = p_values(layer1)
l = top_importance(layer1, 0.05)

  base[count] = df["coef"]
  base[count] = df["coef"]
  df["pval"] = 1


In [33]:
len(l)

157

In [None]:
N = 50
top = np.argpartition(layer1[0].to_numpy(), -N)[-N:]
layer1.iloc[top][layer1['pval'] < 0.05]

  layer1.iloc[top][layer1['pval'] < 0.05]


Unnamed: 0,gene,original,0,1,2,3,4,5,6,7,...,91,92,93,94,95,96,97,98,99,pval
695,R-HSA-8852276,3.167461,0.375847,0.375846,0.441034,0.159518,0.010337,0.992751,0.677151,2.288295,...,0.005104688,0.07205395,0.393653,2.4e-05,0.009969,1.287812,4.1e-05,1.454786,0.0529129,0.01
618,R-HSA-72662,0.757427,0.696359,0.696813,0.202093,0.009622,0.023179,0.487179,0.004717,0.432896,...,1.068279e-05,3.60654e-07,0.010202,0.050713,1.3e-05,1e-06,7.6e-05,0.009167,0.0505583,0.02
889,R-HSA-2299718,3.634889,0.493066,0.49324,0.683768,0.254942,0.193842,0.238095,0.035389,0.240084,...,0.001456603,0.007960952,2.023199,0.000488,1.025814,0.254385,0.002098,0.161997,0.008111349,0.0
565,R-HSA-4570464,0.506019,0.390612,0.390646,0.302689,0.012852,2.8e-05,0.065582,0.000301,0.019889,...,0.02867003,0.197559,0.188153,0.041937,0.20038,0.006068,0.000449,0.207186,0.2744201,0.04
117,R-HSA-380320,2.331446,1.035685,1.036075,1.829305,0.481658,0.290411,0.315407,0.297382,2.153634,...,0.04718814,0.5090677,0.594194,0.006177,0.393059,0.015054,0.86159,1.06815,0.2884708,0.03
987,R-HSA-5633007,7.200807,1.221093,1.222624,2.228687,0.342694,0.345277,3.948292,0.496533,1.194892,...,0.08806235,0.08940271,2.26754,1.885154,3.437395,1.857115,0.689086,2.241692,0.5052894,0.0
897,R-HSA-1912408,1.247639,0.545716,0.545562,0.755152,0.010359,0.022667,0.092378,0.003774,0.146498,...,0.00380082,0.01891875,0.025907,0.208946,0.02268,0.004627,4.2e-05,1.4e-05,5.06072e-08,0.0
927,R-HSA-156827,1.160133,0.634713,0.634973,0.030591,0.003077,0.012384,0.853434,0.008281,0.32526,...,2.990169e-08,3.118642e-09,0.000736,0.05523,3e-06,0.000922,0.000456,0.000247,0.035717,0.02
1016,R-HSA-983168,11.946217,0.605489,0.604773,1.439641,1.825894,0.136391,1.252098,0.278386,4.712413,...,0.3234322,0.1373848,0.501359,0.561676,3.210907,0.25066,0.926881,0.366942,2.205876,0.01
542,R-HSA-4090294,9.4221,0.381979,0.382306,0.513352,0.003645,1.1e-05,0.189852,0.000337,0.068196,...,0.01860404,0.05678438,0.080769,1.875063,0.018141,0.011694,1e-06,0.116016,0.02458992,0.0


## Layer 3

In [26]:
layer2 = loadPermutation(2, 100)
layer2 = p_values(layer2)
l = top_importance(layer2, 0.05)

  base[count] = df["coef"]
  base[count] = df["coef"]
  df["pval"] = 1


In [14]:
len(l)

112

## Layer 4

In [11]:
layer3 = loadPermutation(3, 100)
layer3 = p_values(layer3)
l = top_importance(layer3, 0.05)

  base[count] = df["coef"]
  base[count] = df["coef"]
  df["pval"] = 1


In [12]:
len(l)

81

## Layer 5

In [15]:
layer4 = loadPermutation(4, 100)
layer4 = p_values(layer4)
l = top_importance(layer4, 0.05)

  base[count] = df["coef"]
  base[count] = df["coef"]
  df["pval"] = 1


In [16]:
len(l)

27

In [17]:
layer4

Unnamed: 0,gene,original,0,1,2,3,4,5,6,7,...,91,92,93,94,95,96,97,98,99,pval
0,R-HSA-9006936,2.310499e-02,2.279284e-04,2.278466e-04,8.381681e-03,3.987842e-03,4.897469e-04,1.796722e-05,1.138944e-02,4.740604e-06,...,6.035870e-05,4.852362e-02,1.513166e-04,7.085444e-04,1.645121e-03,0.000006,2.849894e-05,3.241477e-03,4.304211e-05,0.10
1,R-HSA-1368071,1.965697e-08,0.000000e+00,0.000000e+00,9.627909e-08,5.662374e-09,5.490224e-08,3.035778e-06,3.616813e-05,1.366621e-06,...,3.757950e-07,6.081877e-07,8.000598e-09,1.613948e-07,4.815179e-04,0.000039,0.000000e+00,6.258026e-07,2.793095e-07,0.57
2,R-HSA-157118,2.826089e+00,5.508351e-01,5.506814e-01,7.251450e-01,3.368174e-03,2.328104e-03,9.057184e-02,1.591363e-02,1.491914e-01,...,3.784634e-03,6.800730e-02,4.372453e-02,1.618339e-01,9.000275e-01,0.037558,4.261814e-03,1.774311e-02,9.457885e-05,0.00
3,R-HSA-71737,0.000000e+00,1.702824e-08,1.702791e-08,0.000000e+00,1.427502e-09,0.000000e+00,0.000000e+00,0.000000e+00,3.633266e-09,...,0.000000e+00,1.335933e-05,0.000000e+00,0.000000e+00,0.000000e+00,0.000000,0.000000e+00,0.000000e+00,0.000000e+00,1.00
4,R-HSA-1592230,2.087509e-10,7.114786e-04,7.111722e-04,1.359462e-04,2.032949e-06,2.361284e-05,9.828565e-06,6.722924e-05,1.121588e-06,...,6.901819e-09,6.940975e-07,1.108264e-04,2.069639e-03,1.094423e-04,0.000543,1.351973e-04,8.822049e-02,9.960236e-06,1.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
142,R-HSA-5660526,2.050515e-10,0.000000e+00,0.000000e+00,1.043550e-07,0.000000e+00,0.000000e+00,2.382284e-04,7.733158e-06,2.767316e-04,...,2.701554e-07,0.000000e+00,1.738044e-09,7.745151e-05,5.493369e-05,0.000000,1.065028e-09,2.436040e-08,0.000000e+00,0.52
143,R-HSA-445355,2.735055e-08,1.589929e-04,1.590307e-04,1.509825e-02,1.972291e-03,5.513595e-06,4.307636e-05,1.139469e-04,1.998451e-03,...,0.000000e+00,4.619731e-04,2.347757e-04,1.296741e-09,0.000000e+00,0.003754,4.516231e-08,1.118361e-06,2.263681e-02,0.84
144,R-HSA-5223345,6.431515e-07,4.145328e-05,4.149550e-05,1.730141e-05,4.528956e-02,2.760784e-09,1.215772e-04,3.700311e-04,2.578464e-03,...,0.000000e+00,1.334285e-08,2.299137e-08,3.576582e-05,0.000000e+00,0.000387,1.646022e-11,0.000000e+00,0.000000e+00,0.38
145,R-HSA-5682910,1.341577e-08,3.566903e-05,3.565498e-05,0.000000e+00,0.000000e+00,9.299316e-07,2.804422e-10,1.039423e-09,0.000000e+00,...,0.000000e+00,7.945387e-06,3.252650e-09,8.140404e-11,1.528695e-09,0.000000,2.695745e-09,1.423982e-04,2.336590e-03,0.50


## Layer 6

In [18]:
layer5 = loadPermutation(5, 100)
layer5 = p_values(layer5)
l = top_importance(layer5, 0.05)

  base[count] = df["coef"]
  base[count] = df["coef"]
  df["pval"] = 1


In [19]:
len(l)

9

In [20]:
layer5

Unnamed: 0,gene,original,0,1,2,3,4,5,6,7,...,91,92,93,94,95,96,97,98,99,pval
0,R-HSA-397014,6.863754e-05,0.02256323,0.02258382,0.0106556,0.001972356,1.034922e-05,2.9e-05,0.012818,0.00991297,...,0.003321698,0.009700288,0.001749171,0.006622843,0.001207533,0.018857,0.01134246,0.001547513,0.022818,0.8
1,R-HSA-168256,50.29035,4.070901,4.058387,3.244412,0.08150013,5.488005,9.065948,5.961862,5.32635,...,8.124285,7.704827,2.972904,2.042528,7.609162,10.442201,1.058722,2.698065,0.929391,0.0
2,R-HSA-382551,0.04467051,0.0001417649,0.0001419296,0.00277761,0.06300511,0.0002573889,0.044611,0.000947,0.0005292779,...,0.001072913,0.05192969,0.009983694,0.004817984,7.636636e-05,0.001522,0.0005376917,0.0001264546,0.022931,0.12
3,R-HSA-1500931,1.011585e-08,0.0002176852,0.0002170146,1.736811e-05,0.2161382,0.002190553,0.023684,0.300558,0.0003860723,...,0.0001988189,0.1079368,9.41736e-09,5.017657e-08,5.24851e-06,5.4e-05,0.009935345,0.07866646,0.004555,0.99
4,R-HSA-5357801,0.2057404,0.07667931,0.07677799,0.01502346,0.01427826,0.007938133,0.053966,0.004834,0.0005560344,...,0.005580279,1.373849e-05,0.001460408,0.0008258923,0.1328546,0.000572,0.002081616,0.1507159,0.074759,0.12
5,R-HSA-8963743,4.133686e-05,2.309491e-06,2.305793e-06,5.357172e-06,0.000176037,0.000444639,0.001724,0.000627,0.000494741,...,1.002321e-08,1.978265e-07,3.879232e-05,2.687702e-10,8.547633e-10,0.000341,3.385887e-08,0.0006235623,7.2e-05,0.33
6,R-HSA-1430728,2.096561,0.06320185,0.06328271,0.00278305,0.01663404,0.05981026,1.9e-05,0.034837,0.002731228,...,0.04029207,0.7632456,0.008803723,0.009053007,0.02118916,0.021987,0.03369707,0.09714866,0.014953,0.0
7,R-HSA-1640170,28.54512,3.218846,3.217432,3.802036,2.589647,1.67691,4.364507,0.162745,9.857768,...,0.02009866,0.8841577,4.083199,0.3892945,2.788985,1.477176,4.266749,4.936364,1.042791,0.0
8,R-HSA-1266738,0.08154131,0.05380452,0.05373215,0.2204698,0.3771131,0.2360288,0.152163,0.288272,0.1594553,...,0.02207018,0.0575873,1.298855,0.02903057,0.06642436,0.75742,0.04739473,0.002199742,0.314984,0.65
9,R-HSA-74160,43.35403,15.60098,15.6002,15.09457,3.010567,0.4770518,19.233953,0.073593,10.91027,...,1.513876,0.4971372,8.992366,1.478435,16.70153,10.576977,2.389405,5.923081,0.577065,0.0
