# Analyzing SARS-Cov-2 data

In [1]:
import hypernetx as hnx
import networkx as nx
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import math
import sys

In [2]:
filedir = "/Users/feng626/covid19/SARSCov2/data/originalData"
bigtransFile = f'{filedir}/bigTrans/bigTrans.txt'
sars2File = f'{filedir}/GSE147507_RawReadCounts_Human.tsv'

In [3]:
dft = pd.read_csv(bigtransFile, sep = '\t')
dft.head()

Unnamed: 0,EB1_WT_0h__b,EB1_WT_0h__p,EB1_WT_00h__b,EB1_WT_00h__p,EB1_WT_8h__b,EB1_WT_8h__p,EB1_WT_18h__b,EB1_WT_18h__p,EB1_WT_24h__b,EB1_WT_24h__p,...,mouse_ln_WNV_WT_6d__b,mouse_ln_WNV_WT_6d__p,mouse_ln_WNV_E218A_1d__b,mouse_ln_WNV_E218A_1d__p,mouse_ln_WNV_E218A_2d__b,mouse_ln_WNV_E218A_2d__p,mouse_ln_WNV_E218A_4d__b,mouse_ln_WNV_E218A_4d__p,mouse_ln_WNV_E218A_6d__b,mouse_ln_WNV_E218A_6d__p
AAAS,-0.053539,0.897905,-0.021629,0.971758,0.06945,0.893481,0.390366,0.150667,0.073836,0.705193,...,0.818601,0.199452,0.675964,0.000681,1.295524,0.000307,1.474642,0.000588,1.359757,0.012836
AACS,0.031504,0.896717,0.131252,0.409202,0.309998,0.244921,0.401375,0.037433,0.165346,0.35411,...,-1.549177,0.038906,-0.788553,0.123942,-0.743268,0.03809,-0.632166,0.112165,-1.220868,0.041723
AADAC,-0.04166,0.837731,0.031732,0.90552,-0.106712,0.351493,-0.224353,0.169825,-0.045614,0.834415,...,0.036319,0.768323,0.030294,0.781156,0.03026,0.748385,0.020523,0.891356,0.009379,0.963882
AAK1,0.139425,0.507731,0.148185,0.805341,0.25124,0.196918,0.290396,0.058642,0.383917,0.08219,...,0.235774,0.359214,0.871901,0.009966,1.341327,0.03406,-0.485618,0.168506,-1.095028,0.10079
AAMP,0.139837,0.470499,0.005684,0.988458,0.077773,0.496487,0.128518,0.292702,0.132485,0.179721,...,-1.181211,0.237053,0.389658,0.346917,-0.150656,0.560934,-0.244075,0.553876,-0.914955,0.221456


In [4]:
list(dft.columns)

['EB1_WT_0h__b',
 'EB1_WT_0h__p',
 'EB1_WT_00h__b',
 'EB1_WT_00h__p',
 'EB1_WT_8h__b',
 'EB1_WT_8h__p',
 'EB1_WT_18h__b',
 'EB1_WT_18h__p',
 'EB1_WT_24h__b',
 'EB1_WT_24h__p',
 'EB1_WT_48h__b',
 'EB1_WT_48h__p',
 'EB1_mucin_0h__b',
 'EB1_mucin_0h__p',
 'EB1_mucin_00h__b',
 'EB1_mucin_00h__p',
 'EB1_mucin_8h__b',
 'EB1_mucin_8h__p',
 'EB1_mucin_18h__b',
 'EB1_mucin_18h__p',
 'EB1_mucin_24h__b',
 'EB1_mucin_24h__p',
 'EB1_mucin_48h__b',
 'EB1_mucin_48h__p',
 'EB1_ssGP_0h__b',
 'EB1_ssGP_0h__p',
 'EB1_ssGP_00h__b',
 'EB1_ssGP_00h__p',
 'EB1_ssGP_8h__b',
 'EB1_ssGP_8h__p',
 'EB1_ssGP_18h__b',
 'EB1_ssGP_18h__p',
 'EB1_ssGP_24h__b',
 'EB1_ssGP_24h__p',
 'EB1_ssGP_48h__b',
 'EB1_ssGP_48h__p',
 'EB2_WT_deltaVP30_0h__b',
 'EB2_WT_deltaVP30_0h__p',
 'EB2_WT_deltaVP30_8h__b',
 'EB2_WT_deltaVP30_8h__p',
 'EB2_WT_deltaVP30_24h__b',
 'EB2_WT_deltaVP30_24h__p',
 'EB2_WT_deltaVP30_48h__b',
 'EB2_WT_deltaVP30_48h__p',
 'EB2_WT_deltaVP30_72h__b',
 'EB2_WT_deltaVP30_72h__p',
 'H7N9_calu3_WT_0hr__b',
 'H

In [5]:
dftcolparse = dict()
for col in dft.columns:
    dftcolparse[col] = set(col.split('_'))

dfthuman = [c for c in dft.columns if (('mouse' not in dftcolparse[c]) and ('b' in dftcolparse[c]))]
dfthuman

['EB1_WT_0h__b',
 'EB1_WT_00h__b',
 'EB1_WT_8h__b',
 'EB1_WT_18h__b',
 'EB1_WT_24h__b',
 'EB1_WT_48h__b',
 'EB1_mucin_0h__b',
 'EB1_mucin_00h__b',
 'EB1_mucin_8h__b',
 'EB1_mucin_18h__b',
 'EB1_mucin_24h__b',
 'EB1_mucin_48h__b',
 'EB1_ssGP_0h__b',
 'EB1_ssGP_00h__b',
 'EB1_ssGP_8h__b',
 'EB1_ssGP_18h__b',
 'EB1_ssGP_24h__b',
 'EB1_ssGP_48h__b',
 'EB2_WT_deltaVP30_0h__b',
 'EB2_WT_deltaVP30_8h__b',
 'EB2_WT_deltaVP30_24h__b',
 'EB2_WT_deltaVP30_48h__b',
 'EB2_WT_deltaVP30_72h__b',
 'H7N9_calu3_WT_0hr__b',
 'H7N9_calu3_WT_7hr__b',
 'H7N9_calu3_WT_12hr__b',
 'H7N9_calu3_WT_24hr__b',
 'H7N9_calu3_FM_0hr__b',
 'H7N9_calu3_FM_7hr__b',
 'H7N9_calu3_FM_12hr__b',
 'H7N9_calu3_FM_24hr__b',
 'H7N9_calu3_X691_0hr__b',
 'H7N9_calu3_X691_7hr__b',
 'H7N9_calu3_X691_12hr__b',
 'H7N9_calu3_X691_24hr__b',
 'H5N1_calu3_WT_0hr__b',
 'H5N1_calu3_WT_7hr__b',
 'H5N1_calu3_WT_12hr__b',
 'H5N1_calu3_WT_24hr__b',
 'H5N1_calu3_X627E_0hr__b',
 'H5N1_calu3_X627E_7hr__b',
 'H5N1_calu3_X627E_12hr__b',
 'H5N1_calu3_

In [6]:
dfh = dft[dfthuman]
dfh

Unnamed: 0,EB1_WT_0h__b,EB1_WT_00h__b,EB1_WT_8h__b,EB1_WT_18h__b,EB1_WT_24h__b,EB1_WT_48h__b,EB1_mucin_0h__b,EB1_mucin_00h__b,EB1_mucin_8h__b,EB1_mucin_18h__b,...,calu3_SARS__b_ExoNI_moi1_60h,calu3_SARS__b_ExoNI_moi1_72h,calu3_SARS__b_nsp16_moi5_0h,calu3_SARS__b_nsp16_moi5_7h,calu3_SARS__b_nsp16_moi5_12h,calu3_SARS__b_nsp16_moi5_24h,calu3_SARS__b_nsp16_moi5_36h,calu3_SARS__b_nsp16_moi5_48h,calu3_SARS__b_nsp16_moi5_60h,calu3_SARS__b_nsp16_moi5_72h
AAAS,-0.053539,-0.021629,0.069450,0.390366,0.073836,-0.442246,-0.082826,-0.422886,-0.045865,0.308248,...,0.032925,0.137517,0.035045,0.052741,0.007945,-0.035023,0.161498,-0.190269,-0.092381,-0.055857
AACS,0.031504,0.131252,0.309998,0.401375,0.165346,0.079663,-0.291830,-0.386167,0.207874,0.335756,...,-0.153420,0.137480,-0.013850,-0.000476,-0.081773,0.137041,0.147569,-0.175630,-0.074628,0.162620
AADAC,-0.041660,0.031732,-0.106712,-0.224353,-0.045614,-0.890692,-0.040650,0.053375,-0.220946,-0.279067,...,0.233175,0.218920,0.191387,0.394776,-0.225634,-0.009850,-0.563230,-0.054222,0.463434,1.121392
AAK1,0.139425,0.148185,0.251240,0.290396,0.383917,0.680079,-0.185509,0.495312,-0.189335,0.234855,...,0.265520,-0.325278,0.199622,-0.229799,0.292203,-0.436667,0.034660,0.276221,0.386410,-0.384465
AAMP,0.139837,0.005684,0.077773,0.128518,0.132485,0.122469,0.025393,0.055635,0.113951,0.080511,...,-0.104224,0.143239,-0.074407,-0.060982,-0.054198,0.162818,0.108959,-0.065833,-0.096275,0.085290
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ZXDC,0.045434,-0.015377,-0.169418,-0.392423,0.231872,-0.102981,-0.152677,-0.130969,-0.121257,-0.187774,...,-0.232420,0.153464,-0.098613,-0.092549,-0.240960,0.141063,-0.158400,-0.112629,-0.295572,0.069981
ZYG11B,-0.065778,0.048467,-0.124070,-0.093041,-0.090427,-0.550867,-0.152835,-0.038874,-0.064925,-0.487526,...,0.258713,0.128794,-0.231901,0.141698,0.109851,-0.102701,-0.172543,-0.286687,-0.276806,-0.064110
ZYX,0.045843,-0.164044,0.114723,0.658516,0.456055,0.298952,-0.443505,-0.754256,-0.023101,0.849493,...,-0.261560,0.162087,-0.098509,0.138738,-0.280162,0.035223,0.168304,-0.130368,-0.144799,-0.077492
ZZEF1,0.137463,-0.011429,0.198544,0.355522,0.306401,0.334437,-0.138179,0.158692,0.134387,0.329527,...,0.203721,0.200494,-0.031792,-0.029531,0.195086,-0.129848,0.079408,0.079414,0.241151,0.286260


In [7]:
dfs = pd.read_csv(sars2File, sep = '\t', index_col=0)
dfs.head()

Unnamed: 0,Series1_NHBE_Mock_1,Series1_NHBE_Mock_2,Series1_NHBE_Mock_3,Series1_NHBE_SARS-CoV-2_1,Series1_NHBE_SARS-CoV-2_2,Series1_NHBE_SARS-CoV-2_3,Series2_A549_Mock_1,Series2_A549_Mock_2,Series2_A549_Mock_3,Series2_A549_SARS-CoV-2_1,...,Series15_COVID19Lung_1,Series16_A549-ACE2_Mock_1,Series16_A549-ACE2_Mock_2,Series16_A549-ACE2_Mock_3,Series16_A549-ACE2_SARS-CoV-2_1,Series16_A549-ACE2_SARS-CoV-2_2,Series16_A549-ACE2_SARS-CoV-2_3,Series16_A549-ACE2_SARS-CoV-2_Rux_1,Series16_A549-ACE2_SARS-CoV-2_Rux_2,Series16_A549-ACE2_SARS-CoV-2_Rux_3
DDX11L1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
WASH7P,29,24,23,34,19,44,68,43,33,65,...,0,0,11,7,2,6,5,12,6,8
FAM138A,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
FAM138F,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
OR4F5,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [8]:
dfscolparse = dict()
for col in dfs.columns:
    dfscolparse[col] = set(col.split('_'))
    
dfscolparse

{'Series1_NHBE_Mock_1': {'1', 'Mock', 'NHBE', 'Series1'},
 'Series1_NHBE_Mock_2': {'2', 'Mock', 'NHBE', 'Series1'},
 'Series1_NHBE_Mock_3': {'3', 'Mock', 'NHBE', 'Series1'},
 'Series1_NHBE_SARS-CoV-2_1': {'1', 'NHBE', 'SARS-CoV-2', 'Series1'},
 'Series1_NHBE_SARS-CoV-2_2': {'2', 'NHBE', 'SARS-CoV-2', 'Series1'},
 'Series1_NHBE_SARS-CoV-2_3': {'3', 'NHBE', 'SARS-CoV-2', 'Series1'},
 'Series2_A549_Mock_1': {'1', 'A549', 'Mock', 'Series2'},
 'Series2_A549_Mock_2': {'2', 'A549', 'Mock', 'Series2'},
 'Series2_A549_Mock_3': {'3', 'A549', 'Mock', 'Series2'},
 'Series2_A549_SARS-CoV-2_1': {'1', 'A549', 'SARS-CoV-2', 'Series2'},
 'Series2_A549_SARS-CoV-2_2': {'2', 'A549', 'SARS-CoV-2', 'Series2'},
 'Series2_A549_SARS-CoV-2_3': {'3', 'A549', 'SARS-CoV-2', 'Series2'},
 'Series3_A549_Mock_1': {'1', 'A549', 'Mock', 'Series3'},
 'Series3_A549_Mock_2': {'2', 'A549', 'Mock', 'Series3'},
 'Series3_A549_RSV_1': {'1', 'A549', 'RSV', 'Series3'},
 'Series3_A549_RSV_2': {'2', 'A549', 'RSV', 'Series3'},
 'Se

In [9]:
clusters = [[[1,2,3],[4,5,6]],[[7,8,9],[10,11,12]],[[13,14],[15,16]],[[17,18],[19,20]],[[21,22,23],[24,25,26]],[[27,28,29],[30,31,32]],[[33,34,35],[36,37,38]],[[39,40,41],[42,43,44]],[[39,40,41],[45,46,47]],[[48,49,50,51],[52,53,54,55]],[[48,49,50,51],[56,57,58,59]],[[66,67],[68,69]],[[70,71,72],[73,74,75]],[[70,71,72],[76,77,78]]]
clusters

[[[1, 2, 3], [4, 5, 6]],
 [[7, 8, 9], [10, 11, 12]],
 [[13, 14], [15, 16]],
 [[17, 18], [19, 20]],
 [[21, 22, 23], [24, 25, 26]],
 [[27, 28, 29], [30, 31, 32]],
 [[33, 34, 35], [36, 37, 38]],
 [[39, 40, 41], [42, 43, 44]],
 [[39, 40, 41], [45, 46, 47]],
 [[48, 49, 50, 51], [52, 53, 54, 55]],
 [[48, 49, 50, 51], [56, 57, 58, 59]],
 [[66, 67], [68, 69]],
 [[70, 71, 72], [73, 74, 75]],
 [[70, 71, 72], [76, 77, 78]]]

In [10]:
experiments = ["Series1_NHBE_SARS-CoV-2","Series2_A549_SARS-CoV-2","Series3_A549_RSV","Series4_A549_IAV","Series5_A549_SARS-CoV-2","Series6_A549-ACE2_SARS-CoV-2","Series7_Calu3_SARS-CoV-2","Series8_A549_RSV","Series8_A549_HPIV3","Series9_NHBE_IAV","Series9_NHBE_IAVdNS1","Series15_COVID19Lung","Series16_A549-ACE2_SARS-CoV-2","Series16_A549-ACE2_SARS-CoV-2_Rux"]
experiments

['Series1_NHBE_SARS-CoV-2',
 'Series2_A549_SARS-CoV-2',
 'Series3_A549_RSV',
 'Series4_A549_IAV',
 'Series5_A549_SARS-CoV-2',
 'Series6_A549-ACE2_SARS-CoV-2',
 'Series7_Calu3_SARS-CoV-2',
 'Series8_A549_RSV',
 'Series8_A549_HPIV3',
 'Series9_NHBE_IAV',
 'Series9_NHBE_IAVdNS1',
 'Series15_COVID19Lung',
 'Series16_A549-ACE2_SARS-CoV-2',
 'Series16_A549-ACE2_SARS-CoV-2_Rux']

In [11]:
dfsc = pd.concat([(dfs.iloc[:,np.array(cluster)[1]-1].transform(np.log2).mean(axis = 1) - dfs.iloc[:,np.array(cluster)[0]-1].transform(np.log2).mean(axis = 1)).replace([np.inf, -np.inf, np.nan], 0) for cluster in clusters], axis = 1)

In [12]:
dfsc.columns = experiments
dfsc.to_pickle("sars2cleaned.pkl")
dfsc

Unnamed: 0,Series1_NHBE_SARS-CoV-2,Series2_A549_SARS-CoV-2,Series3_A549_RSV,Series4_A549_IAV,Series5_A549_SARS-CoV-2,Series6_A549-ACE2_SARS-CoV-2,Series7_Calu3_SARS-CoV-2,Series8_A549_RSV,Series8_A549_HPIV3,Series9_NHBE_IAV,Series9_NHBE_IAVdNS1,Series15_COVID19Lung,Series16_A549-ACE2_SARS-CoV-2,Series16_A549-ACE2_SARS-CoV-2_Rux
DDX11L1,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0
WASH7P,0.276106,0.450996,-1.712436,-2.160964,-0.037497,-2.889628,-0.343641,-0.806679,-0.883788,-1.330497,-0.131567,0.0,0.0,0.0
FAM138A,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0
FAM138F,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0
OR4F5,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
CDY1B,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0
CDY1,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0
CSPG4P1Y,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0
GOLGA2P3Y,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0


In [13]:
dfscRowNames = dfsc.index.values
dfhRowNames = dfh.index.values

In [14]:
commonGeneNames = np.intersect1d(dfscRowNames, dfhRowNames)
commonGeneNames

array(['AAAS', 'AACS', 'AADAC', ..., 'ZYX', 'ZZEF1', 'ZZZ3'], dtype=object)

In [15]:
import copy
usefulCols = copy.copy(experiments)
dropping = [experiments[i] for i in [1,4,11]]
[usefulCols.remove(drop) for drop in dropping]
usefulCols

['Series1_NHBE_SARS-CoV-2',
 'Series3_A549_RSV',
 'Series4_A549_IAV',
 'Series6_A549-ACE2_SARS-CoV-2',
 'Series7_Calu3_SARS-CoV-2',
 'Series8_A549_RSV',
 'Series8_A549_HPIV3',
 'Series9_NHBE_IAV',
 'Series9_NHBE_IAVdNS1',
 'Series16_A549-ACE2_SARS-CoV-2',
 'Series16_A549-ACE2_SARS-CoV-2_Rux']

In [16]:
sars2Cols = [colName for colName in usefulCols if ("SARS" in colName) ]
sars2Cols

['Series1_NHBE_SARS-CoV-2',
 'Series6_A549-ACE2_SARS-CoV-2',
 'Series7_Calu3_SARS-CoV-2',
 'Series16_A549-ACE2_SARS-CoV-2',
 'Series16_A549-ACE2_SARS-CoV-2_Rux']

In [17]:
nosars2Cols = [colName for colName in usefulCols if ("SARS" not in colName) ]
nosars2Cols

['Series3_A549_RSV',
 'Series4_A549_IAV',
 'Series8_A549_RSV',
 'Series8_A549_HPIV3',
 'Series9_NHBE_IAV',
 'Series9_NHBE_IAVdNS1']

In [18]:
dfhf = dfh.loc[commonGeneNames]
dfsf = dfsc.loc[commonGeneNames]

In [19]:
dff = pd.concat([dfhf,dfsf[usefulCols]], axis = 1)
dff

Unnamed: 0,EB1_WT_0h__b,EB1_WT_00h__b,EB1_WT_8h__b,EB1_WT_18h__b,EB1_WT_24h__b,EB1_WT_48h__b,EB1_mucin_0h__b,EB1_mucin_00h__b,EB1_mucin_8h__b,EB1_mucin_18h__b,...,Series3_A549_RSV,Series4_A549_IAV,Series6_A549-ACE2_SARS-CoV-2,Series7_Calu3_SARS-CoV-2,Series8_A549_RSV,Series8_A549_HPIV3,Series9_NHBE_IAV,Series9_NHBE_IAVdNS1,Series16_A549-ACE2_SARS-CoV-2,Series16_A549-ACE2_SARS-CoV-2_Rux
AAAS,-0.053539,-0.021629,0.069450,0.390366,0.073836,-0.442246,-0.082826,-0.422886,-0.045865,0.308248,...,-1.989792,-1.184323,-2.474763,-0.570161,-0.351737,0.351903,-0.103042,0.155755,-1.711231,-1.278946
AACS,0.031504,0.131252,0.309998,0.401375,0.165346,0.079663,-0.291830,-0.386167,0.207874,0.335756,...,-2.202171,-0.946714,-2.731488,-0.753072,1.162057,1.322487,0.342570,0.522355,-1.458900,-0.959009
AADAC,-0.041660,0.031732,-0.106712,-0.224353,-0.045614,-0.890692,-0.040650,0.053375,-0.220946,-0.279067,...,-1.719463,-0.370890,-1.723320,-0.723308,1.957697,2.213505,0.000000,0.000000,0.534023,0.789086
AAK1,0.139425,0.148185,0.251240,0.290396,0.383917,0.680079,-0.185509,0.495312,-0.189335,0.234855,...,-1.983893,-0.421551,-2.154003,0.651957,1.864634,1.745253,0.652229,0.388312,-0.427195,-0.154886
AAMP,0.139837,0.005684,0.077773,0.128518,0.132485,0.122469,0.025393,0.055635,0.113951,0.080511,...,-1.860399,-1.139992,-2.301581,-0.598850,0.854072,1.078711,0.229610,0.488917,-1.715057,-1.392461
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ZXDC,0.045434,-0.015377,-0.169418,-0.392423,0.231872,-0.102981,-0.152677,-0.130969,-0.121257,-0.187774,...,-1.651267,-0.765669,-1.987821,0.261876,1.237991,1.567384,0.101000,0.519108,-0.767060,-0.511800
ZYG11B,-0.065778,0.048467,-0.124070,-0.093041,-0.090427,-0.550867,-0.152835,-0.038874,-0.064925,-0.487526,...,-2.451050,-0.349517,-2.229211,0.250027,1.041049,1.889957,0.713448,0.340771,-1.596324,-0.697522
ZYX,0.045843,-0.164044,0.114723,0.658516,0.456055,0.298952,-0.443505,-0.754256,-0.023101,0.849493,...,-1.160028,-0.684129,-2.960088,0.290458,0.872186,0.634645,0.163536,0.573991,-1.208096,-1.010533
ZZEF1,0.137463,-0.011429,0.198544,0.355522,0.306401,0.334437,-0.138179,0.158692,0.134387,0.329527,...,-1.849951,-0.607940,-1.769701,-0.017877,1.321725,1.615219,0.476449,-0.023698,-0.032857,0.421377


In [20]:
dff.to_pickle("biggerTrans.pkl")
dff.to_csv("biggerTrans.csv")

In [21]:
dffwos2 = pd.concat([dfhf,dfsf[nosars2Cols]], axis = 1)
dffwos2

Unnamed: 0,EB1_WT_0h__b,EB1_WT_00h__b,EB1_WT_8h__b,EB1_WT_18h__b,EB1_WT_24h__b,EB1_WT_48h__b,EB1_mucin_0h__b,EB1_mucin_00h__b,EB1_mucin_8h__b,EB1_mucin_18h__b,...,calu3_SARS__b_nsp16_moi5_36h,calu3_SARS__b_nsp16_moi5_48h,calu3_SARS__b_nsp16_moi5_60h,calu3_SARS__b_nsp16_moi5_72h,Series3_A549_RSV,Series4_A549_IAV,Series8_A549_RSV,Series8_A549_HPIV3,Series9_NHBE_IAV,Series9_NHBE_IAVdNS1
AAAS,-0.053539,-0.021629,0.069450,0.390366,0.073836,-0.442246,-0.082826,-0.422886,-0.045865,0.308248,...,0.161498,-0.190269,-0.092381,-0.055857,-1.989792,-1.184323,-0.351737,0.351903,-0.103042,0.155755
AACS,0.031504,0.131252,0.309998,0.401375,0.165346,0.079663,-0.291830,-0.386167,0.207874,0.335756,...,0.147569,-0.175630,-0.074628,0.162620,-2.202171,-0.946714,1.162057,1.322487,0.342570,0.522355
AADAC,-0.041660,0.031732,-0.106712,-0.224353,-0.045614,-0.890692,-0.040650,0.053375,-0.220946,-0.279067,...,-0.563230,-0.054222,0.463434,1.121392,-1.719463,-0.370890,1.957697,2.213505,0.000000,0.000000
AAK1,0.139425,0.148185,0.251240,0.290396,0.383917,0.680079,-0.185509,0.495312,-0.189335,0.234855,...,0.034660,0.276221,0.386410,-0.384465,-1.983893,-0.421551,1.864634,1.745253,0.652229,0.388312
AAMP,0.139837,0.005684,0.077773,0.128518,0.132485,0.122469,0.025393,0.055635,0.113951,0.080511,...,0.108959,-0.065833,-0.096275,0.085290,-1.860399,-1.139992,0.854072,1.078711,0.229610,0.488917
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ZXDC,0.045434,-0.015377,-0.169418,-0.392423,0.231872,-0.102981,-0.152677,-0.130969,-0.121257,-0.187774,...,-0.158400,-0.112629,-0.295572,0.069981,-1.651267,-0.765669,1.237991,1.567384,0.101000,0.519108
ZYG11B,-0.065778,0.048467,-0.124070,-0.093041,-0.090427,-0.550867,-0.152835,-0.038874,-0.064925,-0.487526,...,-0.172543,-0.286687,-0.276806,-0.064110,-2.451050,-0.349517,1.041049,1.889957,0.713448,0.340771
ZYX,0.045843,-0.164044,0.114723,0.658516,0.456055,0.298952,-0.443505,-0.754256,-0.023101,0.849493,...,0.168304,-0.130368,-0.144799,-0.077492,-1.160028,-0.684129,0.872186,0.634645,0.163536,0.573991
ZZEF1,0.137463,-0.011429,0.198544,0.355522,0.306401,0.334437,-0.138179,0.158692,0.134387,0.329527,...,0.079408,0.079414,0.241151,0.286260,-1.849951,-0.607940,1.321725,1.615219,0.476449,-0.023698


In [22]:
dffwos2.to_pickle("biggerTransNoSars2.pkl")
dffwos2.to_csv("biggerTransNoSars2.csv")

In [23]:
nosarsCols = [colName for colName in list(dfh.columns) if ("SARS" not in colName)]
#nosarsCols

In [24]:
dffwos = pd.concat([dfhf[nosarsCols],dfsf[nosars2Cols]], axis = 1)
dffwos

Unnamed: 0,EB1_WT_0h__b,EB1_WT_00h__b,EB1_WT_8h__b,EB1_WT_18h__b,EB1_WT_24h__b,EB1_WT_48h__b,EB1_mucin_0h__b,EB1_mucin_00h__b,EB1_mucin_8h__b,EB1_mucin_18h__b,...,Day7_MERS10e5_Vs_Mock_b,Day2_MERS10e6_Vs_Mock_b,Day4_MERS10e6_Vs_Mock_b,Day7_MERS10e6_Vs_Mock_b,Series3_A549_RSV,Series4_A549_IAV,Series8_A549_RSV,Series8_A549_HPIV3,Series9_NHBE_IAV,Series9_NHBE_IAVdNS1
AAAS,-0.053539,-0.021629,0.069450,0.390366,0.073836,-0.442246,-0.082826,-0.422886,-0.045865,0.308248,...,0.171651,-0.199660,0.792779,0.368657,-1.989792,-1.184323,-0.351737,0.351903,-0.103042,0.155755
AACS,0.031504,0.131252,0.309998,0.401375,0.165346,0.079663,-0.291830,-0.386167,0.207874,0.335756,...,-0.784688,-0.504837,-0.136926,-0.879605,-2.202171,-0.946714,1.162057,1.322487,0.342570,0.522355
AADAC,-0.041660,0.031732,-0.106712,-0.224353,-0.045614,-0.890692,-0.040650,0.053375,-0.220946,-0.279067,...,0.136705,-0.028470,-0.001031,-0.184787,-1.719463,-0.370890,1.957697,2.213505,0.000000,0.000000
AAK1,0.139425,0.148185,0.251240,0.290396,0.383917,0.680079,-0.185509,0.495312,-0.189335,0.234855,...,0.486412,0.121008,-0.047186,0.362905,-1.983893,-0.421551,1.864634,1.745253,0.652229,0.388312
AAMP,0.139837,0.005684,0.077773,0.128518,0.132485,0.122469,0.025393,0.055635,0.113951,0.080511,...,-0.925608,-0.659245,-0.107935,-0.704082,-1.860399,-1.139992,0.854072,1.078711,0.229610,0.488917
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ZXDC,0.045434,-0.015377,-0.169418,-0.392423,0.231872,-0.102981,-0.152677,-0.130969,-0.121257,-0.187774,...,-0.574723,-0.163952,-0.215609,0.268579,-1.651267,-0.765669,1.237991,1.567384,0.101000,0.519108
ZYG11B,-0.065778,0.048467,-0.124070,-0.093041,-0.090427,-0.550867,-0.152835,-0.038874,-0.064925,-0.487526,...,0.299537,0.160074,0.063669,-0.074202,-2.451050,-0.349517,1.041049,1.889957,0.713448,0.340771
ZYX,0.045843,-0.164044,0.114723,0.658516,0.456055,0.298952,-0.443505,-0.754256,-0.023101,0.849493,...,-0.208051,-0.561684,0.053471,0.290445,-1.160028,-0.684129,0.872186,0.634645,0.163536,0.573991
ZZEF1,0.137463,-0.011429,0.198544,0.355522,0.306401,0.334437,-0.138179,0.158692,0.134387,0.329527,...,0.042277,0.051029,-0.184655,-0.001946,-1.849951,-0.607940,1.321725,1.615219,0.476449,-0.023698


In [25]:
dffwos.to_pickle("biggerTransNoSars.pkl")
dffwos.to_csv("biggerTransNoSars.csv")

In [26]:
nocovCols = [colName for colName in list(dfh.columns) if ("SARS" not in colName and "MERS" not in colName)]
#nocovCols

In [27]:
dffwoc = pd.concat([dfhf[nocovCols],dfsf[nosars2Cols]], axis = 1)
dffwoc

Unnamed: 0,EB1_WT_0h__b,EB1_WT_00h__b,EB1_WT_8h__b,EB1_WT_18h__b,EB1_WT_24h__b,EB1_WT_48h__b,EB1_mucin_0h__b,EB1_mucin_00h__b,EB1_mucin_8h__b,EB1_mucin_18h__b,...,H1N1_WT_calu3_Cal04_12hr__b,H1N1_WT_calu3_Cal04_24hr__b,H1N1_WT_calu3_Cal04_36hr__b,H1N1_WT_calu3_Cal04_48hr__b,Series3_A549_RSV,Series4_A549_IAV,Series8_A549_RSV,Series8_A549_HPIV3,Series9_NHBE_IAV,Series9_NHBE_IAVdNS1
AAAS,-0.053539,-0.021629,0.069450,0.390366,0.073836,-0.442246,-0.082826,-0.422886,-0.045865,0.308248,...,-0.973503,-0.079759,-0.465493,-1.025579,-1.989792,-1.184323,-0.351737,0.351903,-0.103042,0.155755
AACS,0.031504,0.131252,0.309998,0.401375,0.165346,0.079663,-0.291830,-0.386167,0.207874,0.335756,...,-0.360177,-0.277152,-1.198113,-1.204228,-2.202171,-0.946714,1.162057,1.322487,0.342570,0.522355
AADAC,-0.041660,0.031732,-0.106712,-0.224353,-0.045614,-0.890692,-0.040650,0.053375,-0.220946,-0.279067,...,-0.040589,0.002395,0.025896,-0.100877,-1.719463,-0.370890,1.957697,2.213505,0.000000,0.000000
AAK1,0.139425,0.148185,0.251240,0.290396,0.383917,0.680079,-0.185509,0.495312,-0.189335,0.234855,...,-0.513472,-0.781464,-1.457754,-1.365912,-1.983893,-0.421551,1.864634,1.745253,0.652229,0.388312
AAMP,0.139837,0.005684,0.077773,0.128518,0.132485,0.122469,0.025393,0.055635,0.113951,0.080511,...,-0.252762,0.140468,-0.183620,0.077374,-1.860399,-1.139992,0.854072,1.078711,0.229610,0.488917
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ZXDC,0.045434,-0.015377,-0.169418,-0.392423,0.231872,-0.102981,-0.152677,-0.130969,-0.121257,-0.187774,...,-0.694767,0.460238,-0.261143,-0.286167,-1.651267,-0.765669,1.237991,1.567384,0.101000,0.519108
ZYG11B,-0.065778,0.048467,-0.124070,-0.093041,-0.090427,-0.550867,-0.152835,-0.038874,-0.064925,-0.487526,...,-0.916250,-0.569090,-0.918276,-0.913063,-2.451050,-0.349517,1.041049,1.889957,0.713448,0.340771
ZYX,0.045843,-0.164044,0.114723,0.658516,0.456055,0.298952,-0.443505,-0.754256,-0.023101,0.849493,...,-0.199760,0.362320,-0.130798,0.017730,-1.160028,-0.684129,0.872186,0.634645,0.163536,0.573991
ZZEF1,0.137463,-0.011429,0.198544,0.355522,0.306401,0.334437,-0.138179,0.158692,0.134387,0.329527,...,-0.746715,0.108914,-0.964507,-0.822642,-1.849951,-0.607940,1.321725,1.615219,0.476449,-0.023698


In [28]:
dffwoc.to_pickle("biggerTransNoCov.pkl")
dffwoc.to_csv("biggerTransNoCov.csv")

## Load tables

In [2]:
dff = pd.read_pickle("biggerTrans.pkl")

In [3]:
dffwos2 = pd.read_pickle("biggerTransNoSars2.pkl")

In [4]:
dffwos = pd.read_pickle("biggerTransNoSars.pkl")

In [5]:
dffwoc = pd.read_pickle("biggerTransNoCov.pkl")

## Build the hypergraph

In [6]:
dff.shape

(9524, 169)

In [7]:
# now we use some (though not all) of the from_dataframe() arguments and
# let the function take care of the dataframe manipulation
Hf = hnx.Hypergraph.from_dataframe(dff, # the whole dataframe, b and p columns
                                                    #columns=human_b_cols, # choose specific columns
                                                    zsc='columns', # other option is 'rows'
                                                    absolute=True, # absolute value after z-score is taken
                                                    lower_thresh=2) # applies the > 2 threshold after zscore and absolute value)

# options that I used the defaults for:
# transpose = False: this will transpose the dataframe after z-score and absolute value, essentially creating the dual hypergraph. Instead we're taking the dual after the fact (below).
# name = None (string): If you want to give the resulting hypergraph a "name" attribute. Not necessary.
# key = None (function which evaluates True or False): This is for more complcated thresholding. If you're just doing z-score > some threshold you don't need to worry about this.
# rows = None (list of row names): If you want to use only a subset of the rows. This is done before taking z-score so your z-score will be relative only to those rows chosen.
# upper_thresh = None (number): You can have a maximum value for the the zscore if you want. You can use both upper_thresh and lower_thresh.

In [8]:
# running example here for large s value because it finishes relatively quickly. 
# Small s values take a long time on these large hypergraphs!
#betcen30 = hnx.s_betweenness_centrality(HfD, s=30)
#clocen30 = hnx.s_harmonic_closeness_centrality(HfD, s=30)

In [9]:
Hf.shape

(8010, 159)

In [10]:
HfD = Hf.dual()
HfD.shape

(159, 8010)

In [11]:
# now we use some (though not all) of the from_dataframe() arguments and
# let the function take care of the dataframe manipulation
Hfwos2 = hnx.Hypergraph.from_dataframe(dffwos2, # the whole dataframe, b and p columns
                                                    #columns=human_b_cols, # choose specific columns
                                                    zsc='columns', # other option is 'rows'
                                                    absolute=True, # absolute value after z-score is taken
                                                    lower_thresh=2) # applies the > 2 threshold after zscore and absolute value)

# options that I used the defaults for:
# transpose = False: this will transpose the dataframe after z-score and absolute value, essentially creating the dual hypergraph. Instead we're taking the dual after the fact (below).
# name = None (string): If you want to give the resulting hypergraph a "name" attribute. Not necessary.
# key = None (function which evaluates True or False): This is for more complcated thresholding. If you're just doing z-score > some threshold you don't need to worry about this.
# rows = None (list of row names): If you want to use only a subset of the rows. This is done before taking z-score so your z-score will be relative only to those rows chosen.
# upper_thresh = None (number): You can have a maximum value for the the zscore if you want. You can use both upper_thresh and lower_thresh.

In [12]:
Hfwos2.shape

(7960, 154)

In [13]:
Hfwos2D = Hfwos2.dual()

In [14]:
Hfwos2D.shape

(154, 7960)

In [15]:
graph = HfD

In [16]:
import ray

In [17]:
ray.init(num_cpus=5)

2020-05-23 13:49:15,794	INFO resource_spec.py:204 -- Starting Ray with 9.81 GiB memory available for workers and up to 4.92 GiB for objects. You can adjust these settings with ray.init(memory=<bytes>, object_store_memory=<bytes>).
2020-05-23 13:49:16,160	INFO services.py:1168 -- View the Ray dashboard at [1m[32mlocalhost:8266[39m[22m


{'node_ip_address': '130.20.194.73',
 'raylet_ip_address': '130.20.194.73',
 'redis_address': '130.20.194.73:29508',
 'object_store_address': '/tmp/ray/session_2020-05-23_13-49-15_782235_51499/sockets/plasma_store',
 'raylet_socket_name': '/tmp/ray/session_2020-05-23_13-49-15_782235_51499/sockets/raylet',
 'webui_url': 'localhost:8266',
 'session_dir': '/tmp/ray/session_2020-05-23_13-49-15_782235_51499'}

In [18]:
@ray.remote
def betweenness(s, graph = graph):
    sbt = hnx.s_betweenness_centrality(graph, s=s)
    sbt = pd.Series(sbt)
    sbt.to_pickle("intermediate/" + str(s) + "-betweenness.pkl")
    return(sbt)

@ray.remote
def closeness(s, graph = graph):
    scl = hnx.s_harmonic_closeness_centrality(graph, s=s)
    scl = pd.Series(scl)
    scl.to_pickle("intermediate/" + str(s) + "-closeness.pkl")
    return(scl)    

In [46]:
#import multiprocessing
#cpuNum = multiprocessing.cpu_count()
#pool = multiprocessing.Pool(cpuNum - 2)

In [47]:
graph = HfD

In [None]:
HfDsbt = ray.get([betweenness.remote(i) for i in range(1,51)])

In [None]:
HfDscl = ray.get([closeness.remote(i) for i in range(1,51)])

[2m[33m(pid=raylet)[0m F0523 16:18:50.839329 262149568 service_based_gcs_client.cc:92]  Check failed: num_attempts < RayConfig::instance().gcs_service_connect_retries() No entry found for GcsServerAddress
[2m[33m(pid=raylet)[0m *** Check failure stack trace: ***
[2m[33m(pid=raylet)[0m     @        0x10903bae2  google::LogMessage::~LogMessage()
[2m[33m(pid=raylet)[0m     @        0x108b9a555  ray::RayLog::~RayLog()
[2m[33m(pid=raylet)[0m     @        0x10893c9a6  ray::gcs::ServiceBasedGcsClient::GetGcsServerAddressFromRedis()
[2m[33m(pid=raylet)[0m     @        0x108940c2e  std::__1::__function::__func<>::operator()()
[2m[33m(pid=raylet)[0m     @        0x108910319  _ZZN3ray3rpc12GcsRpcClient15ReportHeartbeatERKNS0_22ReportHeartbeatRequestERKNSt3__18functionIFvRKNS_6StatusERKNS0_20ReportHeartbeatReplyEEEEENKUlS9_SC_E_clES9_SC_
[2m[33m(pid=raylet)[0m     @        0x10890fc9b  ray::rpc::ClientCallImpl<>::OnReplyReceived()
[2m[33m(pid=raylet)[0m     @        0x10

In [3]:
HfDsbt = []
HfDscl = []
for s in range(1,51):
    HfDsbt.append(pd.read_pickle("intermediate/" + str(s) + "-betweenness.pkl"))
    HfDscl.append(pd.read_pickle("intermediate/" + str(s) + "-closeness.pkl"))

In [4]:
sbetweenness = pd.concat(HfDsbt, axis = 1).replace([np.inf, -np.inf, np.nan], 0)

In [5]:
scloseness = pd.concat(HfDscl, axis = 1).replace([np.inf, -np.inf, np.nan], 0)

In [6]:
sbetweenness.columns = list(range(1,51))
scloseness.columns = list(range(1,51))

In [7]:
sbetweenness.to_pickle("biggerTrans-s-betweenness.pkl")
sbetweenness.to_csv("biggerTrans-s-betweenness.csv")

In [8]:
scloseness.to_pickle("biggerTrans-s-closeness.pkl")
scloseness.to_csv("biggerTrans-s-closeness.csv")

In [None]:
graph = Hfwos2D

In [None]:
Hfwos2Dsbt = ray.get([betweenness.remote(i) for i in range(1,51)])
Hfwos2Dscl = ray.get([closeness.remote(i) for i in range(1,51)])

In [9]:
Hfwos2Dsbt = []
Hfwos2Dscl = []
for s in range(1,51):
    Hfwos2Dsbt.append(pd.read_pickle("intermediateNoSars2/" + str(s) + "-betweenness.pkl"))
    Hfwos2Dscl.append(pd.read_pickle("intermediateNoSars2/" + str(s) + "-closeness.pkl"))

In [10]:
sbetweennessWoS2 = pd.concat(Hfwos2Dsbt, axis = 1).replace([np.inf, -np.inf, np.nan], 0)
sclosenessWoS2 = pd.concat(Hfwos2Dscl, axis = 1).replace([np.inf, -np.inf, np.nan], 0)

In [11]:
sbetweennessWoS2.columns = list(range(1,51))
sclosenessWoS2.columns = list(range(1,51))

In [12]:
sbetweennessWoS2.to_pickle("biggerTransNoSars2-s-betweenness.pkl")
sbetweennessWoS2.to_csv("biggerTransNoSars2-s-betweenness.csv")

In [13]:
sclosenessWoS2.to_pickle("biggerTransNoSars2-s-closeness.pkl")
sclosenessWoS2.to_csv("biggerTransNoSars2-s-closeness.csv")

In [43]:
ray.shutdown()

### Analyze the results

In [31]:
sbetweenness = pd.read_pickle("biggerTrans-s-betweenness.pkl")
sbetweenness

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,...,41,42,43,44,45,46,47,48,49,50
CAPZB,0.000150,0.000142,8.819112e-05,5.714265e-05,5.441166e-05,2.430007e-05,1.519429e-05,0.000006,0.000004,0.000002,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
SVOPL,0.000060,0.000025,4.304878e-06,4.656175e-07,2.858239e-09,0.000000e+00,0.000000e+00,0.000000,0.000000,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
CDK5RAP2,0.000012,0.000000,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000,0.000000,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
ANK1,0.000109,0.000064,3.348922e-05,2.095810e-05,1.739117e-05,9.638102e-07,1.257719e-07,0.000000,0.000000,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
CCDC81,0.000065,0.000046,9.694567e-06,1.150885e-05,3.652107e-06,2.715540e-07,0.000000e+00,0.000000,0.000000,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
AVIL,0.000518,0.000661,6.743457e-04,6.649639e-04,3.558500e-04,4.707320e-04,1.376842e-04,0.000120,0.000205,0.000109,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
NCAPG,0.000034,0.000003,1.264298e-07,7.115161e-09,0.000000e+00,0.000000e+00,0.000000e+00,0.000000,0.000000,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
MYBPH,0.000006,0.000000,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000,0.000000,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
CALML3,0.000011,0.000000,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000,0.000000,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [32]:
scloseness = pd.read_pickle("biggerTrans-s-closeness.pkl")
scloseness

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,...,41,42,43,44,45,46,47,48,49,50
ATP6V1G2,0.642777,0.564432,0.522610,0.493235,0.458562,0.407446,0.000000,0.000000,0.000000,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
RASD1,0.840867,0.772239,0.728177,0.695014,0.658218,0.622772,0.590095,0.571206,0.531172,0.511459,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
DAB2IP,0.623236,0.541141,0.500140,0.439537,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
TAP2,0.709015,0.619721,0.577407,0.544936,0.518982,0.495338,0.470078,0.454604,0.429207,0.411683,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
GPSM1,0.684230,0.578073,0.539293,0.512047,0.485992,0.459653,0.423499,0.412803,0.384241,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
GPATCH8,0.527344,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
PRPF39,0.527344,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
CTDSPL2,0.527344,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
EHF,0.527344,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [33]:
sbetweennessWoS2 = pd.read_pickle("biggerTransNoSars2-s-betweenness.pkl")
sbetweennessWoS2

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,...,41,42,43,44,45,46,47,48,49,50
PTCH1,0.000033,9.180005e-06,2.399982e-06,2.093663e-07,0.000000,0.000000e+00,0.000000e+00,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
KRT85,0.000240,1.444418e-04,6.386706e-05,3.172846e-05,0.000027,7.547034e-05,1.188830e-07,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
IWS1,0.000029,7.719029e-06,1.658944e-06,2.394198e-07,0.000000,0.000000e+00,0.000000e+00,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
CDC34,0.000030,1.572892e-05,1.081604e-05,6.620388e-06,0.000004,4.917013e-07,8.842288e-09,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
RFX1,0.000003,0.000000e+00,0.000000e+00,0.000000e+00,0.000000,0.000000e+00,0.000000e+00,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
POP7,0.000010,0.000000e+00,0.000000e+00,0.000000e+00,0.000000,0.000000e+00,0.000000e+00,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
BCKDHB,0.000000,0.000000e+00,0.000000e+00,0.000000e+00,0.000000,0.000000e+00,0.000000e+00,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
TBC1D4,0.000033,1.672712e-07,0.000000e+00,0.000000e+00,0.000000,0.000000e+00,0.000000e+00,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
PRR3,0.000039,2.024390e-06,2.905010e-08,0.000000e+00,0.000000,0.000000e+00,0.000000e+00,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [34]:
sclosenessWoS2 = pd.read_pickle("biggerTransNoSars2-s-closeness.pkl")
sclosenessWoS2

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,...,41,42,43,44,45,46,47,48,49,50
MADD,0.591155,0.525492,0.496608,0.441096,0.348813,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
LRRC25,0.767998,0.691226,0.645117,0.606519,0.569807,0.532258,0.498121,0.470850,0.437684,0.416771,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
MYBPC3,0.758512,0.650743,0.592811,0.550494,0.518717,0.488575,0.461970,0.440162,0.369232,0.333996,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
RTN3,0.710516,0.642794,0.598455,0.569082,0.544095,0.516003,0.489451,0.473155,0.442317,0.425635,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
GPR176,0.625455,0.557756,0.508466,0.473117,0.420462,0.362126,0.000000,0.000000,0.000000,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
VAMP1,0.522679,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
RGMB,0.522679,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
AKAP7,0.522679,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
CASP7,0.522679,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


#### Calculate the differences between the metrics and average metrics

##### New genes poped up in hypergraph after adding SARS2 data

In [35]:
hnXOR = list((set(sbetweenness.index) ^ set(sbetweennessWoS2.index)) & set(sbetweenness.index))
hnXOR

['VEZF1',
 'MPND',
 'MALL',
 'ZXDA',
 'TMEM60',
 'SRFBP1',
 'ACADL',
 'NDUFB8',
 'CTDSPL2',
 'EHF',
 'KCNK6',
 'NSDHL',
 'MBTPS2',
 'DMRTA1',
 'CYP2U1',
 'MARCKS',
 'PRX',
 'AP2M1',
 'ARMCX5',
 'PSTPIP2',
 'VIPR1',
 'SLC22A1',
 'PLEKHF2',
 'CCDC126',
 'RBPJ',
 'SCIN',
 'ANKRD49',
 'IFT122',
 'SLC25A11',
 'ZBTB5',
 'NPEPL1',
 'XK',
 'ALDH4A1',
 'RSF1',
 'PRR14',
 'GCDH',
 'PRELID2',
 'STK33',
 'IL27RA',
 'TBC1D15',
 'ZP3',
 'PSMB3',
 'PRPF39',
 'GPATCH8',
 'SCEL',
 'BNIP1',
 'KCNAB2',
 'SENP8',
 'THAP1',
 'CRTC2']

In [36]:
len(hnXOR)

50

Function from Emilie for calculating differences of centrality scores and ranks

In [None]:
# Given a dictionary, D, where the values are numerical returns a dictionary
# which gives the ranks for each of they keys in D. All values (w/o duplicates)
# are sorted and then assigned a rank. Ties are not assigned the average rank
# rather the ranks are between 1 and the number of unique values in D
def rank_w_duplicates(D): # D is a dictionary
    Ds = sorted(D.items(), key=lambda x: x[1], reverse=True)
    
    values = sorted(list(set(D.values())))
    ranks = {values[i] : len(values)-i for i in range(len(values))}
    
    Dr = {Ds[i][0] : ranks[Ds[i][1]] for i in range(len(Ds))}
    
    return Dr


def compare_centralities(c1, c2): # c1 and c2 are dictionaries
    # find common keys, keys only in c1, keys only in c2
    c1_keys = set(c1.keys())
    c2_keys = set(c2.keys())
    common_keys = c1_keys.intersection(c2_keys)
    c1_only = c1_keys.difference(c2_keys)
    c2_only = c2_keys.difference(c1_keys)
    
    # find difference between c1 and c2 values
    value_diff = {}
    for key in common_keys:
        value_diff[key] = c1[key] - c2[key]
    for key in c1_only:
        value_diff[key] = c1[key] - 0
    for key in c2_only:
        value_diff[key] = 0 - c2[key]
    
    # rank keys in both
    c1r = rank_w_duplicates(c1)
    c2r = rank_w_duplicates(c2)
        
    # find difference between c1 and c2 ranks
    rank_diff = {}
    for key in common_keys:
        rank_diff[key] = c1r[key] - c2r[key]
    for key in c1_only:
        rank_diff[key] = c1r[key] - len(c2r)
    for key in c2_only:
        rank_diff[key] = len(c1r) - c2r[key]
    
    return common_keys, c1_only, c2_only, value_diff, rank_diff, c1r, c2r

In [None]:
def diffDf(df0, df1):
    if df0.shape[1] != df1.shape[1]:
        print("The numbers of columns are not the same!")
    

In [37]:
sbetweenness.drop(hnXOR, inplace = True)
scloseness.drop(hnXOR, inplace = True)

In [39]:
sbetweenness.sort_index(inplace = True)
sbetweenness

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,...,41,42,43,44,45,46,47,48,49,50
AAAS,0.000073,3.627737e-05,1.459537e-05,0.000005,9.937361e-07,2.945637e-07,8.312605e-09,0.000000e+00,0.000000,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
AACS,0.000006,0.000000e+00,0.000000e+00,0.000000,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
AADAC,0.000074,6.401750e-05,4.621253e-05,0.000019,7.211050e-06,9.760466e-07,5.527779e-07,3.600636e-06,0.000000,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
AAK1,0.000768,1.287211e-03,1.574562e-03,0.001122,1.895112e-03,9.148431e-04,6.610389e-04,5.161321e-04,0.000552,0.000374,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
AAMP,0.000013,6.019566e-07,3.167409e-08,0.000000,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ZXDC,0.000075,1.680591e-05,9.129638e-06,0.000004,5.307221e-07,1.136252e-08,0.000000e+00,0.000000e+00,0.000000,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
ZYG11B,0.000146,1.249192e-04,3.419859e-05,0.000006,7.331496e-07,5.380842e-08,2.841477e-08,2.234328e-09,0.000000,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
ZYX,0.000266,3.715836e-04,3.078923e-04,0.000282,2.006616e-04,6.871099e-04,1.199407e-04,1.041794e-04,0.000136,0.000171,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
ZZEF1,0.000006,0.000000e+00,0.000000e+00,0.000000,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [41]:
sbetweennessWoS2.sort_index(inplace = True)
sbetweennessWoS2

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,...,41,42,43,44,45,46,47,48,49,50
AAAS,0.000075,3.774612e-05,1.521277e-05,0.000005,1.040808e-06,3.368786e-07,9.807120e-09,0.000000e+00,0.000000,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
AACS,0.000006,0.000000e+00,0.000000e+00,0.000000,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
AADAC,0.000079,6.952110e-05,5.164935e-05,0.000020,9.327360e-06,9.811640e-07,5.701684e-07,3.611816e-06,0.000000,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
AAK1,0.000802,1.355282e-03,1.672834e-03,0.001188,1.963523e-03,9.683445e-04,6.998909e-04,5.589585e-04,0.000658,0.000414,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
AAMP,0.000013,6.358916e-07,3.281285e-08,0.000000,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ZXDC,0.000078,1.744880e-05,9.439591e-06,0.000004,5.726370e-07,1.174281e-08,0.000000e+00,0.000000e+00,0.000000,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
ZYG11B,0.000155,1.352338e-04,3.678863e-05,0.000006,7.637717e-07,5.712850e-08,2.832011e-08,2.372563e-09,0.000000,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
ZYX,0.000276,3.872810e-04,3.211504e-04,0.000298,2.112470e-04,7.052269e-04,1.261056e-04,1.112449e-04,0.000140,0.000176,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
ZZEF1,0.000007,0.000000e+00,0.000000e+00,0.000000,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [40]:
scloseness.sort_index(inplace = True)
scloseness

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,...,41,42,43,44,45,46,47,48,49,50
AAAS,0.636284,0.570212,0.538423,0.513014,0.485700,0.458705,0.423758,0.342831,0.319270,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
AACS,0.546635,0.479587,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
AADAC,0.647771,0.590118,0.558448,0.521904,0.489796,0.464398,0.441547,0.417981,0.389645,0.286147,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
AAK1,0.817518,0.722682,0.665066,0.620474,0.584339,0.551815,0.519037,0.499732,0.465723,0.439992,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
AAMP,0.578599,0.525782,0.499523,0.462988,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ZXDC,0.634411,0.553596,0.526682,0.499968,0.458470,0.420895,0.334276,0.000000,0.000000,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
ZYG11B,0.652766,0.581628,0.542523,0.520101,0.502889,0.481353,0.459146,0.419242,0.364147,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
ZYX,0.711574,0.637353,0.592602,0.558884,0.526882,0.495751,0.464565,0.443014,0.414815,0.395037,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
ZZEF1,0.558622,0.499129,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [42]:
sclosenessWoS2.sort_index(inplace = True)
sclosenessWoS2

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,...,41,42,43,44,45,46,47,48,49,50
AAAS,0.637140,0.571225,0.539080,0.513744,0.486604,0.459257,0.424277,0.343215,0.320416,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
AACS,0.546928,0.480028,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
AADAC,0.648700,0.591418,0.559375,0.522907,0.490344,0.464382,0.442943,0.417188,0.389758,0.286683,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
AAK1,0.819513,0.725894,0.667636,0.622914,0.586775,0.553763,0.521869,0.500933,0.467170,0.441723,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
AAMP,0.579093,0.526154,0.500114,0.463119,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ZXDC,0.635256,0.554345,0.527079,0.500524,0.459273,0.420930,0.334056,0.000000,0.000000,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
ZYG11B,0.653725,0.582806,0.543099,0.520747,0.503498,0.481603,0.460116,0.417435,0.363555,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
ZYX,0.712904,0.639335,0.594008,0.560344,0.528838,0.496808,0.466185,0.443566,0.415347,0.395345,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
ZZEF1,0.558990,0.499607,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [43]:
sbetweennessDiff = sbetweenness - sbetweennessWoS2
sbetweennessDiff

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,...,41,42,43,44,45,46,47,48,49,50
AAAS,-2.299570e-06,-1.468750e-06,-6.174015e-07,-2.278689e-07,-4.707191e-08,-4.231496e-08,-1.494515e-09,0.000000e+00,0.000000,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
AACS,-1.600799e-07,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
AADAC,-5.287983e-06,-5.503599e-06,-5.436820e-06,-1.314683e-06,-2.116310e-06,-5.117453e-09,-1.739051e-08,-1.118022e-08,0.000000,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
AAK1,-3.404899e-05,-6.807088e-05,-9.827259e-05,-6.579373e-05,-6.841039e-05,-5.350136e-05,-3.885198e-05,-4.282638e-05,-0.000106,-0.000040,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
AAMP,-4.695279e-07,-3.393504e-08,-1.138759e-09,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ZXDC,-3.200890e-06,-6.428884e-07,-3.099535e-07,-1.535200e-07,-4.191495e-08,-3.802903e-10,0.000000e+00,0.000000e+00,0.000000,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
ZYG11B,-9.618596e-06,-1.031452e-05,-2.590031e-06,-2.870531e-07,-3.062213e-08,-3.320083e-09,9.465101e-11,-1.382343e-10,0.000000,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
ZYX,-9.877241e-06,-1.569734e-05,-1.325808e-05,-1.560818e-05,-1.058540e-05,-1.811705e-05,-6.164825e-06,-7.065501e-06,-0.000004,-0.000005,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
ZZEF1,-3.105290e-07,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [44]:
sclosenessDiff = scloseness - sclosenessWoS2
sclosenessDiff

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,...,41,42,43,44,45,46,47,48,49,50
AAAS,-0.000856,-0.001013,-0.000657,-0.000731,-0.000904,-0.000553,-0.000519,-0.000384,-0.001146,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
AACS,-0.000293,-0.000442,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
AADAC,-0.000928,-0.001300,-0.000927,-0.001003,-0.000548,0.000016,-0.001397,0.000793,-0.000113,-0.000536,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
AAK1,-0.001995,-0.003213,-0.002570,-0.002440,-0.002436,-0.001948,-0.002832,-0.001202,-0.001447,-0.001731,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
AAMP,-0.000494,-0.000372,-0.000591,-0.000131,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ZXDC,-0.000844,-0.000749,-0.000397,-0.000556,-0.000802,-0.000035,0.000220,0.000000,0.000000,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
ZYG11B,-0.000960,-0.001178,-0.000576,-0.000647,-0.000608,-0.000250,-0.000969,0.001807,0.000591,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
ZYX,-0.001329,-0.001982,-0.001406,-0.001460,-0.001957,-0.001057,-0.001620,-0.000553,-0.000532,-0.000308,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
ZZEF1,-0.000368,-0.000478,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [55]:
sbetweenessPos = sbetweennessDiff[sbetweennessDiff.gt(0).any(axis = 1)]
sbetweenessPos

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,...,41,42,43,44,45,46,47,48,49,50
AATK,0.000040,3.981311e-05,1.235006e-05,5.423577e-06,7.498332e-09,2.849007e-06,1.846478e-06,2.543654e-07,1.116660e-06,3.221549e-07,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
ABCA3,-0.000019,-3.240442e-05,-3.190686e-05,-2.442907e-05,-1.889318e-05,-1.227366e-05,-1.122346e-05,-6.931810e-06,-4.474331e-06,-1.202682e-06,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
ABCD3,-0.000006,-2.884488e-06,-1.384723e-06,-8.583385e-07,-4.587985e-07,-6.071471e-08,-1.486709e-09,1.200161e-07,0.000000e+00,0.000000e+00,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
ABCG1,0.000013,5.103882e-06,4.698928e-06,1.201349e-05,1.759592e-05,1.790951e-05,1.484693e-05,1.053141e-05,5.768009e-07,0.000000e+00,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
ABHD14B,-0.000003,-3.958236e-07,-2.131663e-07,-8.617524e-08,-1.242527e-08,1.764192e-10,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ZNRD1,0.000012,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
ZSWIM5,0.000039,4.513311e-06,1.442536e-05,2.270090e-05,3.472927e-06,8.961212e-06,1.428725e-06,2.010765e-07,1.503928e-06,9.068779e-07,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
ZWINT,0.000023,6.688716e-06,2.687971e-06,3.254957e-06,1.510312e-07,2.234935e-07,6.156543e-08,0.000000e+00,0.000000e+00,0.000000e+00,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
ZYG11B,-0.000010,-1.031452e-05,-2.590031e-06,-2.870531e-07,-3.062213e-08,-3.320083e-09,9.465101e-11,-1.382343e-10,0.000000e+00,0.000000e+00,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [53]:
sclosenessPos = sclosenessDiff[sclosenessDiff.gt(0).any(axis = 1)]
sclosenessPos

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,...,41,42,43,44,45,46,47,48,49,50
AADAC,-0.000928,-0.001300,-0.000927,-0.001003,-0.000548,0.000016,-0.001397,0.000793,-0.000113,-0.000536,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
AAK1,-0.001995,-0.003213,-0.002570,-0.002440,-0.002436,-0.001948,-0.002832,-0.001202,-0.001447,-0.001731,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
AATK,0.010069,0.004613,0.004243,0.002427,0.003502,0.004749,0.000572,0.001502,0.004549,0.004579,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
ABCA1,-0.001475,-0.001759,-0.001206,-0.001014,-0.000910,-0.000289,-0.000859,0.000762,0.000948,0.000373,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
ABCA3,-0.001547,-0.002395,-0.001842,-0.001702,-0.001823,-0.001031,-0.002144,-0.000245,-0.000885,-0.000738,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ZSWIM6,-0.000304,0.000013,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
ZWINT,0.015169,0.003070,0.002617,0.005824,0.002744,0.004461,0.023678,0.396517,0.000000,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
ZXDC,-0.000844,-0.000749,-0.000397,-0.000556,-0.000802,-0.000035,0.000220,0.000000,0.000000,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
ZYG11B,-0.000960,-0.001178,-0.000576,-0.000647,-0.000608,-0.000250,-0.000969,0.001807,0.000591,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
