# Analyzing SARS-CoV-2 data

In [216]:
import copy
import math
import os
import sys

import hypernetx as hnx
import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import pandas as pd

In [217]:
# Root directory of the raw input files. The default is the original
# hard-coded location; set the SARSCOV2_DATA_DIR environment variable to
# point the notebook at a different copy of the data without editing code.
filedir = os.environ.get(
    "SARSCOV2_DATA_DIR",
    "/Users/feng626/covid19/SARSCov2/data/originalData",
)
# Tab-separated table that is loaded into `dft`.
bigtransFile = f'{filedir}/bigTrans/bigTrans.txt'
# Tab-separated table that is loaded into `dfs`.
sars2File = f'{filedir}/GSE147507_RawReadCounts_Human.tsv'

In [218]:
# Load the tab-separated "bigTrans" table and preview its first rows.
# NOTE(review): no index_col is given, yet later cells treat dft's index as
# gene symbols; presumably the header row has one field fewer than the data
# rows, so pandas promotes the first column to the index -- confirm.
dft = pd.read_csv(
    bigtransFile,
    sep='\t',
)
dft.head()

Unnamed: 0,EB1_WT_0h__b,EB1_WT_0h__p,EB1_WT_00h__b,EB1_WT_00h__p,EB1_WT_8h__b,EB1_WT_8h__p,EB1_WT_18h__b,EB1_WT_18h__p,EB1_WT_24h__b,EB1_WT_24h__p,...,mouse_ln_WNV_WT_6d__b,mouse_ln_WNV_WT_6d__p,mouse_ln_WNV_E218A_1d__b,mouse_ln_WNV_E218A_1d__p,mouse_ln_WNV_E218A_2d__b,mouse_ln_WNV_E218A_2d__p,mouse_ln_WNV_E218A_4d__b,mouse_ln_WNV_E218A_4d__p,mouse_ln_WNV_E218A_6d__b,mouse_ln_WNV_E218A_6d__p
AAAS,-0.053539,0.897905,-0.021629,0.971758,0.06945,0.893481,0.390366,0.150667,0.073836,0.705193,...,0.818601,0.199452,0.675964,0.000681,1.295524,0.000307,1.474642,0.000588,1.359757,0.012836
AACS,0.031504,0.896717,0.131252,0.409202,0.309998,0.244921,0.401375,0.037433,0.165346,0.35411,...,-1.549177,0.038906,-0.788553,0.123942,-0.743268,0.03809,-0.632166,0.112165,-1.220868,0.041723
AADAC,-0.04166,0.837731,0.031732,0.90552,-0.106712,0.351493,-0.224353,0.169825,-0.045614,0.834415,...,0.036319,0.768323,0.030294,0.781156,0.03026,0.748385,0.020523,0.891356,0.009379,0.963882
AAK1,0.139425,0.507731,0.148185,0.805341,0.25124,0.196918,0.290396,0.058642,0.383917,0.08219,...,0.235774,0.359214,0.871901,0.009966,1.341327,0.03406,-0.485618,0.168506,-1.095028,0.10079
AAMP,0.139837,0.470499,0.005684,0.988458,0.077773,0.496487,0.128518,0.292702,0.132485,0.179721,...,-1.181211,0.237053,0.389658,0.346917,-0.150656,0.560934,-0.244075,0.553876,-0.914955,0.221456


In [219]:
# Map every column name of dft to the set of its underscore-separated tokens.
dftcolparse = {name: set(name.split('_')) for name in dft.columns}

# Keep the columns that carry a 'b' token and have neither a 'mouse' nor a
# 'Day' token.
# NOTE(review): the 'Day' check is an exact-token match, so a name such as
# 'Day7_MERS10e5_Vs_Mock_b' (token 'Day7') is NOT excluded by it -- confirm
# whether those columns are meant to be kept.
dfthuman = [
    name
    for name, tokens in dftcolparse.items()
    if 'b' in tokens and not ({'mouse', 'Day'} & tokens)
]
dfthuman

['EB1_WT_0h__b',
 'EB1_WT_00h__b',
 'EB1_WT_8h__b',
 'EB1_WT_18h__b',
 'EB1_WT_24h__b',
 'EB1_WT_48h__b',
 'EB1_mucin_0h__b',
 'EB1_mucin_00h__b',
 'EB1_mucin_8h__b',
 'EB1_mucin_18h__b',
 'EB1_mucin_24h__b',
 'EB1_mucin_48h__b',
 'EB1_ssGP_0h__b',
 'EB1_ssGP_00h__b',
 'EB1_ssGP_8h__b',
 'EB1_ssGP_18h__b',
 'EB1_ssGP_24h__b',
 'EB1_ssGP_48h__b',
 'EB2_WT_deltaVP30_0h__b',
 'EB2_WT_deltaVP30_8h__b',
 'EB2_WT_deltaVP30_24h__b',
 'EB2_WT_deltaVP30_48h__b',
 'EB2_WT_deltaVP30_72h__b',
 'H7N9_calu3_WT_0hr__b',
 'H7N9_calu3_WT_7hr__b',
 'H7N9_calu3_WT_12hr__b',
 'H7N9_calu3_WT_24hr__b',
 'H7N9_calu3_FM_0hr__b',
 'H7N9_calu3_FM_7hr__b',
 'H7N9_calu3_FM_12hr__b',
 'H7N9_calu3_FM_24hr__b',
 'H7N9_calu3_X691_0hr__b',
 'H7N9_calu3_X691_7hr__b',
 'H7N9_calu3_X691_12hr__b',
 'H7N9_calu3_X691_24hr__b',
 'H5N1_calu3_WT_0hr__b',
 'H5N1_calu3_WT_7hr__b',
 'H5N1_calu3_WT_12hr__b',
 'H5N1_calu3_WT_24hr__b',
 'H5N1_calu3_X627E_0hr__b',
 'H5N1_calu3_X627E_7hr__b',
 'H5N1_calu3_X627E_12hr__b',
 'H5N1_calu3_

In [220]:
# Restrict dft to the columns selected in dfthuman (all rows are kept).
dfh = dft.loc[:, dfthuman]
dfh

Unnamed: 0,EB1_WT_0h__b,EB1_WT_00h__b,EB1_WT_8h__b,EB1_WT_18h__b,EB1_WT_24h__b,EB1_WT_48h__b,EB1_mucin_0h__b,EB1_mucin_00h__b,EB1_mucin_8h__b,EB1_mucin_18h__b,...,calu3_SARS__b_ExoNI_moi1_60h,calu3_SARS__b_ExoNI_moi1_72h,calu3_SARS__b_nsp16_moi5_0h,calu3_SARS__b_nsp16_moi5_7h,calu3_SARS__b_nsp16_moi5_12h,calu3_SARS__b_nsp16_moi5_24h,calu3_SARS__b_nsp16_moi5_36h,calu3_SARS__b_nsp16_moi5_48h,calu3_SARS__b_nsp16_moi5_60h,calu3_SARS__b_nsp16_moi5_72h
AAAS,-0.053539,-0.021629,0.069450,0.390366,0.073836,-0.442246,-0.082826,-0.422886,-0.045865,0.308248,...,0.032925,0.137517,0.035045,0.052741,0.007945,-0.035023,0.161498,-0.190269,-0.092381,-0.055857
AACS,0.031504,0.131252,0.309998,0.401375,0.165346,0.079663,-0.291830,-0.386167,0.207874,0.335756,...,-0.153420,0.137480,-0.013850,-0.000476,-0.081773,0.137041,0.147569,-0.175630,-0.074628,0.162620
AADAC,-0.041660,0.031732,-0.106712,-0.224353,-0.045614,-0.890692,-0.040650,0.053375,-0.220946,-0.279067,...,0.233175,0.218920,0.191387,0.394776,-0.225634,-0.009850,-0.563230,-0.054222,0.463434,1.121392
AAK1,0.139425,0.148185,0.251240,0.290396,0.383917,0.680079,-0.185509,0.495312,-0.189335,0.234855,...,0.265520,-0.325278,0.199622,-0.229799,0.292203,-0.436667,0.034660,0.276221,0.386410,-0.384465
AAMP,0.139837,0.005684,0.077773,0.128518,0.132485,0.122469,0.025393,0.055635,0.113951,0.080511,...,-0.104224,0.143239,-0.074407,-0.060982,-0.054198,0.162818,0.108959,-0.065833,-0.096275,0.085290
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ZXDC,0.045434,-0.015377,-0.169418,-0.392423,0.231872,-0.102981,-0.152677,-0.130969,-0.121257,-0.187774,...,-0.232420,0.153464,-0.098613,-0.092549,-0.240960,0.141063,-0.158400,-0.112629,-0.295572,0.069981
ZYG11B,-0.065778,0.048467,-0.124070,-0.093041,-0.090427,-0.550867,-0.152835,-0.038874,-0.064925,-0.487526,...,0.258713,0.128794,-0.231901,0.141698,0.109851,-0.102701,-0.172543,-0.286687,-0.276806,-0.064110
ZYX,0.045843,-0.164044,0.114723,0.658516,0.456055,0.298952,-0.443505,-0.754256,-0.023101,0.849493,...,-0.261560,0.162087,-0.098509,0.138738,-0.280162,0.035223,0.168304,-0.130368,-0.144799,-0.077492
ZZEF1,0.137463,-0.011429,0.198544,0.355522,0.306401,0.334437,-0.138179,0.158692,0.134387,0.329527,...,0.203721,0.200494,-0.031792,-0.029531,0.195086,-0.129848,0.079408,0.079414,0.241151,0.286260


In [221]:
# Load the tab-separated read-count table; its first column becomes the
# row index. Preview the first rows.
dfs = pd.read_csv(
    sars2File,
    sep='\t',
    index_col=0,
)
dfs.head()

Unnamed: 0,Series1_NHBE_Mock_1,Series1_NHBE_Mock_2,Series1_NHBE_Mock_3,Series1_NHBE_SARS-CoV-2_1,Series1_NHBE_SARS-CoV-2_2,Series1_NHBE_SARS-CoV-2_3,Series2_A549_Mock_1,Series2_A549_Mock_2,Series2_A549_Mock_3,Series2_A549_SARS-CoV-2_1,...,Series15_COVID19Lung_1,Series16_A549-ACE2_Mock_1,Series16_A549-ACE2_Mock_2,Series16_A549-ACE2_Mock_3,Series16_A549-ACE2_SARS-CoV-2_1,Series16_A549-ACE2_SARS-CoV-2_2,Series16_A549-ACE2_SARS-CoV-2_3,Series16_A549-ACE2_SARS-CoV-2_Rux_1,Series16_A549-ACE2_SARS-CoV-2_Rux_2,Series16_A549-ACE2_SARS-CoV-2_Rux_3
DDX11L1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
WASH7P,29,24,23,34,19,44,68,43,33,65,...,0,0,11,7,2,6,5,12,6,8
FAM138A,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
FAM138F,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
OR4F5,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [222]:
# Map every column name of dfs to the set of its underscore-separated tokens
# and display the mapping.
dfscolparse = {name: set(name.split('_')) for name in dfs.columns}

dfscolparse

{'Series1_NHBE_Mock_1': {'1', 'Mock', 'NHBE', 'Series1'},
 'Series1_NHBE_Mock_2': {'2', 'Mock', 'NHBE', 'Series1'},
 'Series1_NHBE_Mock_3': {'3', 'Mock', 'NHBE', 'Series1'},
 'Series1_NHBE_SARS-CoV-2_1': {'1', 'NHBE', 'SARS-CoV-2', 'Series1'},
 'Series1_NHBE_SARS-CoV-2_2': {'2', 'NHBE', 'SARS-CoV-2', 'Series1'},
 'Series1_NHBE_SARS-CoV-2_3': {'3', 'NHBE', 'SARS-CoV-2', 'Series1'},
 'Series2_A549_Mock_1': {'1', 'A549', 'Mock', 'Series2'},
 'Series2_A549_Mock_2': {'2', 'A549', 'Mock', 'Series2'},
 'Series2_A549_Mock_3': {'3', 'A549', 'Mock', 'Series2'},
 'Series2_A549_SARS-CoV-2_1': {'1', 'A549', 'SARS-CoV-2', 'Series2'},
 'Series2_A549_SARS-CoV-2_2': {'2', 'A549', 'SARS-CoV-2', 'Series2'},
 'Series2_A549_SARS-CoV-2_3': {'3', 'A549', 'SARS-CoV-2', 'Series2'},
 'Series3_A549_Mock_1': {'1', 'A549', 'Mock', 'Series3'},
 'Series3_A549_Mock_2': {'2', 'A549', 'Mock', 'Series3'},
 'Series3_A549_RSV_1': {'1', 'A549', 'RSV', 'Series3'},
 'Series3_A549_RSV_2': {'2', 'A549', 'RSV', 'Series3'},
 'Se

In [223]:
# One entry per comparison: a pair of 1-based column positions into dfs.
# The fold-change cell subtracts the first group's mean log2 value from the
# second group's. Entries are in the same order as the `experiments` list.
clusters = [
    [[1, 2, 3], [4, 5, 6]],
    [[7, 8, 9], [10, 11, 12]],
    [[13, 14], [15, 16]],
    [[17, 18], [19, 20]],
    [[21, 22, 23], [24, 25, 26]],
    [[27, 28, 29], [30, 31, 32]],
    [[33, 34, 35], [36, 37, 38]],
    # The next two comparisons share the first group 39-41.
    [[39, 40, 41], [42, 43, 44]],
    [[39, 40, 41], [45, 46, 47]],
    # The next two comparisons share the first group 48-51.
    [[48, 49, 50, 51], [52, 53, 54, 55]],
    [[48, 49, 50, 51], [56, 57, 58, 59]],
    [[66, 67], [68, 69]],
    # The last two comparisons share the first group 70-72.
    [[70, 71, 72], [73, 74, 75]],
    [[70, 71, 72], [76, 77, 78]],
]
clusters

[[[1, 2, 3], [4, 5, 6]],
 [[7, 8, 9], [10, 11, 12]],
 [[13, 14], [15, 16]],
 [[17, 18], [19, 20]],
 [[21, 22, 23], [24, 25, 26]],
 [[27, 28, 29], [30, 31, 32]],
 [[33, 34, 35], [36, 37, 38]],
 [[39, 40, 41], [42, 43, 44]],
 [[39, 40, 41], [45, 46, 47]],
 [[48, 49, 50, 51], [52, 53, 54, 55]],
 [[48, 49, 50, 51], [56, 57, 58, 59]],
 [[66, 67], [68, 69]],
 [[70, 71, 72], [73, 74, 75]],
 [[70, 71, 72], [76, 77, 78]]]

In [224]:
# Column labels for the fold-change table, one per entry of `clusters`
# and in the same order.
experiments = [
    "Series1_NHBE_SARS-CoV-2",
    "Series2_A549_SARS-CoV-2",
    "Series3_A549_RSV",
    "Series4_A549_IAV",
    "Series5_A549_SARS-CoV-2",
    "Series6_A549-ACE2_SARS-CoV-2",
    "Series7_Calu3_SARS-CoV-2",
    "Series8_A549_RSV",
    "Series8_A549_HPIV3",
    "Series9_NHBE_IAV",
    "Series9_NHBE_IAVdNS1",
    "Series15_COVID19Lung",
    "Series16_A549-ACE2_SARS-CoV-2",
    "Series16_A549-ACE2_SARS-CoV-2_Rux",
]
experiments

['Series1_NHBE_SARS-CoV-2',
 'Series2_A549_SARS-CoV-2',
 'Series3_A549_RSV',
 'Series4_A549_IAV',
 'Series5_A549_SARS-CoV-2',
 'Series6_A549-ACE2_SARS-CoV-2',
 'Series7_Calu3_SARS-CoV-2',
 'Series8_A549_RSV',
 'Series8_A549_HPIV3',
 'Series9_NHBE_IAV',
 'Series9_NHBE_IAVdNS1',
 'Series15_COVID19Lung',
 'Series16_A549-ACE2_SARS-CoV-2',
 'Series16_A549-ACE2_SARS-CoV-2_Rux']

In [225]:
def _log2_fold_change(counts, reference_cols, comparison_cols):
    """Return, per row, mean(log2(comparison)) - mean(log2(reference)).

    Parameters
    ----------
    counts : pandas.DataFrame
        Table whose columns are addressed by position.
    reference_cols, comparison_cols : sequence of int
        1-based column positions of the two groups. The groups may have
        different sizes.

    Returns
    -------
    pandas.Series
        One value per row of ``counts``; +inf, -inf and NaN are replaced by 0.
    """
    # Convert the 1-based positions to the 0-based positions iloc expects.
    ref_idx = np.asarray(reference_cols) - 1
    cmp_idx = np.asarray(comparison_cols) - 1
    # log2(0) is -inf and inf - inf is NaN; both are expected here and are
    # cleaned up below, so silence the corresponding NumPy warnings.
    with np.errstate(divide="ignore", invalid="ignore"):
        ref_mean = counts.iloc[:, ref_idx].transform(np.log2).mean(axis=1)
        cmp_mean = counts.iloc[:, cmp_idx].transform(np.log2).mean(axis=1)
        delta = cmp_mean - ref_mean
    # NOTE(review): a single zero count in either group drives that group's
    # mean to -inf, so the row ends up as 0 after this replace. A pseudocount
    # (e.g. log2(x + 1)) may be preferable -- confirm with the analysis owner.
    return delta.replace([np.inf, -np.inf, np.nan], 0)


# One fold-change column per entry of `clusters`:
# cluster[0] is the first (subtracted) group, cluster[1] the second.
dfsc = pd.concat(
    [_log2_fold_change(dfs, cluster[0], cluster[1]) for cluster in clusters],
    axis=1,
)

In [226]:
# Label the fold-change columns with the experiment names, write the table
# to a pickle in the current working directory, then display it.
dfsc.columns = pd.Index(experiments)
dfsc.to_pickle(path="sars2cleaned.pkl")
dfsc

Unnamed: 0,Series1_NHBE_SARS-CoV-2,Series2_A549_SARS-CoV-2,Series3_A549_RSV,Series4_A549_IAV,Series5_A549_SARS-CoV-2,Series6_A549-ACE2_SARS-CoV-2,Series7_Calu3_SARS-CoV-2,Series8_A549_RSV,Series8_A549_HPIV3,Series9_NHBE_IAV,Series9_NHBE_IAVdNS1,Series15_COVID19Lung,Series16_A549-ACE2_SARS-CoV-2,Series16_A549-ACE2_SARS-CoV-2_Rux
DDX11L1,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0
WASH7P,0.276106,0.450996,-1.712436,-2.160964,-0.037497,-2.889628,-0.343641,-0.806679,-0.883788,-1.330497,-0.131567,0.0,0.0,0.0
FAM138A,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0
FAM138F,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0
OR4F5,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
CDY1B,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0
CDY1,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0
CSPG4P1Y,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0
GOLGA2P3Y,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0


In [227]:
# Row labels of the two tables as NumPy arrays, used to find shared rows.
dfscRowNames, dfhRowNames = (frame.index.values for frame in (dfsc, dfh))

In [228]:
# Sorted, de-duplicated row labels that occur in both tables.
commonGeneNames = np.intersect1d(
    dfscRowNames,
    dfhRowNames,
    assume_unique=False,
)
commonGeneNames

array(['AAAS', 'AACS', 'AADAC', ..., 'ZYX', 'ZZEF1', 'ZZZ3'], dtype=object)

In [229]:
# Experiments left out of the merged tables, selected by their position in
# `experiments`.
dropping = [experiments[i] for i in [1,4,11]]
# Build the kept list with a filter rather than calling list.remove() inside
# a throw-away comprehension; `experiments` itself is left untouched.
usefulCols = [name for name in experiments if name not in dropping]
usefulCols

['Series1_NHBE_SARS-CoV-2',
 'Series3_A549_RSV',
 'Series4_A549_IAV',
 'Series6_A549-ACE2_SARS-CoV-2',
 'Series7_Calu3_SARS-CoV-2',
 'Series8_A549_RSV',
 'Series8_A549_HPIV3',
 'Series9_NHBE_IAV',
 'Series9_NHBE_IAVdNS1',
 'Series16_A549-ACE2_SARS-CoV-2',
 'Series16_A549-ACE2_SARS-CoV-2_Rux']

In [230]:
# Kept experiments whose name contains the substring "SARS".
sars2Cols = [
    name
    for name in usefulCols
    if "SARS" in name
]
sars2Cols

['Series1_NHBE_SARS-CoV-2',
 'Series6_A549-ACE2_SARS-CoV-2',
 'Series7_Calu3_SARS-CoV-2',
 'Series16_A549-ACE2_SARS-CoV-2',
 'Series16_A549-ACE2_SARS-CoV-2_Rux']

In [231]:
# Kept experiments whose name does not contain the substring "SARS".
nosars2Cols = [
    name
    for name in usefulCols
    if not ("SARS" in name)
]
nosars2Cols

['Series3_A549_RSV',
 'Series4_A549_IAV',
 'Series8_A549_RSV',
 'Series8_A549_HPIV3',
 'Series9_NHBE_IAV',
 'Series9_NHBE_IAVdNS1']

In [232]:
# Restrict both tables to the shared row labels, in the same order, so they
# can be placed side by side.
dfhf = dfh.loc[commonGeneNames, :]
dfsf = dfsc.loc[commonGeneNames, :]

In [233]:
# Merged table: every dfhf column followed by the kept fold-change columns.
dff = pd.concat(
    [dfhf, dfsf.loc[:, usefulCols]],
    axis="columns",
)
dff

Unnamed: 0,EB1_WT_0h__b,EB1_WT_00h__b,EB1_WT_8h__b,EB1_WT_18h__b,EB1_WT_24h__b,EB1_WT_48h__b,EB1_mucin_0h__b,EB1_mucin_00h__b,EB1_mucin_8h__b,EB1_mucin_18h__b,...,Series3_A549_RSV,Series4_A549_IAV,Series6_A549-ACE2_SARS-CoV-2,Series7_Calu3_SARS-CoV-2,Series8_A549_RSV,Series8_A549_HPIV3,Series9_NHBE_IAV,Series9_NHBE_IAVdNS1,Series16_A549-ACE2_SARS-CoV-2,Series16_A549-ACE2_SARS-CoV-2_Rux
AAAS,-0.053539,-0.021629,0.069450,0.390366,0.073836,-0.442246,-0.082826,-0.422886,-0.045865,0.308248,...,-1.989792,-1.184323,-2.474763,-0.570161,-0.351737,0.351903,-0.103042,0.155755,-1.711231,-1.278946
AACS,0.031504,0.131252,0.309998,0.401375,0.165346,0.079663,-0.291830,-0.386167,0.207874,0.335756,...,-2.202171,-0.946714,-2.731488,-0.753072,1.162057,1.322487,0.342570,0.522355,-1.458900,-0.959009
AADAC,-0.041660,0.031732,-0.106712,-0.224353,-0.045614,-0.890692,-0.040650,0.053375,-0.220946,-0.279067,...,-1.719463,-0.370890,-1.723320,-0.723308,1.957697,2.213505,0.000000,0.000000,0.534023,0.789086
AAK1,0.139425,0.148185,0.251240,0.290396,0.383917,0.680079,-0.185509,0.495312,-0.189335,0.234855,...,-1.983893,-0.421551,-2.154003,0.651957,1.864634,1.745253,0.652229,0.388312,-0.427195,-0.154886
AAMP,0.139837,0.005684,0.077773,0.128518,0.132485,0.122469,0.025393,0.055635,0.113951,0.080511,...,-1.860399,-1.139992,-2.301581,-0.598850,0.854072,1.078711,0.229610,0.488917,-1.715057,-1.392461
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ZXDC,0.045434,-0.015377,-0.169418,-0.392423,0.231872,-0.102981,-0.152677,-0.130969,-0.121257,-0.187774,...,-1.651267,-0.765669,-1.987821,0.261876,1.237991,1.567384,0.101000,0.519108,-0.767060,-0.511800
ZYG11B,-0.065778,0.048467,-0.124070,-0.093041,-0.090427,-0.550867,-0.152835,-0.038874,-0.064925,-0.487526,...,-2.451050,-0.349517,-2.229211,0.250027,1.041049,1.889957,0.713448,0.340771,-1.596324,-0.697522
ZYX,0.045843,-0.164044,0.114723,0.658516,0.456055,0.298952,-0.443505,-0.754256,-0.023101,0.849493,...,-1.160028,-0.684129,-2.960088,0.290458,0.872186,0.634645,0.163536,0.573991,-1.208096,-1.010533
ZZEF1,0.137463,-0.011429,0.198544,0.355522,0.306401,0.334437,-0.138179,0.158692,0.134387,0.329527,...,-1.849951,-0.607940,-1.769701,-0.017877,1.321725,1.615219,0.476449,-0.023698,-0.032857,0.421377


In [234]:
# Write the merged table to the current working directory as pickle and CSV.
dff.to_pickle(path="biggerTrans.pkl")
dff.to_csv(path_or_buf="biggerTrans.csv")

In [235]:
# Variant of the merged table that keeps every dfhf column but only the
# fold-change columns whose name lacks "SARS".
dffwos2 = pd.concat(
    [dfhf, dfsf.loc[:, nosars2Cols]],
    axis="columns",
)
dffwos2

Unnamed: 0,EB1_WT_0h__b,EB1_WT_00h__b,EB1_WT_8h__b,EB1_WT_18h__b,EB1_WT_24h__b,EB1_WT_48h__b,EB1_mucin_0h__b,EB1_mucin_00h__b,EB1_mucin_8h__b,EB1_mucin_18h__b,...,calu3_SARS__b_nsp16_moi5_36h,calu3_SARS__b_nsp16_moi5_48h,calu3_SARS__b_nsp16_moi5_60h,calu3_SARS__b_nsp16_moi5_72h,Series3_A549_RSV,Series4_A549_IAV,Series8_A549_RSV,Series8_A549_HPIV3,Series9_NHBE_IAV,Series9_NHBE_IAVdNS1
AAAS,-0.053539,-0.021629,0.069450,0.390366,0.073836,-0.442246,-0.082826,-0.422886,-0.045865,0.308248,...,0.161498,-0.190269,-0.092381,-0.055857,-1.989792,-1.184323,-0.351737,0.351903,-0.103042,0.155755
AACS,0.031504,0.131252,0.309998,0.401375,0.165346,0.079663,-0.291830,-0.386167,0.207874,0.335756,...,0.147569,-0.175630,-0.074628,0.162620,-2.202171,-0.946714,1.162057,1.322487,0.342570,0.522355
AADAC,-0.041660,0.031732,-0.106712,-0.224353,-0.045614,-0.890692,-0.040650,0.053375,-0.220946,-0.279067,...,-0.563230,-0.054222,0.463434,1.121392,-1.719463,-0.370890,1.957697,2.213505,0.000000,0.000000
AAK1,0.139425,0.148185,0.251240,0.290396,0.383917,0.680079,-0.185509,0.495312,-0.189335,0.234855,...,0.034660,0.276221,0.386410,-0.384465,-1.983893,-0.421551,1.864634,1.745253,0.652229,0.388312
AAMP,0.139837,0.005684,0.077773,0.128518,0.132485,0.122469,0.025393,0.055635,0.113951,0.080511,...,0.108959,-0.065833,-0.096275,0.085290,-1.860399,-1.139992,0.854072,1.078711,0.229610,0.488917
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ZXDC,0.045434,-0.015377,-0.169418,-0.392423,0.231872,-0.102981,-0.152677,-0.130969,-0.121257,-0.187774,...,-0.158400,-0.112629,-0.295572,0.069981,-1.651267,-0.765669,1.237991,1.567384,0.101000,0.519108
ZYG11B,-0.065778,0.048467,-0.124070,-0.093041,-0.090427,-0.550867,-0.152835,-0.038874,-0.064925,-0.487526,...,-0.172543,-0.286687,-0.276806,-0.064110,-2.451050,-0.349517,1.041049,1.889957,0.713448,0.340771
ZYX,0.045843,-0.164044,0.114723,0.658516,0.456055,0.298952,-0.443505,-0.754256,-0.023101,0.849493,...,0.168304,-0.130368,-0.144799,-0.077492,-1.160028,-0.684129,0.872186,0.634645,0.163536,0.573991
ZZEF1,0.137463,-0.011429,0.198544,0.355522,0.306401,0.334437,-0.138179,0.158692,0.134387,0.329527,...,0.079408,0.079414,0.241151,0.286260,-1.849951,-0.607940,1.321725,1.615219,0.476449,-0.023698


In [236]:
# Write this variant to the current working directory as pickle and CSV.
dffwos2.to_pickle(path="biggerTransNoSars2.pkl")
dffwos2.to_csv(path_or_buf="biggerTransNoSars2.csv")

In [237]:
# Columns of dfh whose name does not contain the substring "SARS".
# The list is long, so it is not displayed here.
nosarsCols = [
    name
    for name in dfh.columns
    if not ("SARS" in name)
]

In [238]:
# Variant of the merged table with every column whose name contains "SARS"
# removed from both source tables.
dffwos = pd.concat(
    [dfhf.loc[:, nosarsCols], dfsf.loc[:, nosars2Cols]],
    axis="columns",
)
dffwos

Unnamed: 0,EB1_WT_0h__b,EB1_WT_00h__b,EB1_WT_8h__b,EB1_WT_18h__b,EB1_WT_24h__b,EB1_WT_48h__b,EB1_mucin_0h__b,EB1_mucin_00h__b,EB1_mucin_8h__b,EB1_mucin_18h__b,...,Day7_MERS10e5_Vs_Mock_b,Day2_MERS10e6_Vs_Mock_b,Day4_MERS10e6_Vs_Mock_b,Day7_MERS10e6_Vs_Mock_b,Series3_A549_RSV,Series4_A549_IAV,Series8_A549_RSV,Series8_A549_HPIV3,Series9_NHBE_IAV,Series9_NHBE_IAVdNS1
AAAS,-0.053539,-0.021629,0.069450,0.390366,0.073836,-0.442246,-0.082826,-0.422886,-0.045865,0.308248,...,0.171651,-0.199660,0.792779,0.368657,-1.989792,-1.184323,-0.351737,0.351903,-0.103042,0.155755
AACS,0.031504,0.131252,0.309998,0.401375,0.165346,0.079663,-0.291830,-0.386167,0.207874,0.335756,...,-0.784688,-0.504837,-0.136926,-0.879605,-2.202171,-0.946714,1.162057,1.322487,0.342570,0.522355
AADAC,-0.041660,0.031732,-0.106712,-0.224353,-0.045614,-0.890692,-0.040650,0.053375,-0.220946,-0.279067,...,0.136705,-0.028470,-0.001031,-0.184787,-1.719463,-0.370890,1.957697,2.213505,0.000000,0.000000
AAK1,0.139425,0.148185,0.251240,0.290396,0.383917,0.680079,-0.185509,0.495312,-0.189335,0.234855,...,0.486412,0.121008,-0.047186,0.362905,-1.983893,-0.421551,1.864634,1.745253,0.652229,0.388312
AAMP,0.139837,0.005684,0.077773,0.128518,0.132485,0.122469,0.025393,0.055635,0.113951,0.080511,...,-0.925608,-0.659245,-0.107935,-0.704082,-1.860399,-1.139992,0.854072,1.078711,0.229610,0.488917
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ZXDC,0.045434,-0.015377,-0.169418,-0.392423,0.231872,-0.102981,-0.152677,-0.130969,-0.121257,-0.187774,...,-0.574723,-0.163952,-0.215609,0.268579,-1.651267,-0.765669,1.237991,1.567384,0.101000,0.519108
ZYG11B,-0.065778,0.048467,-0.124070,-0.093041,-0.090427,-0.550867,-0.152835,-0.038874,-0.064925,-0.487526,...,0.299537,0.160074,0.063669,-0.074202,-2.451050,-0.349517,1.041049,1.889957,0.713448,0.340771
ZYX,0.045843,-0.164044,0.114723,0.658516,0.456055,0.298952,-0.443505,-0.754256,-0.023101,0.849493,...,-0.208051,-0.561684,0.053471,0.290445,-1.160028,-0.684129,0.872186,0.634645,0.163536,0.573991
ZZEF1,0.137463,-0.011429,0.198544,0.355522,0.306401,0.334437,-0.138179,0.158692,0.134387,0.329527,...,0.042277,0.051029,-0.184655,-0.001946,-1.849951,-0.607940,1.321725,1.615219,0.476449,-0.023698


In [239]:
# Write this variant to the current working directory as pickle and CSV.
dffwos.to_pickle(path="biggerTransNoSars.pkl")
dffwos.to_csv(path_or_buf="biggerTransNoSars.csv")

In [240]:
# Columns of dfh whose name contains neither "SARS" nor "MERS".
# The list is long, so it is not displayed here.
nocovCols = [
    name
    for name in dfh.columns
    if not any(tag in name for tag in ("SARS", "MERS"))
]

In [241]:
# Variant of the merged table without any dfhf column named after SARS or
# MERS, and without the fold-change columns whose name contains "SARS".
dffwoc = pd.concat(
    [dfhf.loc[:, nocovCols], dfsf.loc[:, nosars2Cols]],
    axis="columns",
)
dffwoc

Unnamed: 0,EB1_WT_0h__b,EB1_WT_00h__b,EB1_WT_8h__b,EB1_WT_18h__b,EB1_WT_24h__b,EB1_WT_48h__b,EB1_mucin_0h__b,EB1_mucin_00h__b,EB1_mucin_8h__b,EB1_mucin_18h__b,...,H1N1_WT_calu3_Cal04_12hr__b,H1N1_WT_calu3_Cal04_24hr__b,H1N1_WT_calu3_Cal04_36hr__b,H1N1_WT_calu3_Cal04_48hr__b,Series3_A549_RSV,Series4_A549_IAV,Series8_A549_RSV,Series8_A549_HPIV3,Series9_NHBE_IAV,Series9_NHBE_IAVdNS1
AAAS,-0.053539,-0.021629,0.069450,0.390366,0.073836,-0.442246,-0.082826,-0.422886,-0.045865,0.308248,...,-0.973503,-0.079759,-0.465493,-1.025579,-1.989792,-1.184323,-0.351737,0.351903,-0.103042,0.155755
AACS,0.031504,0.131252,0.309998,0.401375,0.165346,0.079663,-0.291830,-0.386167,0.207874,0.335756,...,-0.360177,-0.277152,-1.198113,-1.204228,-2.202171,-0.946714,1.162057,1.322487,0.342570,0.522355
AADAC,-0.041660,0.031732,-0.106712,-0.224353,-0.045614,-0.890692,-0.040650,0.053375,-0.220946,-0.279067,...,-0.040589,0.002395,0.025896,-0.100877,-1.719463,-0.370890,1.957697,2.213505,0.000000,0.000000
AAK1,0.139425,0.148185,0.251240,0.290396,0.383917,0.680079,-0.185509,0.495312,-0.189335,0.234855,...,-0.513472,-0.781464,-1.457754,-1.365912,-1.983893,-0.421551,1.864634,1.745253,0.652229,0.388312
AAMP,0.139837,0.005684,0.077773,0.128518,0.132485,0.122469,0.025393,0.055635,0.113951,0.080511,...,-0.252762,0.140468,-0.183620,0.077374,-1.860399,-1.139992,0.854072,1.078711,0.229610,0.488917
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ZXDC,0.045434,-0.015377,-0.169418,-0.392423,0.231872,-0.102981,-0.152677,-0.130969,-0.121257,-0.187774,...,-0.694767,0.460238,-0.261143,-0.286167,-1.651267,-0.765669,1.237991,1.567384,0.101000,0.519108
ZYG11B,-0.065778,0.048467,-0.124070,-0.093041,-0.090427,-0.550867,-0.152835,-0.038874,-0.064925,-0.487526,...,-0.916250,-0.569090,-0.918276,-0.913063,-2.451050,-0.349517,1.041049,1.889957,0.713448,0.340771
ZYX,0.045843,-0.164044,0.114723,0.658516,0.456055,0.298952,-0.443505,-0.754256,-0.023101,0.849493,...,-0.199760,0.362320,-0.130798,0.017730,-1.160028,-0.684129,0.872186,0.634645,0.163536,0.573991
ZZEF1,0.137463,-0.011429,0.198544,0.355522,0.306401,0.334437,-0.138179,0.158692,0.134387,0.329527,...,-0.746715,0.108914,-0.964507,-0.822642,-1.849951,-0.607940,1.321725,1.615219,0.476449,-0.023698


In [242]:
# Write this variant to the current working directory as pickle and CSV.
dffwoc.to_pickle(path="biggerTransNoCov.pkl")
dffwoc.to_csv(path_or_buf="biggerTransNoCov.csv")