In [1]:
import os
import pandas as pd
import numpy as np
import scipy.stats as stats
import matplotlib.pyplot as plt
from matplotlib import gridspec
import seaborn as sns
import math

In [2]:
%matplotlib inline

# Load data

In [3]:
# Load the TRP count table. The first CSV column holds the row labels, so it
# becomes the index. `DataFrame.from_csv` was deprecated in pandas 0.21 and
# removed in 1.0; `read_csv(..., index_col=0)` is the supported equivalent
# (the labels are names, not dates, so no date parsing is requested).
trp_table = pd.read_csv('L44_TRP_sorted_table.csv', index_col=0)

In [4]:
trp_table.shape

(337, 337)

In [5]:
# Preview only the first rows instead of rendering the whole 337 x 337 frame.
trp_table.head(10)

Unnamed: 0,A_HT_DHD_100,B_HT_DHD_100,A_HT_DHD_10,B_HT_DHD_10,A_HT_DHD_11,B_HT_DHD_11,A_HT_DHD_12,B_HT_DHD_12,A_HT_DHD_13,B_HT_DHD_13,...,Bcl-w,Bcl-xL,Bfl-1,FECM04,Mcl1[151-321],XCDP07,alphaBCL2,alphaBCLB,alphaBFL1,alphaMCL1
A_HT_DHD_100,70.0,396.0,221.0,194.0,374.0,490.0,75.0,201.0,65.0,228.0,...,161.0,295.0,530.0,516.0,1021.0,143.0,360.0,815.0,324.0,827.0
B_HT_DHD_100,236.0,526.0,200.0,505.0,312.0,516.0,110.0,193.0,317.0,205.0,...,72.0,380.0,791.0,523.0,481.0,241.0,216.0,1191.0,805.0,979.0
A_HT_DHD_10,54.0,492.0,59.0,264.0,79.0,202.0,85.0,276.0,43.0,239.0,...,70.0,212.0,533.0,629.0,717.0,69.0,54.0,583.0,661.0,517.0
B_HT_DHD_10,266.0,580.0,88.0,497.0,621.0,596.0,29.0,202.0,140.0,531.0,...,116.0,409.0,1039.0,1025.0,1796.0,281.0,353.0,1275.0,869.0,1583.0
A_HT_DHD_11,176.0,309.0,163.0,496.0,416.0,677.0,228.0,428.0,58.0,309.0,...,222.0,439.0,1087.0,959.0,1697.0,96.0,359.0,864.0,1118.0,1281.0
B_HT_DHD_11,38.0,409.0,59.0,244.0,180.0,386.0,186.0,82.0,227.0,322.0,...,155.0,241.0,371.0,312.0,1386.0,47.0,68.0,502.0,771.0,823.0
A_HT_DHD_12,279.0,576.0,240.0,198.0,456.0,830.0,48.0,394.0,98.0,480.0,...,229.0,435.0,1126.0,785.0,1248.0,252.0,388.0,755.0,1287.0,1621.0
B_HT_DHD_12,36.0,1264.0,189.0,567.0,244.0,636.0,356.0,,60.0,337.0,...,86.0,386.0,1082.0,755.0,1285.0,116.0,638.0,761.0,1287.0,1398.0
A_HT_DHD_13,241.0,385.0,92.0,473.0,158.0,832.0,120.0,244.0,6.0,388.0,...,64.0,476.0,701.0,814.0,760.0,321.0,305.0,793.0,658.0,650.0
B_HT_DHD_13,110.0,313.0,320.0,361.0,224.0,689.0,153.0,156.0,85.0,396.0,...,100.0,516.0,858.0,606.0,1397.0,246.0,401.0,1434.0,1317.0,1096.0


# 1mM 3-AT HIS Data

In [7]:
# Load the HIS (1 mM 3-AT) count table, using the first CSV column as the
# row index. `DataFrame.from_csv` was deprecated in pandas 0.21 and removed
# in 1.0; `read_csv(..., index_col=0)` is the supported equivalent.
his_table = pd.read_csv('L44_HIS_1mM_3AT_sorted_table.csv', index_col=0)

In [8]:
his_table.shape

(337, 337)

In [9]:
his_table.head(2)

Unnamed: 0,A_HT_DHD_100,B_HT_DHD_100,A_HT_DHD_10,B_HT_DHD_10,A_HT_DHD_11,B_HT_DHD_11,A_HT_DHD_12,B_HT_DHD_12,A_HT_DHD_13,B_HT_DHD_13,...,Bcl-w,Bcl-xL,Bfl-1,FECM04,Mcl1[151-321],XCDP07,alphaBCL2,alphaBCLB,alphaBFL1,alphaMCL1
A_HT_DHD_100,13.0,210.0,161.0,25.0,43.0,67.0,35.0,8.0,7.0,34.0,...,28.0,198.0,122.0,401.0,123.0,33.0,33.0,185.0,90.0,80.0
B_HT_DHD_100,14.0,82.0,19.0,114.0,9969.0,86.0,28.0,34.0,25.0,74.0,...,10.0,22.0,150.0,93.0,90.0,22.0,33.0,140.0,96.0,124.0


In [10]:
# Placeholder frame carrying trp_table's row/column labels, with every cell
# set to -1.0. lin_df is reassigned with the computed enrichment values in a
# later cell, so this only fixes the expected shape and labels.
lin_df = pd.DataFrame(
    data=-1.0,
    index=trp_table.index,
    columns=trp_table.columns,
)

In [11]:
lin_df.head(2)

Unnamed: 0,A_HT_DHD_100,B_HT_DHD_100,A_HT_DHD_10,B_HT_DHD_10,A_HT_DHD_11,B_HT_DHD_11,A_HT_DHD_12,B_HT_DHD_12,A_HT_DHD_13,B_HT_DHD_13,...,Bcl-w,Bcl-xL,Bfl-1,FECM04,Mcl1[151-321],XCDP07,alphaBCL2,alphaBCLB,alphaBFL1,alphaMCL1
A_HT_DHD_100,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,...,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0
B_HT_DHD_100,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,...,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0


In [12]:
# Placeholder frame carrying trp_table's row/column labels, with every cell
# set to 0.0. log_df is reassigned with the log-transformed values in a
# later cell.
log_df = pd.DataFrame(
    data=0.0,
    index=trp_table.index,
    columns=trp_table.columns,
)

In [13]:
log_df.head(2)

Unnamed: 0,A_HT_DHD_100,B_HT_DHD_100,A_HT_DHD_10,B_HT_DHD_10,A_HT_DHD_11,B_HT_DHD_11,A_HT_DHD_12,B_HT_DHD_12,A_HT_DHD_13,B_HT_DHD_13,...,Bcl-w,Bcl-xL,Bfl-1,FECM04,Mcl1[151-321],XCDP07,alphaBCL2,alphaBCLB,alphaBFL1,alphaMCL1
A_HT_DHD_100,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
B_HT_DHD_100,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [14]:
# Grand total of every entry in the TRP table; NaN cells count as zero.
trp_count = np.nansum(trp_table.values)

In [15]:
trp_count

33394129.0

In [16]:
# Grand total of every entry in the HIS table; NaN cells count as zero.
his_count = np.nansum(his_table.values)

In [17]:
his_count

34925075.0

In [18]:
# Ratio of the TRP grand total to the HIS grand total. Multiplying HIS
# values by this factor rescales the HIS total to match the TRP total, so
# the two tables can be compared cell by cell.
norm_coef = trp_count / his_count

In [19]:
norm_coef

0.95616484717641981

In [20]:
# Linear enrichment per cell: normalised HIS value divided by TRP value.
# Missing TRP entries are replaced by 1 so the division stays defined.
# Missing HIS entries are left as NaN and propagate to the result (an
# earlier variant of this line filled them with 0 instead).
trp_denominator = trp_table.fillna(1)
lin_df = his_table.mul(norm_coef).div(trp_denominator)

In [21]:
lin_df.shape

(337, 337)

In [22]:
lin_df.head(2)

Unnamed: 0,A_HT_DHD_100,B_HT_DHD_100,A_HT_DHD_10,B_HT_DHD_10,A_HT_DHD_11,B_HT_DHD_11,A_HT_DHD_12,B_HT_DHD_12,A_HT_DHD_13,B_HT_DHD_13,...,Bcl-w,Bcl-xL,Bfl-1,FECM04,Mcl1[151-321],XCDP07,alphaBCL2,alphaBCLB,alphaBFL1,alphaMCL1
A_HT_DHD_100,0.177573,0.507057,0.696573,0.123217,0.109933,0.130741,0.44621,0.038056,0.102972,0.142586,...,0.16629,0.641765,0.220098,0.743066,0.115189,0.220653,0.087648,0.217044,0.265601,0.092495
B_HT_DHD_100,0.056722,0.14906,0.090836,0.215847,30.551306,0.159361,0.243387,0.168444,0.075407,0.345152,...,0.132801,0.055357,0.181321,0.170025,0.178908,0.087285,0.146081,0.112396,0.114027,0.121108


In [23]:
# Natural log of every enrichment value. Applying the NumPy ufunc to the
# whole frame is vectorized and keeps the row/column labels; it replaces
# `applymap(np.log)`, which calls the function once per cell and is
# deprecated in recent pandas. NaN inputs stay NaN.
log_df = np.log(lin_df)

In [24]:
log_df.head(2)

Unnamed: 0,A_HT_DHD_100,B_HT_DHD_100,A_HT_DHD_10,B_HT_DHD_10,A_HT_DHD_11,B_HT_DHD_11,A_HT_DHD_12,B_HT_DHD_12,A_HT_DHD_13,B_HT_DHD_13,...,Bcl-w,Bcl-xL,Bfl-1,FECM04,Mcl1[151-321],XCDP07,alphaBCL2,alphaBCLB,alphaBFL1,alphaMCL1
A_HT_DHD_100,-1.728371,-0.679132,-0.361583,-2.093807,-2.207881,-2.034538,-0.806965,-3.268688,-2.273302,-1.94781,...,-1.794025,-0.443533,-1.513681,-0.29697,-2.161178,-1.511162,-2.434421,-1.527657,-1.325759,-2.380603
B_HT_DHD_100,-2.869599,-1.903407,-2.398703,-1.533185,3.419407,-1.836584,-1.413101,-1.781155,-2.584851,-1.06377,...,-2.018906,-2.893954,-1.707488,-1.771807,-1.720883,-2.438579,-1.923596,-2.185731,-2.171319,-2.111075


In [27]:
#lin_df.to_csv('L44_lin_enrich_1mM_3AT.csv')

In [28]:
#log_df.to_csv('L44_log_enrich_1mM_3AT.csv')