In [1]:
import numpy as np
import pandas as pd
import statsmodels as sm
from scipy import stats
from statsmodels.stats.multitest import multipletests

import gnt

In [2]:
def safe_correlate(x, y):
    """Pearson correlation of two Series over their pairwise-complete observations.

    Positions where either ``x`` or ``y`` is null are removed from both inputs
    before the correlation is computed.

    Parameters
    ----------
    x, y : pandas.Series
        Values to correlate. The same null mask is used to index both, so they
        are expected to share an index.

    Returns
    -------
    The result of ``scipy.stats.pearsonr`` on the complete pairs, indexable as
    ``[0]`` (correlation coefficient) and ``[1]`` (p-value). When fewer than two
    complete pairs remain, ``(nan, nan)`` is returned instead, because
    ``pearsonr`` raises on inputs shorter than two elements.
    """
    nas = np.logical_or(x.isnull(), y.isnull())
    complete = ~nas
    # pearsonr needs at least two observations; report "no result" rather than
    # raising so that a single sparse pair cannot abort a batch of correlations.
    if complete.sum() < 2:
        return np.nan, np.nan
    return stats.pearsonr(x[complete], y[complete])

In [3]:
# Input tables, all read relative to the notebook's directory:
#   library_df     - the Cas12 FASN GI library annotation (raw data folder)
#   sample_info    - DepMap sample annotations
#   gene_effect_df - DepMap Achilles gene-effect table; its first CSV column
#                    becomes the row index
library_df = pd.read_csv('../data/raw/Cas12_FASN_GI_12k_library.csv')
sample_info = pd.read_csv('../data/DepMap/sample_info.csv')
gene_effect_df = pd.read_csv(
    '../data/DepMap/Achilles_gene_effect.csv.gz',
    index_col=0,
)

In [4]:
# Keep only the text before the first space of every column label
# (presumably the gene symbol of a "SYMBOL (id)"-style header - confirm against the CSV).
gene_effect_df.columns = [label.partition(' ')[0] for label in gene_effect_df.columns]

In [5]:
# DepMap IDs of the samples annotated with lineage 'blood' ...
blood_cell_lines = sample_info.loc[sample_info['lineage'] == 'blood', 'DepMap_ID']
# ... and the gene-effect rows whose index is one of those IDs.
is_blood_row = gene_effect_df.index.isin(blood_cell_lines)
blood_gene_effect_df = gene_effect_df[is_blood_row]

In [6]:
# Distinct (GENE_1, GENE_2) combinations exactly as listed in the library.
library_pairs = library_df[['GENE_1', 'GENE_2']].drop_duplicates()
n_library_pairs = library_pairs.shape[0]

# gnt's order_cols supplies the 'gene_a' / 'gene_b' columns used below
# (presumably the two genes in a canonical order, so A-B and B-A coincide - confirm in gnt).
ordered_pairs = gnt.score.order_cols(library_pairs, [0, 1], 'gene')
gene_combos = (
    ordered_pairs[['gene_a', 'gene_b']]
    .drop_duplicates()
    .reset_index(drop=True)
)

# How many library pairs disappeared as duplicates once expressed as gene_a / gene_b.
n_library_pairs - gene_combos.shape[0]

161

In [7]:
# Co-essentiality of every gene pair: Pearson r and p-value between the two genes'
# effect columns, once over all rows of gene_effect_df and once over the blood subset.
# Self-pairs and pairs with a gene missing from the gene-effect columns get no record,
# so their correlation columns are NaN after the join.
cor_columns = ['r_all', 'p_all', 'r_blood', 'p_blood']
screened_genes = set(gene_effect_df.columns)

cor_by_row = {}
for i, gene_1, gene_2 in gene_combos[['gene_a', 'gene_b']].itertuples(index=True, name=None):
    if gene_1 == gene_2 or gene_1 not in screened_genes or gene_2 not in screened_genes:
        continue
    all_cor = safe_correlate(gene_effect_df[gene_1], gene_effect_df[gene_2])
    blood_cor = safe_correlate(blood_gene_effect_df[gene_1], blood_gene_effect_df[gene_2])
    cor_by_row[i] = (all_cor[0], all_cor[1], blood_cor[0], blood_cor[1])

# Build the result columns once and attach them by row label instead of enlarging
# the frame cell-by-cell while it is being iterated.
cor_df = pd.DataFrame.from_dict(cor_by_row, orient='index', columns=cor_columns)
# Dropping any previous result columns first keeps the cell safe to re-run.
gene_combos = gene_combos.drop(columns=cor_columns, errors='ignore').join(cor_df)

In [8]:
# Drop every row that contains a missing value (pairs without computed correlations
# have NaN in the r_* / p_* columns) and renumber the remaining rows.
gene_combos = gene_combos.dropna().reset_index(drop=True)

# Benjamini-Hochberg adjustment of each p-value column. multipletests returns
# (reject, pvals_corrected, ...); element [1] is the array of adjusted p-values.
# The function is imported explicitly because `import statsmodels as sm` alone
# does not load the statsmodels.stats.multitest submodule.
gene_combos['fdr_bh_all'] = multipletests(gene_combos['p_all'], method='fdr_bh')[1]
gene_combos['fdr_bh_blood'] = multipletests(gene_combos['p_blood'], method='fdr_bh')[1]

In [9]:
# Show the pairs ordered by ascending BH-adjusted p-value over all cell lines.
gene_combos.sort_values(by='fdr_bh_all')

Unnamed: 0,gene_a,gene_b,r_all,p_all,r_blood,p_blood,fdr_bh_all,fdr_bh_blood
782,PSTK,SEPSECS,0.709867,7.153961e-119,0.545930,2.225195e-04,1.831414e-115,2.034464e-02
688,GPX4,PSTK,0.680166,1.539906e-105,0.340555,2.934855e-02,1.971080e-102,3.010007e-01
910,SCD,SREBF1,0.649440,2.575613e-93,0.762912,6.699927e-09,2.197857e-90,2.450259e-06
261,ACACA,FASN,0.622787,8.325751e-84,0.834928,1.155297e-11,5.328481e-81,9.858536e-09
909,SCAP,SREBF1,0.606891,1.468397e-78,0.605176,2.770259e-05,7.518191e-76,3.732560e-03
...,...,...,...,...,...,...,...,...
1826,GSR,GSX2,-0.000118,9.973917e-01,-0.459820,2.489387e-03,9.996492e-01,8.191959e-02
348,FASN,TP53,-0.000092,9.979780e-01,0.026116,8.712452e-01,9.996492e-01,9.836961e-01
2134,PLPP2,SLC36A3,-0.000044,9.990373e-01,-0.059503,7.117171e-01,9.996492e-01,9.367587e-01
1956,BMP15,PMVK,-0.000107,9.977132e-01,-0.123239,4.547960e-01,9.996492e-01,8.508685e-01


In [10]:
# Write the pair-level correlation table to disk; the row index is not written.
gene_combos.to_csv(
    '../data/processed/coessentiality_correlations.csv',
    index=False,
)

In [11]:
# Look up the annotation rows (including DepMap_ID) of the NOMO1 and MOLM13 cell lines.
sample_info.loc[sample_info['stripped_cell_line_name'].isin(['NOMO1', 'MOLM13'])]

Unnamed: 0,DepMap_ID,stripped_cell_line_name,CCLE_Name,Alias,COSMICID,sex,source,Achilles_n_replicates,cell_line_NNMD,culture_type,...,primary_or_metastasis,primary_disease,Subtype,age,Sanger_Model_ID,depmap_public_comments,lineage,lineage_subtype,lineage_sub_subtype,lineage_molecular_subtype
167,ACH-000168,NOMO1,NOMO1_HAEMATOPOIETIC_AND_LYMPHOID_TISSUE,,908451.0,Female,HSRRB,2.0,-2.180198,Suspension,...,Primary,Leukemia,"Acute Myelogenous Leukemia (AML), M5 (Eosinoph...",31.0,SIDM00580,,blood,AML,M5,
361,ACH-000362,MOLM13,MOLM13_HAEMATOPOIETIC_AND_LYMPHOID_TISSUE,,1330947.0,Male,DSMZ,3.0,-3.152902,Suspension,...,Primary,Leukemia,Acute Myelogenous Leukemia (AML),20.0,SIDM00437,,blood,AML,,


In [12]:
# Every distinct gene that appears in either position of a library pair
# (list order is arbitrary because it comes from a set).
genes = list(set(library_df['GENE_1']) | set(library_df['GENE_2']))

In [13]:
# Rows: the two DepMap IDs of interest; columns: genes that occur in the library.
# The selection is transposed so that genes become the rows.
is_target_line = gene_effect_df.index.isin(['ACH-000168', 'ACH-000362'])
is_library_gene = gene_effect_df.columns.isin(genes)
relavent_effects = gene_effect_df.loc[is_target_line, is_library_gene].T

In [14]:
# Preview the transposed selection: one row per selected gene, one column per selected DepMap ID.
relavent_effects

DepMap_ID,ACH-000168,ACH-000362
AARS,-1.560563,-1.885740
ACACA,-0.280365,0.494347
ACACB,-0.035320,0.051807
ACAT1,0.130197,0.203246
ACLY,-0.651805,-0.666325
...,...,...
UBE2G2,-0.585606,-0.683566
UBQLN3,0.007435,0.058443
VCP,-1.585862,-1.921206
WDR3,-1.062218,-1.189738


In [15]:
# Relabel the DepMap ID columns with their cell line names. The mapping is applied
# by ID rather than by position, so the labels stay correct whatever order the two
# rows had in the gene-effect table.
cell_line_names = {'ACH-000168': 'NOMO1', 'ACH-000362': 'MOLM13'}
relavent_effects = (
    relavent_effects
    .rename(columns=cell_line_names)
    .rename_axis(columns=None)
    # Fixed output column order; raises KeyError if either cell line is missing.
    .loc[:, ['NOMO1', 'MOLM13']]
)

In [16]:
# Save the per-gene effects of the two cell lines; the gene index is written as the first column.
relavent_effects.to_csv(
    '../data/DepMap/filtered_effects.csv',
)