In [77]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from scipy.stats import ttest_ind
from sklearn.metrics import r2_score
from scipy.stats import pearsonr, spearmanr
import plotly.express as px
import plotly.graph_objects as go
import matplotlib.patches as mpatches
from scipy.stats import ttest_ind, ttest_1samp, ttest_rel
import matplotlib as mpl
import umap
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from joblib import Parallel, delayed
from tqdm.notebook import tqdm, trange
import pickle
import matplotlib.ticker as mtick
import math
from itertools import combinations
import random
from sklearn.linear_model import LinearRegression
from qgrid import show_grid
from statsmodels.stats.multitest import fdrcorrection, multipletests


In [78]:
# Global matplotlib / seaborn styling applied to every figure in this notebook.
plt.rcParams['font.family'] = 'Arial'
# plt.rcParams["axes.labelweight"] = "bold"
# plt.rcParams['font.size'] = 18
# plt.rcParams['axes.linewidth'] = 2
plt.rcParams['figure.figsize'] = (10.0, 7.0)
# Font type 42 for PDF / PostScript output (matplotlib's TrueType embedding option).
plt.rcParams['pdf.fonttype'] = 42
plt.rcParams['ps.fonttype'] = 42
# NOTE(review): sns.set(...) further down is called without a `palette`
# argument and runs after this line, so this Dark2 choice may be overridden
# by seaborn's default palette -- confirm which palette is intended.
sns.set_palette("Dark2")
plt.rcParams['figure.dpi'] = 200
# NOTE(review): project-local import sitting outside the notebook's import cell.
from crispy.CrispyPlot import CrispyPlot

# Seaborn theme; the rc overrides come from the project's CrispyPlot.SNS_RC.
sns.set(
    style="ticks",
    context="paper",
    font_scale=0.75,
    font="Arial",
    rc=CrispyPlot.SNS_RC,
)

In [79]:
# Random seed constant. NOTE(review): nothing in the visible part of this
# notebook passes it to an RNG -- confirm whether it is still needed.
seed = 42

# Load data

In [80]:
# Colour palette: first column of a header-less text file, as a plain list.
colour_list = pd.read_csv('../../data/colour_list.txt', header=None)[0].tolist()

# Tab-separated protein matrix, indexed by its 'Cell_line' column.
protein_ruv = pd.read_csv(
    "../../data/protein/e0022_diann_051021_working_matrix_averaged_processed.txt",
    sep='\t').set_index('Cell_line')


# Tab-separated sample mapping; keeps pandas' default integer row index.
meta = pd.read_csv("../../data/e0022_diann_051021_sample_mapping_averaged.txt",
                   sep='\t')

# Cell lines whose tissue is not 'Haematopoietic and Lymphoid'.
# `meta` has no named index, so `.index.values` would return integer row
# labels rather than cell-line names; take the 'Cell_line' column instead.
cell_lines_no_blood = meta.loc[
    meta['Tissue_type'] != 'Haematopoietic and Lymphoid', 'Cell_line'
].unique()

In [81]:
# Cell line -> tissue type, using the first `meta` row seen for each cell line.
tissue_type_map = (
    meta.drop_duplicates(['Cell_line'])
    .set_index(['Cell_line'])['Tissue_type']
    .to_dict()
)
# Cell line -> cancer type, built the same way.
cancer_type_map = (
    meta.drop_duplicates(['Cell_line'])
    .set_index(['Cell_line'])['Cancer_type']
    .to_dict()
)

In [82]:
# Number of distinct cell lines per tissue type.
tissue_count = (
    meta.drop_duplicates(['Cell_line', 'Tissue_type'])
    .groupby('Tissue_type')
    .size()
)
# Tissues represented by more than 30 cell lines.
major_tissues = tissue_count.loc[tissue_count.gt(30)].index.values

In [83]:
# Hex colour per cancer-type label.
# NOTE(review): some labels occur twice with different capitalisation and
# different colours ('Non-Small Cell Lung Carcinoma' / 'Non-small Cell Lung
# Carcinoma', 'B-Lymphoblastic Leukemia' / 'B-Lymphoblastic leukemia',
# "T-Cell Non-Hodgkin's Lymphoma" / "T-cell Non-Hodgkin's Lymphoma") --
# confirm whether both spellings really appear in the metadata.
cancer_colours = {
    'Non-Small Cell Lung Carcinoma': '#007fff',
    'Prostate Carcinoma': '#665d1e',
    'Gastric Carcinoma': '#ffbf00',
    'Glioblastoma': '#fbceb1',
    'Melanoma': '#ff033e',
    'Bladder Carcinoma': '#ab274f',
    'B-Lymphoblastic Leukemia': '#d5e6f7',
    'Kidney Carcinoma': '#7cb9e8',
    'Thyroid Gland Carcinoma': '#efdecd',
    'Rhabdomyosarcoma': '#8db600',
    'Head and Neck Carcinoma': '#e9d66b',
    'Ovarian Carcinoma': '#b284be',
    "B-Cell Non-Hodgkin's Lymphoma": '#b2beb5',
    'Other Solid Carcinomas': '#10b36f',
    "Ewing's Sarcoma": '#6e7f80',
    'T-Lymphoblastic Leukemia': '#ff7e00',
    'Plasma Cell Myeloma': '#87a96b',
    'Endometrial Carcinoma': '#c9ffe5',
    'Non-Cancerous': '#9f2b68',
    'Breast Carcinoma': '#00ffff',
    'Pancreatic Carcinoma': '#008000',
    'Neuroblastoma': '#cd9575',
    "Burkitt's Lymphoma": '#72a0c1',
    'Hairy Cell Leukemia': '#a32638',
    'Chronic Myelogenous Leukemia': '#9966cc',
    'Glioma': '#f19cbb',
    'Cervical Carcinoma': '#e32636',
    'Colorectal Carcinoma': '#3b7a57',
    'Hepatocellular Carcinoma': '#faebd7',
    'Vulvar carcinoma': '#fdee00',
    'Osteosarcoma': '#00308f',
    'Chondrosarcoma': '#7fffd4',
    'Small Cell Lung Carcinoma': '#c46210',
    'Esophageal Carcinoma': '#a8bb19',
    'Uncertain': '#ff9966',
    "T-Cell Non-Hodgkin's Lymphoma": '#a52a2a',
    'Non-small Cell Lung Carcinoma': '#568203',
    'Other Sarcomas': '#4b5320',
    'Biliary Tract Carcinoma': '#5d8aa8',
    'Acute Myeloid Leukemia': '#8f9779',
    "Hodgkin's Lymphoma": '#915c83',
    'Mesothelioma': '#841b2d',
    'B-Lymphoblastic leukemia': '#a4c639',
    'Other Blood Cancers': '#3b444b',
    'Carcinoid Tumour': '#006600',
    'Leiomyosarcoma': '#0000ff',
    "T-cell Non-Hodgkin's Lymphoma": '#666699'
}

# Hex colour per tissue-type label (labels such as 'Haematopoietic and
# Lymphoid' match the values compared against meta['Tissue_type'] above).
tissue_colours = {
    'Lung': '#007fff',
    'Prostate': '#665d1e',
    'Stomach': '#ffbf00',
    'Central Nervous System': '#fbceb1',
    'Skin': '#ff033e',
    'Bladder': '#ab274f',
    'Haematopoietic and Lymphoid': '#d5e6f7',
    'Kidney': '#7cb9e8',
    'Thyroid': '#efdecd',
    'Soft Tissue': '#8db600',
    'Head and Neck': '#e9d66b',
    'Ovary': '#b284be',
    'Bone': '#b2beb5',
    'Endometrium': '#10b36f',
    'Breast': '#6e7f80',
    'Pancreas': '#ff7e00',
    'Peripheral Nervous System': '#87a96b',
    'Cervix': '#c9ffe5',
    'Large Intestine': '#9f2b68',
    'Liver': '#00ffff',
    'Vulva': '#008000',
    'Esophagus': '#cd9575',
    'Biliary Tract': '#72a0c1',
    'Other tissue': '#a32638',
    'Small Intestine': '#9966cc',
    'Placenta': '#f19cbb',
    'Testis': '#e32636',
    'Adrenal Gland': '#3b7a57'
}

# Hex colour per instrument code ('M01'..'M06').
instrument_colours = {
    'M01': '#66c2a5',
    'M02': '#fc8d62',
    'M03': '#8da0cb',
    'M04': '#e78ac3',
    'M05': '#a6d854',
    'M06': '#ffd92f'
}

# Hex colour per batch code ('P01'..'P06').
batch_colours = {
    'P01': '#7fc97f',
    'P02': '#beaed4',
    'P03': '#fdc086',
    'P04': '#386cb0',
    'P05': '#f0027f',
    'P06': '#bf5b17'
}

# Colour used for the catch-all 'Other' label in both palettes.
tissue_colours.update({'Other': 'white'})
cancer_colours.update({'Other': 'white'})

In [84]:
# One metadata row per cell line (first occurrence in `meta` is kept).
meta_cell_lines = meta.drop_duplicates('Cell_line')

In [85]:
# UniProt id-mapping table (tab-separated, gzip-compressed).
name_map_df = pd.read_csv("../../data/misc/uniprot_human_idmap.tab.gz", sep='\t')

# 'Entry name' -> primary gene name; when an entry name repeats, the last row wins.
name_map_dict = (
    name_map_df.set_index("Entry name")['Gene names  (primary )'].to_dict()
)
# Same dictionary object under the name used by the protein/RNA code.
protein2rna_map = name_map_dict
# Reverse lookup: primary gene name -> 'Entry name' (last row wins on repeats).
rna2protein_map = (
    name_map_df.set_index("Gene names  (primary )")['Entry name'].to_dict()
)

In [86]:
# Read the RNA matrix, transpose it, and expose the resulting row labels as a
# regular 'SIDM' column.
rna_raw = (
    pd.read_csv("../../data/rna/rnaseq_voom.csv", index_col=0)
    .T
    .rename_axis('SIDM')
    .reset_index()
)

# Attach the cell-line name through the SIDM <-> Cell_line pairs in `meta`
# (merge on the shared column names), then index the samples by cell line.
rna_sample = (
    pd.merge(rna_raw, meta[['SIDM', 'Cell_line']].drop_duplicates())
    .drop(['SIDM'], axis=1)
    .set_index(['Cell_line'])
)

In [87]:
# Drug response table; low_memory=False makes pandas read the file in one
# pass instead of inferring dtypes chunk by chunk.
drug_df = pd.read_csv('../../data/drug/drug_final_processed_eg_id.csv.gz', low_memory=False)

In [88]:
# Natural log of the maximum screening concentration, stored as a new column
# (calc_box later reports it next to the 'ln_IC50' values).
drug_df['max_screening_conc_ln'] = drug_df['max_screening_conc'].map(np.log)

In [89]:
# drug_id -> ln(max screening concentration); if a drug_id has several
# distinct values the last one wins.
max_conc_map = (
    drug_df[['drug_id', 'max_screening_conc_ln']]
    .drop_duplicates()
    .set_index('drug_id')['max_screening_conc_ln']
    .to_dict()
)

In [90]:
# Linear-model association results (one row per drug 'y_id' / gene 'x_id' pair,
# as shown by the table outputs further down).
lm_res = pd.read_csv("../../result_files/lm/lm_sklearn_degr_drug_annotated_diann_051021.csv.gz")
# Alternative (older) results file, kept for reference:
# lm_res = pd.read_csv("../../result_files/lm/lm_sklearn_degr_drug_annotated_old.csv.gz")

In [44]:
# Results from a differently named run of the same model.
# NOTE(review): `lm_res_old` is not referenced again in the visible part of
# this notebook -- confirm whether this cell is still needed.
lm_res_old = pd.read_csv("../../result_files/lm/lm_sklearn_degr_drug_annotated_DIANN.csv.gz")

In [15]:
# Size of the table after applying only the significance cut
# (fdr < 0.1 or nc_fdr < 0.001).
lm_res[(lm_res['fdr']<0.1)|(lm_res['nc_fdr']<0.001)].shape

(70248, 16)

In [16]:
# Keep associations that are significant in either model (fdr < 0.1 or
# nc_fdr < 0.001) and whose r2 exceeds 0.4 and skew is below -1.
# `.copy()` detaches the result from the full table so that the column
# assignment below does not write into a slice (SettingWithCopyWarning).
# This cell overwrites `lm_res`; reload the CSV before re-running it.
lm_res = lm_res.loc[
    ((lm_res['fdr'] < 0.1) | (lm_res['nc_fdr'] < 0.001))
    & (lm_res['r2'] > 0.4) & (lm_res['skew'] < -1)
].copy()
# Protein entry name for each gene symbol; NaN when the symbol is not in the map.
lm_res['x_protein'] = lm_res['x_id'].map(rna2protein_map)

In [17]:
# Spot-check three gene/drug pairs: DBNL-Dabrafenib, NOC2L-AZD5582 and
# TSNAX-Venetoclax (drug matched by substring of 'y_id').
lm_res[((lm_res['x_id'] == 'DBNL') &
        (lm_res['y_id'].str.contains('Dabrafenib'))) |
       ((lm_res['x_id'] == 'NOC2L') &
        (lm_res['y_id'].str.contains('AZD5582'))) |
       ((lm_res['x_id'] == 'TSNAX') &
        (lm_res['y_id'].str.contains('Venetoclax')))]

Unnamed: 0,y_id,x_id,n,beta,lr,covs,pval,fdr,nc_beta,nc_lr,nc_pval,nc_fdr,r2,target,ppi,skew,x_protein
266124,1373;Dabrafenib;GDSC1,DBNL,782,-0.160988,7.4141,22,0.006471,0.427061,-0.253135,16.842463,4.1e-05,0.000503,0.495096,BRAF,2,-1.809261,DBNL_HUMAN
682045,1373;Dabrafenib;GDSC2,DBNL,876,-0.086523,2.269173,22,0.13197,0.642263,-0.242367,16.788143,4.2e-05,0.000625,0.591585,BRAF,2,-1.457743,DBNL_HUMAN


In [18]:
# (y_id, x_id, x_protein) triples, one per retained association; this is the
# list calc_box iterates over.
lm_assoc_list = list(
    lm_res[['y_id', 'x_id', 'x_protein']].itertuples(index=False, name=None)
)

In [19]:
# Preview the first rows of the filtered association table.
lm_res.head()

Unnamed: 0,y_id,x_id,n,beta,lr,covs,pval,fdr,nc_beta,nc_lr,nc_pval,nc_fdr,r2,target,ppi,skew,x_protein
11,1403;AZD6094;GDSC1,CAPZA1,864,0.361678,37.166309,22,1.084717e-09,6e-06,0.203067,15.666215,7.556195e-05,0.02862279,0.430568,MET,3,-4.084074,CAZA1_HUMAN
53,1427;AZD5582;GDSC1,CD47,838,-0.526607,31.077573,22,2.479194e-08,9.6e-05,-0.419799,52.910872,3.490322e-13,4.766907e-10,0.579682,BIRC2;BIRC3;XIAP,-,-1.084352,CD47_HUMAN
54,1427;AZD5582;GDSC1,SSR4,855,-0.541242,30.400967,22,3.513529e-08,9.6e-05,-0.552433,40.906667,1.596755e-10,7.930064e-08,0.579682,BIRC2;BIRC3;XIAP,-,-1.084352,SSRD_HUMAN
65,1427;AZD5582;GDSC1,PRKCSH,853,-0.684837,28.556612,22,9.099856e-08,0.000166,-0.721808,42.776124,6.137673e-11,3.725567e-08,0.579682,BIRC2;BIRC3;XIAP,-,-1.084352,GLU2B_HUMAN
118,1936;Savolitinib;GDSC2,GNB1,904,0.298049,26.183206,22,3.105113e-07,0.000415,0.245486,26.499893,2.635527e-07,0.0002080561,0.624177,MET,2,-1.306811,GBB1_HUMAN


In [20]:
# Associations whose gene ('x_id') is MET.
lm_res[lm_res['x_id']=='MET']

Unnamed: 0,y_id,x_id,n,beta,lr,covs,pval,fdr,nc_beta,nc_lr,nc_pval,nc_fdr,r2,target,ppi,skew,x_protein
373,1936;Savolitinib;GDSC2,MET,423,-0.468669,28.601314,22,8.892182e-08,0.00013,-0.436943,45.093727,1.878261e-11,3.973461e-08,0.616297,MET,T,-1.306811,MET_HUMAN
16069,1403;AZD6094;GDSC1,MET,415,-0.327702,19.166517,22,1.197964e-05,0.025163,-0.431849,58.592743,1.939305e-14,8.14702e-11,0.444541,MET,T,-4.084074,MET_HUMAN
17076,1958;Merestinib;GDSC2,MET,337,-0.39923,15.575225,22,7.92867e-05,0.027103,-0.45738,39.269668,3.691313e-10,1.514177e-06,0.402407,MET;MST1R,T,-1.143132,MET_HUMAN
61129,2103;Capmatinib;GDSC2,MET,249,-0.194174,9.18975,22,0.002433741,0.106761,-0.258109,30.141646,4.016135e-08,0.0001602036,0.7303,MET,T,-1.898467,MET_HUMAN


In [20]:
# Cancer gene list; the 'gene_symbol' column is used for the membership
# checks below.
cancer_genes = pd.read_csv("../../data/misc/cancer_genes_latest.csv")

In [21]:
# Flag associations whose gene symbol appears in the cancer gene list.
# `isin` performs the lookup once for the whole column instead of scanning
# the gene_symbol array for every row; dropping NaN from the reference list
# keeps a missing 'x_id' flagged False, as the per-row `in` test did.
lm_res['cancer_gene'] = lm_res['x_id'].isin(
    cancer_genes['gene_symbol'].dropna())

In [22]:
# Associations involving a listed cancer gene.
lm_res.query('cancer_gene == True')

Unnamed: 0,y_id,x_id,n,beta,lr,covs,pval,fdr,nc_beta,nc_lr,nc_pval,nc_fdr,r2,target,ppi,skew,x_protein,cancer_gene
145,1936;Savolitinib;GDSC2,SMARCB1,877,0.335612,24.438985,22,7.670079e-07,0.000455,0.274298,20.763088,5.197514e-06,1.436073e-03,0.624177,MET,3,-1.306811,SNF5_HUMAN,True
173,1936;Savolitinib;GDSC2,DHX9,906,0.466176,23.398120,22,1.317072e-06,0.000582,0.457850,29.168574,6.634664e-08,7.332631e-05,0.624177,MET,3,-1.306811,DHX9_HUMAN,True
177,1078;Imatinib;GDSC2,ARFGEF2,75,-0.540562,28.139649,22,1.128697e-07,0.000586,-0.512654,22.721867,1.872251e-06,4.858491e-03,0.584781,ABL1;KIT;PDGFRA,-,-1.969494,BIG2_HUMAN,True
331,1936;Savolitinib;GDSC2,MET,421,-0.377551,20.535615,22,5.853199e-06,0.001294,-0.420304,42.936755,5.653838e-11,3.124311e-07,0.624177,MET,T,-1.306811,MET_HUMAN,True
549,2103;Capmatinib;GDSC2,PLXNA1,157,-0.263634,25.711817,22,3.963924e-07,0.002018,-0.136108,11.070910,8.769262e-04,2.349239e-01,0.663131,MET,2,-1.898467,PLXA1_HUMAN,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3778359,1032;Afatinib;GDSC1,HSPA8,877,0.008806,0.011841,22,9.133496e-01,0.994546,0.488029,33.052960,8.968240e-09,1.502934e-07,0.564904,EGFR;ERBB2,-,-1.532923,HSP7C_HUMAN,True
3985168,1377;Afatinib;GDSC1,HSPA8,850,-0.001980,0.000456,22,9.829639e-01,0.998364,0.498356,27.863604,1.301756e-07,2.301875e-06,0.643926,EGFR;ERBB2,-,-1.662421,HSP7C_HUMAN,True
4103955,1909;Venetoclax;GDSC2,IDH2,903,-0.000608,0.000065,22,9.935682e-01,0.999277,-0.170214,18.692115,1.536164e-05,5.162383e-04,0.750149,BCL2,3,-1.562840,IDHP_HUMAN,True
4142493,1032;Afatinib;GDSC1,FN1,714,0.000162,0.000030,22,9.956592e-01,0.999489,0.117780,18.840232,1.421371e-05,1.011573e-04,0.564904,EGFR;ERBB2,-,-1.532923,FINC_HUMAN,True


In [69]:
# Split 'y_id' (e.g. '1373;Dabrafenib;GDSC1') on ';' into three columns.
# The assignment targets exactly three columns, so every 'y_id' is expected
# to split into no more than three pieces.
lm_res[["ID", "drug_name", "GDSC"]] = lm_res["y_id"].str.split(pat=";", expand=True)

In [70]:
# Columns exported to the pan-cancer supplementary table, after the three
# identifier columns split out of 'y_id'.
cols = [
    'y_id', 'x_id', 'n', 'beta', 'pval', 'fdr',
    'nc_beta', 'nc_pval', 'nc_fdr',
    'r2', 'target', 'ppi', 'skew', 'cancer_gene',
]
lm_res.to_csv(
    "../../result_files/lm/drug_supptable_pancancer_051021.csv",
    columns=["ID", "drug_name", "GDSC"] + cols,
    index=False,
)

# Calculate per-tissue quantile statistics

In [24]:
# Matrices read by calc_box (as module-level globals): protein and RNA
# tables, both indexed by cell line.
protein_df = protein_ruv
rna_df = rna_sample

In [25]:
def calc_box(step=0.2, tissues=None):
    """Summarise, per tissue, how ln(IC50) changes across protein / RNA quantile bins.

    For every ``(drug_id, rna_target, protein_target)`` triple in the global
    ``lm_assoc_list`` the protein column (``protein_df``), the RNA column
    (``rna_df``), the drug's ``ln_IC50`` values (``drug_df``) and the tissue
    label (``tissue_type_map``) are joined per cell line and incomplete rows
    are dropped. Within each tissue the cell lines are binned into quantiles
    of protein and of RNA abundance and the median IC50 of each bin is taken.

    Parameters
    ----------
    step : float, default 0.2
        Width of one quantile bin (0.2 gives quintiles). A tissue is skipped
        unless it has at least ``int(1 / step)`` distinct protein values and
        as many distinct RNA values.
    tissues : sequence of str, optional
        Tissues to evaluate. ``None`` or an empty sequence means "every
        tissue that has complete data for the association being processed".

    Returns
    -------
    pandas.DataFrame
        One row per (association, tissue) that passed the size check. The
        ``*_first_q`` / ``*_last_q`` columns are the median IC50 of the
        lowest / highest bin minus the tissue's mean IC50; ``*_first_nq_std``
        is the standard deviation of the bin medians excluding the highest
        bin; ``mono`` tells whether the protein bin medians are monotonic
        within a 0.1 tolerance; ``p_corr_*`` / ``p_pval_*`` are Pearson
        statistics of IC50 against protein / RNA; ``counts`` is the number of
        cell lines used. The frame has these columns even when it is empty.

    Raises
    ------
    KeyError
        If a drug that reaches the statistics stage is missing from the
        global ``max_conc_map``.
    """
    result_columns = [
        'drug_id', 'protein', 'tissue', 'protein_first_q', 'protein_last_q',
        'rna_first_q', 'rna_last_q', 'protein_first_nq_std',
        'rna_first_nq_std', 'IC50_mean', 'max_conc', 'mono', 'p_corr_protein',
        'p_pval_protein', 'p_corr_rna', 'p_pval_rna', 'counts'
    ]
    # Slack allowed between consecutive bin medians in the monotonicity test.
    mono_tolerance = 0.1
    min_unique = int(1 / step)
    quantile_edges = np.arange(0, 1.0001, step)
    # Decide once whether the caller restricted the tissues. The tissue list
    # must be resolved per association: reusing the tissues found for the
    # first association would silently drop tissues that only occur later.
    use_all_tissues = tissues is None or len(tissues) == 0

    records = []
    for drug_id, rna_target, protein_target in tqdm(lm_assoc_list):
        if protein_target not in protein_df.columns or rna_target not in rna_df.columns:
            continue
        # cell line -> ln(IC50) for this drug
        ic50_map = drug_df[drug_df['drug_id'] == drug_id].set_index(
            'cell_line_name')['ln_IC50'].to_dict()
        # Both matrices are indexed by 'Cell_line'; the merge keeps that index.
        tmp_data = pd.merge(protein_df[[protein_target]],
                            rna_df[[rna_target]],
                            on='Cell_line')
        tmp_data['IC50'] = tmp_data.index.map(ic50_map).values
        tmp_data['tissue'] = tmp_data.index.map(tissue_type_map).values
        tmp_data = tmp_data.dropna().reset_index(drop=True)

        assoc_tissues = tmp_data['tissue'].unique() if use_all_tissues else tissues
        for tissue in assoc_tissues:
            tissue_data = tmp_data[tmp_data['tissue'] ==
                                   tissue].reset_index(drop=True)
            # Too few distinct values to fill the requested number of bins.
            if (tissue_data[protein_target].unique().size < min_unique
                    or tissue_data[rna_target].unique().size < min_unique):
                continue

            ic50 = tissue_data['IC50']
            protein_q = pd.qcut(tissue_data[protein_target],
                                quantile_edges,
                                duplicates='drop')
            rna_q = pd.qcut(tissue_data[rna_target],
                            quantile_edges,
                            duplicates='drop')

            # Median IC50 per bin, lowest bin first; computed once and reused.
            protein_medians = ic50.groupby(protein_q,
                                           observed=False).median().values
            rna_medians = ic50.groupby(rna_q, observed=False).median().values

            mono = (np.all(protein_medians[1:] >=
                           protein_medians[:-1] - mono_tolerance)
                    or np.all(protein_medians[1:] <=
                              protein_medians[:-1] + mono_tolerance))

            ic50_mean = ic50.mean()
            p_corr_protein, p_pval_protein = pearsonr(
                ic50, tissue_data[protein_target])
            p_corr_rna, p_pval_rna = pearsonr(ic50, tissue_data[rna_target])

            records.append({
                'drug_id': drug_id,
                'protein': protein_target,
                'tissue': tissue,
                'protein_first_q': protein_medians[0] - ic50_mean,
                'protein_last_q': protein_medians[-1] - ic50_mean,
                'rna_first_q': rna_medians[0] - ic50_mean,
                'rna_last_q': rna_medians[-1] - ic50_mean,
                'protein_first_nq_std': np.std(protein_medians[:-1]),
                'rna_first_nq_std': np.std(rna_medians[:-1]),
                'IC50_mean': ic50_mean,
                'max_conc': max_conc_map[drug_id],
                'mono': mono,
                'p_corr_protein': p_corr_protein,
                'p_pval_protein': p_pval_protein,
                'p_corr_rna': p_corr_rna,
                'p_pval_rna': p_pval_rna,
                'counts': tissue_data.shape[0]
            })
    # Explicit columns keep the schema stable when nothing passed the checks.
    return pd.DataFrame(records, columns=result_columns)

In [26]:
# Run the quantile analysis with quintile bins.
res_df = calc_box(step=0.2)

# Absolute gap between the RNA and protein top-bin IC50 offsets.
res_df['last_q_delta'] = (res_df['rna_last_q'] - res_df['protein_last_q']).abs()

# Persist the table so the analysis below can start from the file.
res_df.to_csv("../../result_files/lm_box/box_plot_drug_tissues_051021.csv", index=False)

  0%|          | 0/7698 [00:00<?, ?it/s]

# Analysis

In [27]:
# Reload the saved calc_box output so the analysis can run without
# recomputing it.
res_df = pd.read_csv("../../result_files/lm_box/box_plot_drug_tissues_051021.csv")

In [28]:
# Attach the linear-model statistics to each (drug, protein) row. This is an
# inner join, so rows without a matching ('y_id', 'x_protein') pair in
# `lm_res` are dropped.
lm_columns = [
    'y_id', 'x_protein', 'x_id', 'fdr', 'target', 'beta',
    'nc_fdr', 'nc_beta', 'skew', 'ppi', 'r2',
]
res_df = res_df.merge(
    lm_res[lm_columns],
    left_on=['drug_id', 'protein'],
    right_on=['y_id', 'x_protein'],
)
del lm_columns

In [29]:
# Median IC50 of the top protein / RNA bin relative to the drug's
# ln(max screening concentration): the tissue mean is added back to the
# stored offset and max_conc is subtracted.
res_df['protein_last_q_minus_max_c'] = (
    res_df['protein_last_q'].add(res_df['IC50_mean']).sub(res_df['max_conc'])
)
res_df['rna_last_q_minus_max_c'] = (
    res_df['rna_last_q'].add(res_df['IC50_mean']).sub(res_df['max_conc'])
)
# Signed protein-minus-RNA difference; replaces the 'last_q_delta' column
# that was loaded from the CSV.
res_df['last_q_delta'] = (
    res_df['protein_last_q_minus_max_c'].sub(res_df['rna_last_q_minus_max_c'])
)
# Positive when |Pearson r| with IC50 is larger for protein than for RNA.
res_df['corr_diff'] = (
    res_df['p_corr_protein'].abs().sub(res_df['p_corr_rna'].abs())
)

In [30]:
# Table with per-protein 'corr_diff' and 'corr_avg' columns.
corr_dict_merged = pd.read_csv("../../data/protein_rna_correlations.csv")

# Among proteins with |corr_diff| < 0.2, keep the 2000 with the lowest corr_avg.
target_proteins = (
    corr_dict_merged.loc[corr_dict_merged['corr_diff'].abs() < 0.2]
    .sort_values('corr_avg')
    .head(2000)['protein']
    .values
)

In [31]:
# Rows x columns of the annotated per-tissue table.
res_df.shape

(149397, 32)

In [32]:
# Keep tissue-level associations that satisfy every criterion below, ordered
# by how much the protein correlation exceeds the RNA correlation.
drug_res_filtered = (
    res_df.loc[
        res_df['protein'].isin(target_proteins)             # in the selected protein set
        & res_df['beta'].lt(0)                              # negative model coefficient
        & res_df['p_pval_protein'].lt(0.1)                  # protein-IC50 Pearson p-value
        & res_df['counts'].gt(20)                           # more than 20 cell lines
        & res_df['mono'].eq(True)                           # monotonic bin medians
        & res_df['corr_diff'].gt(0.15)                      # protein |r| exceeds RNA |r|
        & res_df['protein_last_q_minus_max_c'].lt(0)        # top-bin median below max conc
    ]
    .sort_values('corr_diff', ascending=False)
    .loc[:, [
        'drug_id', 'protein', 'x_id', 'target', 'beta', 'fdr', 'nc_fdr',
        'nc_beta', 'ppi', 'r2', 'tissue', 'p_corr_protein',
        'p_pval_protein', 'corr_diff', 'counts', 'skew',
    ]]
)

In [33]:
# Number of associations that survived the filter.
drug_res_filtered.shape

(108, 16)

In [34]:
# Interactive qgrid view of the filtered table (widget output only).
show_grid(drug_res_filtered)

QgridWidget(grid_options={'fullWidthRows': True, 'syncColumnCellResize': True, 'forceFitColumns': True, 'defau…

In [35]:
# Save the filtered associations for this data release ('051021' suffix).
drug_res_filtered.to_csv(
    "../../result_files/drug_tissue/drug_tissue_sensitive_associations_filtered_diann_051021.csv",
    index=False)

In [43]:
# NOTE(review): this replaces `drug_res_filtered` with the contents of a file
# carrying a different suffix ('270821') from the one written in the cell
# above ('051021'), so everything below runs on that file's rows when the
# notebook is executed top to bottom -- confirm this is intended.
drug_res_filtered = pd.read_csv(
    "../../result_files/drug_tissue/drug_tissue_sensitive_associations_filtered_diann_270821.csv"
)

In [36]:
# Flag associations whose gene symbol appears in the cancer gene list.
# `isin` performs the lookup once for the whole column instead of scanning
# the gene_symbol array for every row; dropping NaN from the reference list
# keeps a missing 'x_id' flagged False, as the per-row `in` test did.
drug_res_filtered['cancer_gene'] = drug_res_filtered['x_id'].isin(
    cancer_genes['gene_symbol'].dropna())

In [37]:
# Column order of the tissue-level supplementary table. This rebinds `cols`,
# which was previously the pan-cancer column list.
cols = [
    'drug_id', 'protein', 'x_id', 'target',
    'beta', 'fdr', 'nc_fdr', 'nc_beta',
    'ppi', 'r2', 'tissue',
    'p_corr_protein', 'p_pval_protein', 'corr_diff',
    'counts', 'skew', 'cancer_gene',
]
drug_res_filtered.to_csv(
    "../../result_files/lm_box/drug_tissue_sensitive_associations_filtered_diann_supptable_051021.csv",
    columns=cols,
    index=False,
)

In [64]:
# Split 'drug_id' on ';' into three columns. The assignment targets exactly
# three columns, so every 'drug_id' is expected to split into no more than
# three pieces.
drug_res_filtered[["ID", "drug_name", "GDSC"]] = drug_res_filtered["drug_id"].str.split(pat=";", expand=True)

In [75]:
# Write the supplementary table again with the three identifier columns in
# front. The path is the same as in the earlier export, so that file is
# overwritten.
drug_res_filtered.to_csv(
    "../../result_files/lm_box/drug_tissue_sensitive_associations_filtered_diann_supptable_051021.csv",
    columns=["ID", "drug_name", "GDSC"] + cols,
    index=False,
)

In [14]:
# NOTE(review): this replaces `drug_res_filtered` with yet another file
# ('..._10pc.csv'). The cells below read the columns 'cancer_gene', 'ID',
# 'drug_name' and 'GDSC' from it; confirm the file provides them, or whether
# this cell (execution count 14, earlier than its neighbours) is a leftover.
drug_res_filtered = pd.read_csv(
    "../../result_files/drug_tissue/drug_tissue_sensitive_associations_filtered_10pc.csv")

In [53]:
# Size of the short list: rows with beta < -0.15, at least 30 cell lines and
# corr_diff >= 0.3, plus every row flagged as a cancer gene.
drug_res_filtered.query(
    "(beta < -0.15 and counts >= 30 and corr_diff >= 0.3) or cancer_gene == True"
)[cols].shape

(20, 17)

In [76]:
# Export the short list (beta < -0.15, at least 30 cell lines and
# corr_diff >= 0.3, or any cancer gene) with the identifier columns in front.
drug_res_filtered.query(
    "(beta < -0.15 and counts >= 30 and corr_diff >= 0.3) or cancer_gene == True"
).to_csv(
    "../../result_files/lm_box/drug_tissue_sensitive_associations_filtered_diann_supptable_051021_top20.csv",
    columns=["ID", "drug_name", "GDSC"] + cols,
    index=False,
)