In [2]:
import pandas as pd
import numpy as np
import gseapy as gp
import matplotlib.pyplot as plt
from utils_decoding import *
from utils_preproc import *

In [3]:
# Path to the GMT gene-set file handed to gp.ssgsea in the enrichment cells.
# NOTE(review): the file name suggests the MSigDB Hallmark collection v7.5.1
# (gene symbols) — confirm the file's provenance.
gene_sets = 'data/h.all.v7.5.1.symbols.gmt'

In [4]:
# Identifier of the experiment branch; it selects the results folder below.
branch = '117'
# Root folder that all saliency inputs/outputs in this notebook are resolved against.
path = f'root_folder/root_{branch}'

In [5]:
# Load the expression data restricted to L1000 landmark genes. The helper
# returns three objects; the third carries the gene labels.
# NOTE(review): presumably a pandas Index/Series (it exposes `.values`) — confirm.
cell_dict, cell_feature, gene_labels = save_gene_expr_matrix_X(filter_by_l1000=True)

# Keep the gene labels as a plain array; later cells use it as the row index.
genes = gene_labels.values
genes

number of preserved landmark genes: 956


array(['ABCB6', 'ABCC5', 'ABCF1', 'ABCF3', 'ABHD4', 'ABHD6', 'ABL1',
       'ACAA1', 'ACAT2', 'ACBD3', 'ACD', 'ACLY', 'ACOT9', 'ADAM10',
       'ADAT1', 'ADGRE5', 'ADGRG1', 'ADH5', 'ADI1', 'ADO', 'ADRB2', 'AGL',
       'AKAP8', 'AKAP8L', 'AKR7A2', 'AKT1', 'ALAS1', 'ALDH7A1', 'ALDOA',
       'ALDOC', 'AMDHD2', 'ANKRD10', 'ANO10', 'ANXA7', 'APBB2', 'APOE',
       'APP', 'APPBP2', 'ARFIP2', 'ARHGAP1', 'ARHGEF12', 'ARHGEF2',
       'ARID4B', 'ARID5B', 'ARL4C', 'ARNT2', 'ARPP19', 'ASAH1', 'ASCC3',
       'ATF1', 'ATF5', 'ATF6', 'ATG3', 'ATMIN', 'ATP11B', 'ATP1B1',
       'ATP2C1', 'ATP6V0B', 'ATP6V1D', 'AURKA', 'AURKB', 'AXIN1',
       'B4GAT1', 'BACE2', 'BAD', 'BAG3', 'BAMBI', 'BAX', 'BCL2', 'BCL7B',
       'BDH1', 'BECN1', 'BHLHE40', 'BID', 'BIRC2', 'BIRC5', 'BLCAP',
       'BLMH', 'BLVRA', 'BMP4', 'BNIP3', 'BNIP3L', 'BPHL', 'BRCA1', 'BTK',
       'BUB1B', 'BZW2', 'C2CD2', 'C2CD2L', 'C2CD5', 'C5', 'CAB39',
       'CALM3', 'CALU', 'CAMSAP2', 'CANT1', 'CAPN1', 'CARMIL1', 'CASC3',
       'CA

In [6]:
# Model whose saliency scores are analysed, and the folder they are read from.
model_name = 'GAT_Edge'
ss_path = f'{path}/Saliency/IG/CellLine/{model_name}/iqr_mean_baseline'

# Pipeline: raw per-drug saliency scores -> normalized scores -> ranks.
# All three helpers are project functions brought in by the star imports.
drug_dict, sal_dict = make_gene_ss_dict(ss_path, type='drug')
norm_sal_dict = normalize_ss(sal_dict)
rank_dict = rank_ss(norm_sal_dict)

In [7]:
# Drug whose normalized saliency vector is inspected in the next cells.
drug = 'Afatinib'  # alternative previously tried: 'Erlotinib'
# Sanity check of the stored array's shape; the recorded output is (1, 1, 956),
# matching the 956 preserved landmark genes reported when the data was loaded.
norm_sal_dict[drug].shape

(1, 1, 956)

In [8]:
# Drop the singleton axes so the saliency scores form one value per gene.
gsea_data = norm_sal_dict[drug].squeeze()

# One-column table ('ss' = saliency score) indexed by gene symbol and ordered
# from the highest to the lowest score; this is the input handed to ssGSEA.
gsea_df = pd.DataFrame(
    gsea_data,
    index=genes.tolist(),
    columns=['ss'],
).sort_values(by='ss', ascending=False)
gsea_df

Unnamed: 0,ss
HMGA2,1.000000
TERT,0.785352
HSPA1A,0.657373
MUC1,0.657115
IGFBP3,0.632132
...,...
ZNF131,0.002029
ZNF586,0.001957
ZNF451,0.001901
ZNF589,0.001508


In [9]:
# Run single-sample GSEA on the saliency-ordered gene table of the selected
# drug, scoring it against the gene sets in the `gene_sets` GMT file.
gsea = gp.ssgsea(data=gsea_df, gene_sets=gene_sets)
# Per-term result table; the recorded output shows Name, Term, ES and NES columns.
gsea.res2d

Unnamed: 0,Name,Term,ES,NES
0,ss,HALLMARK_KRAS_SIGNALING_UP,356.802113,0.696161
1,ss,HALLMARK_EPITHELIAL_MESENCHYMAL_TRANSITION,350.499277,0.683863
2,ss,HALLMARK_INFLAMMATORY_RESPONSE,316.083945,0.616715
3,ss,HALLMARK_ALLOGRAFT_REJECTION,268.760899,0.524382
4,ss,HALLMARK_APICAL_JUNCTION,268.49684,0.523867
5,ss,HALLMARK_UV_RESPONSE_DN,255.412244,0.498338
6,ss,HALLMARK_TNFA_SIGNALING_VIA_NFKB,254.821903,0.497186
7,ss,HALLMARK_COMPLEMENT,237.324081,0.463046
8,ss,HALLMARK_IL2_STAT5_SIGNALING,233.786443,0.456143
9,ss,HALLMARK_ESTROGEN_RESPONSE_EARLY,219.843887,0.42894


In [10]:
# Repeat the single-drug enrichment for several drugs and save one CSV per drug.
# NOTE: the loop rebinds the notebook-level names `drug`, `gsea_data`,
# `gsea_df` and `gsea`; after it finishes they refer to the last drug in the list.
drugs = ['Afatinib', 'Imatinib', 'Sunitinib']
for drug in drugs:
    # Saliency scores for this drug as a gene-indexed, descending-sorted table.
    gsea_data = norm_sal_dict[drug].squeeze()
    gsea_df = pd.DataFrame(
        gsea_data,
        index=genes.tolist(),
        columns=['ss'],
    ).sort_values(by='ss', ascending=False)

    # Enrichment against the configured gene sets; show and persist the result table.
    gsea = gp.ssgsea(data=gsea_df, gene_sets=gene_sets)
    res_df = gsea.res2d
    print(res_df)
    res_df.to_csv(f'{path}/Saliency/IG/CellLine/{model_name}/{drug}.csv', header=True)

   Name                                        Term          ES       NES
0    ss                  HALLMARK_KRAS_SIGNALING_UP  356.802113  0.696161
1    ss  HALLMARK_EPITHELIAL_MESENCHYMAL_TRANSITION  350.499277  0.683863
2    ss              HALLMARK_INFLAMMATORY_RESPONSE  316.083945  0.616715
3    ss                HALLMARK_ALLOGRAFT_REJECTION  268.760899  0.524382
4    ss                    HALLMARK_APICAL_JUNCTION   268.49684  0.523867
5    ss                     HALLMARK_UV_RESPONSE_DN  255.412244  0.498338
6    ss            HALLMARK_TNFA_SIGNALING_VIA_NFKB  254.821903  0.497186
7    ss                         HALLMARK_COMPLEMENT  237.324081  0.463046
8    ss                HALLMARK_IL2_STAT5_SIGNALING  233.786443  0.456143
9    ss            HALLMARK_ESTROGEN_RESPONSE_EARLY  219.843887   0.42894
10   ss             HALLMARK_ESTROGEN_RESPONSE_LATE  215.626765  0.420712
11   ss                          HALLMARK_APOPTOSIS  208.745911  0.407287
12   ss          HALLMARK_INTERFERON_G