# Using *spotter* for splicing dependency analysis and exploring exon-drug associations

In [1]:
import os
import pandas as pd
from target_spotter import SplicingDependency, DrugAssociation

## Load data

In [2]:
# paths: the example inputs are expected under <parent of cwd>/data/examples/CCLE,
# so this cell assumes the notebook is run from a direct subdirectory of the repo root
ROOT = os.path.split(os.getcwd())[0]
DATA_DIR = os.path.join(ROOT, "data", "examples", "CCLE")
splicing_file, genexpr_file = (
    os.path.join(DATA_DIR, basename)
    for basename in ("splicing_EX.tsv.gz", "genexpr.tsv.gz")
)

# read: both inputs are gzipped tab-separated tables indexed by their identifier column
splicing = pd.read_csv(splicing_file, sep="\t").set_index("EVENT")  # PSI
# NOTE(review): labelled TPM, but the previewed values look log-transformed
# (e.g. log2(TPM+1)) -- TODO confirm which scale the estimator expects
genexpr = pd.read_csv(genexpr_file, sep="\t").set_index("ID")  # TPM

In [3]:
# preview the first five rows of the exon inclusion (PSI) table
splicing.head(n=5)

Unnamed: 0_level_0,ACH-000415,ACH-000894,ACH-000422,ACH-000358,ACH-000468,ACH-000502,ACH-000609,ACH-000636,ACH-000715,ACH-000653,...,ACH-000969,ACH-000277,ACH-000036,ACH-000197,ACH-000208,ACH-000359,ACH-000440,ACH-000804,ACH-000174,ACH-000934
EVENT,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
HsaEX0067681,1.77,1.94,1.18,7.3,0.98,0.0,,8.12,1.67,,...,1.75,0.0,0.69,,2.95,3.83,,2.73,2.25,
HsaEX6078702,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,...,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0
HsaEX0056692,5.88,38.53,24.35,9.09,16.96,13.33,11.69,1.79,9.63,23.47,...,10.3,12.32,17.07,20.83,22.67,36.0,8.36,40.81,18.92,
HsaEX0056690,88.89,93.85,83.78,87.65,91.11,94.2,93.33,83.87,78.12,95.04,...,86.14,91.26,95.83,,91.3,95.35,94.31,95.4,95.83,
HsaEX0056691,100.0,100.0,96.72,100.0,95.24,100.0,100.0,98.78,97.94,97.98,...,98.93,99.09,96.96,100.0,100.0,92.52,100.0,100.0,100.0,100.0


In [4]:
# preview the first five rows of the gene expression table
genexpr.head(n=5)

Unnamed: 0_level_0,ACH-000415,ACH-000894,ACH-000422,ACH-000358,ACH-000468,ACH-000502,ACH-000609,ACH-000636,ACH-000715,ACH-000653,...,ACH-000969,ACH-000277,ACH-000036,ACH-000197,ACH-000208,ACH-000359,ACH-000440,ACH-000804,ACH-000174,ACH-000934
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
ENSG00000000003,4.389567,7.281791,5.064366,6.165912,3.939227,3.806324,0.584963,3.720278,3.510962,0.505891,...,4.479619,1.992768,5.307064,1.144046,4.950935,3.885574,0.632268,5.334497,5.068241,2.62527
ENSG00000000005,0.0,0.014355,0.555816,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.028569,0.0,0.0,0.028569,0.0
ENSG00000000419,5.147714,6.803744,6.841596,5.928607,6.625417,7.470537,6.926237,6.569248,6.887647,6.49873,...,7.33405,8.805421,7.083958,6.962318,6.46238,7.110509,6.588715,7.126291,7.679973,7.010444
ENSG00000000457,1.0,2.469886,2.931683,2.726831,1.963474,2.555816,1.778209,3.30305,2.090853,3.195348,...,3.145677,3.533563,2.361768,3.553361,2.400538,2.124328,3.943921,3.001802,2.060047,2.833902
ENSG00000000460,1.555816,3.811471,3.834913,4.347666,3.228049,2.925999,3.169925,4.613532,2.589763,4.125155,...,3.270529,3.904002,1.978196,5.164706,3.646163,3.626439,3.8166,4.672425,3.62527,3.538538


## Predict drug sensitivity

### Compute splicing dependency

In [5]:
# predict splicing dependencies from the exon inclusion and gene expression tables;
# predict() yields two results, and only the first is displayed here
estimator = SplicingDependency()
prediction = estimator.predict(splicing, genexpr)
spldep_means, max_harm_score_means = prediction
spldep_means

Loading defaults...
Preprocessing inputs...
Standardizing data...
Computing splicing dependencies...


100%|██████████████████████| 1073/1073 [00:04<00:00, 260.62it/s]


Unnamed: 0,ACH-000422,ACH-000902,ACH-000306,ACH-000690,ACH-000468,ACH-000054,ACH-000808,ACH-000230,ACH-000764,ACH-000430,...,ACH-000197,ACH-000502,ACH-000277,ACH-000629,ACH-000609,ACH-000969,ACH-000979,ACH-000173,ACH-000804,ACH-000414
HsaEX6065058,-0.077509,,0.054858,,,,,,,,...,,,,,,,,,,
HsaEX6065028,,0.126410,,,,,,,,,...,,,,,,0.166987,,,,
HsaEX6008208,,,,0.013938,,,,,,,...,,,,,,,,,,
HsaEX1001338,,,,,,,,,,,...,,,,,,,,-0.076295,,
HsaEX0001886,,,0.076398,,,,,,,,...,,0.077345,,,,0.118040,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
HsaEX0014912,-0.049158,-0.057203,-0.008287,-0.005279,0.004546,-0.077286,-0.015926,-0.058265,-0.042378,0.047746,...,0.064695,0.025942,-0.043506,-0.014887,0.029331,-0.000205,0.007651,-0.051148,-0.013108,-0.033608
HsaEX0073989,,,,-0.059274,,,,,,,...,,,-0.021731,,,0.034737,,,-0.023698,
HsaEX7109159,-0.404623,-0.533333,-0.405545,-0.388614,-0.304486,-0.302511,-0.358239,-0.504522,-0.252468,-0.290842,...,-0.339231,-0.486995,-0.436812,-0.270211,-0.276389,-0.324466,-0.309202,-0.446593,-0.351985,-0.280428
HsaEX7109160,-0.401836,-0.534311,-0.405889,-0.386493,-0.305841,-0.301775,-0.362244,-0.508496,-0.252709,-0.287013,...,-0.338605,-0.498231,-0.435643,-0.269763,-0.276308,-0.324961,-0.313062,-0.444978,-0.346914,-0.278123


### Compute drug sensitivity

In [11]:
# estimate drug responses once per drug-screen dataset, then stack the results
datasets = ["GDSC1", "GDSC2"]
predictions = []
for dataset in datasets:
    print(dataset)
    # a new estimator is created for every dataset
    estimator = DrugAssociation()
    predictions.append(estimator.predict(spldep_means, dataset=dataset))

# every prediction is a (by-drug, by-exon) pair; regroup by kind before concatenating
by_drug_tables, by_exon_tables = zip(*predictions)
ic50_by_drugs = pd.concat(list(by_drug_tables))
ic50_by_exons = pd.concat(list(by_exon_tables))

GDSC1
Loading defaults...
Preprocessing inputs...
Estimating drug responses...
GDSC2
Loading defaults...
Preprocessing inputs...
Estimating drug responses...


In [12]:
# Display the per-drug predictions concatenated across the datasets above.
# NOTE(review): the row index appears to restart within the concatenated parts
# (no ignore_index in the concat) -- confirm before selecting rows by label.
ic50_by_drugs

Unnamed: 0,dataset,ID,sample,predicted_ic50
0,GDSC1,1001_2000.0,ACH-000422,3.715535
1,GDSC1,1001_2000.0,ACH-000902,3.957210
2,GDSC1,1001_2000.0,ACH-000306,3.630535
3,GDSC1,1001_2000.0,ACH-000690,3.552191
4,GDSC1,1001_2000.0,ACH-000468,3.557627
...,...,...,...,...
45,GDSC2,2172_10.0,ACH-000969,0.000000
46,GDSC2,2172_10.0,ACH-000979,0.000000
47,GDSC2,2172_10.0,ACH-000173,0.000000
48,GDSC2,2172_10.0,ACH-000804,0.000000


In [13]:
# Display the per-exon predictions concatenated across the datasets above
# (one column per sample; pandas truncates the rendering of this large table).
ic50_by_exons

Unnamed: 0,dataset,ID,EVENT,ENSEMBL,GENE,ACH-000422,ACH-000902,ACH-000306,ACH-000690,ACH-000468,...,ACH-000197,ACH-000502,ACH-000277,ACH-000629,ACH-000609,ACH-000969,ACH-000979,ACH-000173,ACH-000804,ACH-000414
0,GDSC1,1_2.0,HsaEX6065058,ENSG00000175899,A2M,2.675744,,1.370171,,,...,,,,,,,,,,
1,GDSC1,1_2.0,HsaEX6065028,ENSG00000166535,A2ML1,,,,,,...,,,,,,,,,,
2,GDSC1,1_2.0,HsaEX6008208,ENSG00000179869,ABCA13,,,,2.571869,,...,,,,,,,,,,
3,GDSC1,1_2.0,HsaEX1001338,ENSG00000005471,ABCB4,,,,,,...,,,,,,,,2.436288,,
4,GDSC1,1_2.0,HsaEX0001886,ENSG00000159640,ACE,,,2.479506,,,...,,2.716123,,,,2.633315,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
187770,GDSC2,2172_10.0,HsaEX0014912,ENSG00000153774,CFDP1,,,,,,...,,,,,,,,,,
187771,GDSC2,2172_10.0,HsaEX0073989,ENSG00000161551,ZNF577,,,,,,...,,,,,,,,,,
187772,GDSC2,2172_10.0,HsaEX7109159,ENSG00000239900,ADSL,,,,,,...,,,,,,,,,,
187773,GDSC2,2172_10.0,HsaEX7109160,ENSG00000239900,ADSL,,,,,,...,,,,,,,,,,
