[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/MiqG/target_spotter/blob/main/notebooks/colab_pipeline.ipynb)

# Run *spotter* pipelines interactively

1. Select pipeline settings below
2. Hit `Runtime` -> `Run All`
3. Load your splicing and gene expression tables
4. Wait :)
5. Explore your outputs at https://spotter.crg.eu/explore by selecting "Custom Dataset"

In [13]:
#@title ## Pipeline settings
#@markdown What type of gene expression data did you input?
genexpr_data_type = "TPM" #@param ["TPM","Counts"]

#@markdown What is the name of the column containing splicing event identifiers from [`VastDB`](https://vastdb.crg.eu/wiki/Main_Page)?
event_col = "EVENT" #@param {type: "string"}

#@markdown What is the name of the column containing gene ensembl identifiers?
gene_col = "ID" #@param {type: "string"}

# process settings
## Gene expression
# Translate the selected data type into the two preprocessing flags that are
# later passed to `SplicingDependency` in the "Run spotter" cell:
#   - "TPM":    no count normalization, log-transform enabled
#   - "Counts": count normalization enabled, log-transform disabled
if genexpr_data_type=="TPM":
    normalize_counts=False
    log_transform=True

elif genexpr_data_type=="Counts":
    normalize_counts=True
    log_transform=False

else:
    # fail here with a clear message instead of leaving the flags undefined
    # and hitting a NameError further down the notebook
    raise ValueError(
        "Unsupported genexpr_data_type %r: expected 'TPM' or 'Counts'." % (genexpr_data_type,)
    )

In [3]:
#@title ## Load vast-tools output(s)
from google.colab import files
import pandas as pd

In [4]:
#@title ### Load exon inclusion (PSI) table
# `files.upload()` opens the Colab file picker; its keys are the uploaded file names.
uploaded_splicing = files.upload()
if not uploaded_splicing:
    # nothing was selected (e.g. the dialog was cancelled): stop with a clear message
    raise ValueError("No splicing file was uploaded. Re-run this cell and select your exon inclusion (PSI) table.")

# only the first uploaded file is read
splicing_file = list(uploaded_splicing.keys())[0]
splicing = pd.read_table(splicing_file)
splicing

Saving splicing_EX.tsv.gz to splicing_EX.tsv (2).gz


Unnamed: 0,EVENT,ACH-000415,ACH-000894,ACH-000422,ACH-000358,ACH-000468,ACH-000502,ACH-000609,ACH-000636,ACH-000715,...,ACH-000969,ACH-000277,ACH-000036,ACH-000197,ACH-000208,ACH-000359,ACH-000440,ACH-000804,ACH-000174,ACH-000934
0,HsaEX0067681,1.77,1.94,1.18,7.30,0.98,0.00,,8.12,1.67,...,1.75,0.00,0.69,,2.95,3.83,,2.73,2.25,
1,HsaEX6078702,100.00,100.00,100.00,100.00,100.00,100.00,100.00,100.00,100.00,...,100.00,100.00,100.00,100.00,100.00,100.00,100.00,100.00,100.00,100.0
2,HsaEX0056692,5.88,38.53,24.35,9.09,16.96,13.33,11.69,1.79,9.63,...,10.30,12.32,17.07,20.83,22.67,36.00,8.36,40.81,18.92,
3,HsaEX0056690,88.89,93.85,83.78,87.65,91.11,94.20,93.33,83.87,78.12,...,86.14,91.26,95.83,,91.30,95.35,94.31,95.40,95.83,
4,HsaEX0056691,100.00,100.00,96.72,100.00,95.24,100.00,100.00,98.78,97.94,...,98.93,99.09,96.96,100.00,100.00,92.52,100.00,100.00,100.00,100.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
164648,HsaEX7005665,,100.00,,,,,,,,...,,100.00,,,,,,,,
164649,HsaEX7007688,,,,,,,,,,...,,,,,,,,,,
164650,HsaEX7007687,,,,,,,,,,...,,,,,,,,,,
164651,HsaEX7001979,,,,100.00,,,,,,...,,,,,,,,,,


In [5]:
#@title ### Load gene expression (TPM or Counts) table
#@markdown Select your file corresponding to gene expression as TPM (default); in case you supply raw gene expression counts, please set the pipeline setting "What type of gene expression data did you input?" to "Counts" accordingly.
#@markdown Make sure your gene expression data is **not** log-transformed.
# `files.upload()` opens the Colab file picker; its keys are the uploaded file names.
uploaded_genexpr = files.upload()
if not uploaded_genexpr:
    # nothing was selected (e.g. the dialog was cancelled): stop with a clear message
    raise ValueError("No gene expression file was uploaded. Re-run this cell and select your gene expression table.")

# only the first uploaded file is read
genexpr_file = list(uploaded_genexpr.keys())[0]
genexpr = pd.read_table(genexpr_file)
genexpr

Saving genexpr.tsv.gz to genexpr.tsv (1).gz


Unnamed: 0,ID,ACH-000415,ACH-000894,ACH-000422,ACH-000358,ACH-000468,ACH-000502,ACH-000609,ACH-000636,ACH-000715,...,ACH-000969,ACH-000277,ACH-000036,ACH-000197,ACH-000208,ACH-000359,ACH-000440,ACH-000804,ACH-000174,ACH-000934
0,ENSG00000000003,4.389567,7.281791,5.064366,6.165912,3.939227,3.806324,0.584963,3.720278,3.510962,...,4.479619,1.992768,5.307064,1.144046,4.950935,3.885574,0.632268,5.334497,5.068241,2.625270
1,ENSG00000000005,0.000000,0.014355,0.555816,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.028569,0.000000,0.000000,0.028569,0.000000
2,ENSG00000000419,5.147714,6.803744,6.841596,5.928607,6.625417,7.470537,6.926237,6.569248,6.887647,...,7.334050,8.805421,7.083958,6.962318,6.462380,7.110509,6.588715,7.126291,7.679973,7.010444
3,ENSG00000000457,1.000000,2.469886,2.931683,2.726831,1.963474,2.555816,1.778209,3.303050,2.090853,...,3.145677,3.533563,2.361768,3.553361,2.400538,2.124328,3.943921,3.001802,2.060047,2.833902
4,ENSG00000000460,1.555816,3.811471,3.834913,4.347666,3.228049,2.925999,3.169925,4.613532,2.589763,...,3.270529,3.904002,1.978196,5.164706,3.646163,3.626439,3.816600,4.672425,3.625270,3.538538
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
39977,ENSG00000284587,0.000000,0.000000,1.550901,1.709291,0.000000,0.000000,0.782409,1.918386,0.000000,...,1.695994,2.042644,1.948601,0.000000,0.000000,0.000000,1.427606,1.636915,1.735522,2.965323
39978,ENSG00000284594,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.970854,1.906891,0.000000,0.000000,0.000000,0.000000
39979,ENSG00000284595,0.475085,2.046142,2.871844,3.246408,2.046142,0.000000,1.007196,3.261531,0.748461,...,1.748461,2.169925,1.589763,3.936402,1.659925,1.618239,0.839960,2.632268,2.752749,3.032101
39980,ENSG00000284596,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,1.722466,0.000000,0.000000,0.000000,1.490570,0.000000,0.000000,0.000000


In [6]:
#@title ### Preprocess inputs
def _index_by_id_keep_numeric(table, id_col):
    """Return a copy of `table` indexed by `id_col`, keeping only numeric columns.

    If `id_col` is already the index (this cell was run before), the table is
    not re-indexed, so re-running the cell does not fail.

    Raises:
        KeyError: if `id_col` is neither a column nor the current index name.
    """
    if id_col in table.columns:
        table = table.set_index(id_col)
    elif table.index.name != id_col:
        raise KeyError(
            "Column '%s' not found in the table; check the column names in the pipeline settings." % id_col
        )
    # public pandas API for selecting the numeric columns
    return table.select_dtypes(include="number").copy()

## create indexes and drop any column that is not numeric
### splicing
splicing = _index_by_id_keep_numeric(splicing, event_col)

### genexpr
genexpr = _index_by_id_keep_numeric(genexpr, gene_col)

In [7]:
#@title ## Install dependencies
#!pip install target_spotter

# download
!wget --load-cookies /tmp/cookies.txt "https://docs.google.com/uc?export=download&confirm=$(wget --quiet --save-cookies /tmp/cookies.txt --keep-session-cookies --no-check-certificate 'https://docs.google.com/uc?export=download&id=1psFjQGh40j1wiJbGIagphZFDhvrjSgRt' -O- | sed -rn 's/.*confirm=([0-9A-Za-z_]+).*/\1\n/p')&id=1psFjQGh40j1wiJbGIagphZFDhvrjSgRt" -O "target_spotter.zip" && rm -rf /tmp/cookies.txt

# unzip
!unzip target_spotter.zip

--2023-02-21 11:29:39--  https://docs.google.com/uc?export=download&confirm=t&id=1psFjQGh40j1wiJbGIagphZFDhvrjSgRt
Resolving docs.google.com (docs.google.com)... 142.250.157.113, 142.250.157.139, 142.250.157.102, ...
Connecting to docs.google.com (docs.google.com)|142.250.157.113|:443... connected.
HTTP request sent, awaiting response... 303 See Other
Location: https://doc-0c-24-docs.googleusercontent.com/docs/securesc/ha0ro937gcuc7l7deffksulhg5h7mbp1/m92ju05kus5jkm5mk4h5c1kpfj98ead6/1676978925000/07651711738842447463/*/1psFjQGh40j1wiJbGIagphZFDhvrjSgRt?e=download&uuid=081e1a5c-f54a-4593-aa28-297d2d520c93 [following]
--2023-02-21 11:29:40--  https://doc-0c-24-docs.googleusercontent.com/docs/securesc/ha0ro937gcuc7l7deffksulhg5h7mbp1/m92ju05kus5jkm5mk4h5c1kpfj98ead6/1676978925000/07651711738842447463/*/1psFjQGh40j1wiJbGIagphZFDhvrjSgRt?e=download&uuid=081e1a5c-f54a-4593-aa28-297d2d520c93
Resolving doc-0c-24-docs.googleusercontent.com (doc-0c-24-docs.googleusercontent.com)... 108.177.125.

In [15]:
#@title ## Run *spotter*

from target_spotter.target_spotter import SplicingDependency, DrugAssociation

# compute splicing dependency
# the two preprocessing flags come from the "Pipeline settings" cell
estimator_spldep = SplicingDependency(normalize_counts=normalize_counts, log_transform=log_transform)
spldep_means, max_harm_score_means = estimator_spldep.predict(splicing, genexpr)

# compute drug sensitivity
# one prediction table per dataset, stacked into a single data frame afterwards
datasets = ["GDSC1","GDSC2"]
ic50_frames = []
for dataset in datasets:
    print(dataset)
    estimator = DrugAssociation()
    # predict() returns two values; only the first one is used here
    ic50_by_drug, _ = estimator.predict(spldep_means, dataset=dataset)
    ic50_frames.append(ic50_by_drug)

ic50_by_drugs = pd.concat(ic50_frames)

Loading defaults...
Preprocessing inputs...
Transforming TPM into log2(TPM+1)...
Standardizing data...
Computing splicing dependencies...


100%|██████████| 1073/1073 [00:05<00:00, 203.56it/s]


Loading defaults...
Preprocessing inputs...
Estimating drug responses...


In [16]:
#@title ### Inspect predicted splicing dependencies
# first value returned by `SplicingDependency.predict` in the "Run spotter" cell
spldep_means

Unnamed: 0,ACH-000359,ACH-000764,ACH-000808,ACH-000173,ACH-000430,ACH-000415,ACH-000468,ACH-001210,ACH-000690,ACH-000302,...,ACH-000422,ACH-000968,ACH-000056,ACH-000889,ACH-000979,ACH-000080,ACH-000969,ACH-000609,ACH-000902,ACH-000502
HsaEX6065058,,,,,,,,,,,...,-0.040081,,,,,,,,,
HsaEX6065028,,,,,,0.193381,,,,,...,,,,,,,0.217218,,0.204919,
HsaEX6008208,,,,,,,,,-0.008172,,...,,,,,,,,,,
HsaEX1001338,,,,-0.050978,,,,,,,...,,-0.047012,-0.043280,,,,,,,
HsaEX0001886,,,,,,,,,,,...,,,,,,,0.114572,,,0.067759
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
HsaEX0014912,-0.260924,-0.252428,-0.220367,-0.242366,-0.212531,-0.245249,-0.232147,-0.240077,-0.242709,-0.258120,...,-0.254237,-0.264767,-0.249137,-0.220291,-0.216341,-0.264124,-0.199166,-0.232808,-0.239490,-0.228163
HsaEX0073989,,,,,,,,,-0.062916,-0.022094,...,,,,-0.058150,,,0.012486,,,
HsaEX7109159,-0.702621,-0.629801,-0.656917,-0.709969,-0.640269,-0.723654,-0.635783,-0.730949,-0.705672,-0.679105,...,-0.684436,-0.627481,-0.632174,-0.653157,-0.643026,-0.655282,-0.656931,-0.635622,-0.712520,-0.717818
HsaEX7109160,-0.704876,-0.630171,-0.661025,-0.708445,-0.636560,-0.725917,-0.637252,-0.728086,-0.703660,-0.684115,...,-0.681745,-0.629041,-0.634266,-0.653748,-0.647000,-0.655739,-0.657540,-0.635665,-0.713559,-0.729133


In [19]:
#@title ### Inspect predicted maximum harm scores
# second value returned by `SplicingDependency.predict` in the "Run spotter" cell
max_harm_score_means

Unnamed: 0,ACH-000359,ACH-000764,ACH-000808,ACH-000173,ACH-000430,ACH-000415,ACH-000468,ACH-001210,ACH-000690,ACH-000302,...,ACH-000422,ACH-000056,ACH-000889,ACH-000979,ACH-000609,ACH-000080,ACH-000969,ACH-000968,ACH-000902,ACH-000502
HsaEX0043288,,,,,,,,,,,...,,,,,,-1.769692,,,,
HsaEX0007182,-61.871150,-60.569659,-61.396445,-60.864804,-59.835360,-66.503026,-60.724417,-59.188229,-61.672261,-63.423596,...,-62.288610,-62.425499,-58.967496,-58.627221,-66.604054,-60.911597,-62.955427,-65.280568,-62.763928,-58.328079
HsaEX0022086,-148.853069,-148.901432,-147.766692,-148.732977,-147.617069,-152.360907,-146.011008,-150.292837,-149.058209,-150.165898,...,-147.575645,-151.520496,-150.566472,-145.754244,-148.296564,-149.605132,-147.603190,-149.045818,-148.928955,-146.412552
HsaEX0050345,-11.304903,-1.907548,-20.342289,-5.322582,-9.934646,-5.931536,-5.042996,-13.420249,-15.125619,-10.664651,...,-5.141186,-6.294138,-2.444732,-24.123765,-2.626240,-2.652163,-8.361643,-9.874888,-26.321484,0.000000
HsaEX6059151,,,,,,,,,,,...,,,-8.156605,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
HsaEX1037792,,,,,,,,,,,...,,,-4.252674,,,,,,,
HsaEX0052463,,,,,-0.013914,-0.066086,,,,,...,,,-0.348131,,,,,,,
HsaEX0057577,-136.859313,-136.258797,-136.417121,-138.433202,-135.496021,-140.385633,-136.971706,-134.481439,-134.888354,-139.256600,...,-135.872234,-139.993665,-134.422788,-135.883101,-134.966235,-135.522795,-138.297631,-137.728838,-140.496302,-135.705550
HsaEX0042776,-82.597608,-80.656636,-83.914924,-80.892565,-81.763247,-89.159024,-81.697205,-81.476983,-80.996970,-88.251338,...,-81.965483,-81.014982,-80.422318,-83.624719,-84.896640,-80.815971,-78.954059,-82.303517,-79.683323,-83.686407


In [20]:
#@title ### Inspect predicted drug sensitivities of each sample
# concatenation of the `DrugAssociation.predict` outputs for every dataset in `datasets`
ic50_by_drugs

Unnamed: 0,dataset,ID,sample,predicted_ic50
0,GDSC1,1001_2000.0,ACH-000359,3.418828
1,GDSC1,1001_2000.0,ACH-000764,3.469560
2,GDSC1,1001_2000.0,ACH-000808,3.732448
3,GDSC1,1001_2000.0,ACH-000173,3.898708
4,GDSC1,1001_2000.0,ACH-000430,3.466658
...,...,...,...,...
45,GDSC1,9_1.0,ACH-000080,0.088692
46,GDSC1,9_1.0,ACH-000969,0.117227
47,GDSC1,9_1.0,ACH-000968,0.083655
48,GDSC1,9_1.0,ACH-000902,0.174400


In [23]:
#@title ## Package and download results

import os
from datetime import datetime
import shutil
from target_spotter.target_spotter.utils import prep_for_webapp

# results are written to a new folder named after the current timestamp
run_stamp = datetime.now().strftime("%Y%m%d%H%M%S")
output_dir = "%s-results" % run_stamp
SAVE_PARAMS = {"sep":"\t", "index":False, "compression":"gzip"}
os.makedirs(output_dir)

# save each result table as a gzipped TSV; the index is turned into a regular
# column first because the tables are written with index=False
tables_to_save = [
    ("spldep.tsv.gz", spldep_means),
    ("max_harm.tsv.gz", max_harm_score_means),
    ("ic50_by_drugs.tsv.gz", ic50_by_drugs),
]
for file_name, table in tables_to_save:
    table.reset_index().to_csv(os.path.join(output_dir, file_name), **SAVE_PARAMS)

# prepare the inputs for the web app inside the results folder
webapp_dir = os.path.join(output_dir, "webapp_inputs")
prep_for_webapp(splicing, genexpr, spldep_means, max_harm_score_means, ic50_by_drugs, webapp_dir)

# bundle the whole results folder into "<output_dir>.zip"
shutil.make_archive(output_dir, "zip", output_dir)

# hand the archive to the browser for download
files.download(output_dir+".zip")

print("Done!")

sqlite:///20230221113557-results/webapp_inputs/splicing.sql
Saved sqlite:///20230221113557-results/webapp_inputs/splicing.sql
sqlite:///20230221113557-results/webapp_inputs/genexpr.sql
Saved sqlite:///20230221113557-results/webapp_inputs/genexpr.sql
sqlite:///20230221113557-results/webapp_inputs/spldep.sql
Saved sqlite:///20230221113557-results/webapp_inputs/spldep.sql
sqlite:///20230221113557-results/webapp_inputs/max_harm.sql
Saved sqlite:///20230221113557-results/webapp_inputs/max_harm.sql
sqlite:///20230221113557-results/webapp_inputs/pred_ic50.sql
Saved sqlite:///20230221113557-results/webapp_inputs/pred_ic50.sql
Compressing Webapp inputs...
Done!


Enjoy your results! Note that inside the zipped folder you will find "webapp_inputs.zip", ready to be explored at https://spotter.crg.eu.