[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/MiqG/target_spotter/blob/main/notebooks/colab_pipeline.ipynb)

# Run *spotter* pipelines interactively

In [1]:
#@markdown ## Load vast-tools output(s), select your options, then hit `Runtime` -> `Run All`
from google.colab import files
import pandas as pd

ModuleNotFoundError: No module named 'google.colab'

In [2]:
#@markdown ### Load exon inclusion (PSI) table
# Open the Colab upload dialog; the first uploaded filename is taken as the PSI table.
splicing_file = list(files.upload())[0]
# Parse the uploaded file as tab-separated text.
splicing = pd.read_csv(splicing_file, sep="\t")
splicing

In [2]:
#@markdown ### Load gene expression (TPM or Counts) table
#@markdown Select the file containing gene expression as TPM (default). If you supply raw gene expression counts instead, set `genexpr_data_type` to "Counts" in the "Pipeline settings" section below.
#@markdown Make sure your gene expression data is **not** log-transformed.
# Open the Colab upload dialog; the first uploaded filename is taken as the expression table.
genexpr_file = list(files.upload())[0]
# Parse the uploaded file as tab-separated text.
genexpr = pd.read_csv(genexpr_file, sep="\t")
genexpr

## Pipeline settings

In [None]:
#@markdown What type of gene expression data did you input?
genexpr_data_type = "TPM" #@param ["TPM","Counts"]

#@markdown What is the name of the column containing splicing event identifiers from [`VastDB`](https://vastdb.crg.eu/wiki/Main_Page)?
event_col = "EVENT" #@param {type: "string"}

#@markdown What is the name of the column containing gene ensembl identifiers?
gene_col = "ID" #@param {type: "string"}

# process settings
## Gene expression
# Translate the selected data type into the two preprocessing flags that are
# later passed to SplicingDependency(normalize_counts=..., log_transform=...).
if genexpr_data_type == "TPM":
    normalize_counts = False
    log_transform = True
elif genexpr_data_type == "Counts":
    normalize_counts = True
    log_transform = False
else:
    # Fail here rather than with a NameError on the undefined flags later on.
    raise ValueError(
        'genexpr_data_type must be "TPM" or "Counts", got %r' % genexpr_data_type
    )

## create indexes and drop any column that is not numeric
# The index-name checks keep this cell re-runnable: on a second run the
# identifier column has already become the index and set_index would raise.
### splicing
if splicing.index.name != event_col:
    splicing = splicing.set_index(event_col)
splicing = splicing._get_numeric_data().copy()

### genexpr
# Gene expression rows are keyed by the gene identifier column (gene_col),
# not by the splicing event column.
if genexpr.index.name != gene_col:
    genexpr = genexpr.set_index(gene_col)
genexpr = genexpr._get_numeric_data().copy()

In [11]:
#@markdown ## Install dependencies
!pip install target_spotter

[0m

In [None]:
#@markdown ## Run *spotter*

from target_spotter import SplicingDependency, DrugAssociation

# Step 1: splicing dependency, computed from the PSI and gene expression tables
# with the preprocessing flags chosen in the settings cell.
estimator_spldep = SplicingDependency(
    normalize_counts=normalize_counts,
    log_transform=log_transform,
)
spldep_means, max_harm_score_means = estimator_spldep.predict(splicing, genexpr)

# Step 2: drug sensitivity from the splicing dependencies; only the first
# element of the returned pair is kept.
estimator = DrugAssociation()
ic50_by_drug, _ = estimator.predict(spldep_means)

In [None]:
#@markdown ### Inspect predicted splicing dependencies
# Bare last expression: the notebook displays the first value returned by
# estimator_spldep.predict(splicing, genexpr).
spldep_means

In [None]:
#@markdown ### Inspect predicted maximum harm scores
# Bare last expression: the notebook displays the second value returned by
# estimator_spldep.predict(splicing, genexpr). The variable is named
# max_harm_score_means (plural) where it is assigned.
max_harm_score_means

In [None]:
#@markdown ### Inspect predicted drug sensitivities of each sample
# Bare last expression: the notebook displays the first value returned by
# estimator.predict(spldep_means).
ic50_by_drug

## Package and download results

In [10]:
import os  # used below for makedirs / path.join; was missing from this cell
import shutil
from datetime import datetime

from target_spotter.utils import prep_for_webapp

# save resulting tables
# The directory name is the current timestamp (to the second) plus "-results".
output_dir = "%s-results" % datetime.now().strftime("%Y%m%d%H%M%S")
SAVE_PARAMS = {"sep":"\t", "index":False, "compression":"gzip"}

# exist_ok avoids a FileExistsError if the cell is re-run within the same second.
os.makedirs(output_dir, exist_ok=True)

# reset_index() turns each index into a regular column, so it is still written
# even though SAVE_PARAMS sets index=False.
spldep_means.reset_index().to_csv(os.path.join(output_dir,"spldep.tsv.gz"), **SAVE_PARAMS)
max_harm_score_means.reset_index().to_csv(os.path.join(output_dir,"max_harm.tsv.gz"), **SAVE_PARAMS)
ic50_by_drug.reset_index().to_csv(os.path.join(output_dir,"ic50_by_drug.tsv.gz"), **SAVE_PARAMS)

# prepare results as sql to be uploaded to the web app
prep_for_webapp(splicing, genexpr, spldep_means, max_harm_score_means, ic50_by_drug, os.path.join(output_dir,"webapp_inputs"))

# package resulting tables into a .zip file that will be downloaded
# make_archive writes "<output_dir>.zip" containing the contents of output_dir.
shutil.make_archive(output_dir, "zip", output_dir)

print("Done!")