In [None]:
%%capture
# import all the needed libraries
import pandas as pd
from depiction.models.paccmann import PaccMannSmiles, PaccMannCellLine
from depiction.core import Task, DataType
from depiction.interpreters.uw_model import UWModel

## Data

In [None]:
# Load the GDSC inputs used by the rest of the notebook.
# Drug table: tab-separated file without a header row; the single named
# column is 'smiles' and column position 1 is used as the index.
drugs = pd.read_csv(
    '../data/paccmann/gdsc.smi',
    sep='\t',
    header=None,
    names=['smiles'],
    index_col=1,
)
# Cell-line table, indexed by the file's column at position 1.
cell_lines = pd.read_csv(
    '../data/paccmann/gdsc.csv.gz',
    index_col=1,
)
# Every column from position 3 onward is used as a gene feature below.
genes = cell_lines.columns[3:].tolist()
# Drug sensitivity records, indexed by the file's first column.
drug_sensitivity = pd.read_csv(
    '../data/paccmann/gdsc_sensitivity.csv.gz',
    index_col=0,
)

## Interpretability on the drug level for a cell line of interest

In [None]:
# Drug-level explanations: a classification task over text input.
task, data_type = Task.CLASSIFICATION, DataType.TEXT
# Human-readable names for class index 0 and class index 1.
class_names = ['Not Effective', 'Effective']

In [None]:
# Cell line whose drug response is explained in this section.
selected_cell_line = 'NCI-H1648'
# Sensitivity records measured on that cell line.
selected_drug_sensitivity = drug_sensitivity[drug_sensitivity['cell_line'] == selected_cell_line]
# Keep only the drugs that actually have an entry in `drugs`.
# `.loc` with a list containing missing labels raises KeyError on pandas >= 1.0;
# older versions returned all-NaN rows for them, which `dropna()` then removed.
# Filtering first yields the same result on both.
known_drugs = [
    drug for drug in selected_drug_sensitivity['drug'].tolist()
    if drug in drugs.index
]
selected_drugs = drugs.loc[known_drugs].dropna()
# Re-index the sensitivity records by drug, restricted to the drugs kept above.
selected_drug_sensitivity = selected_drug_sensitivity.set_index('drug').loc[selected_drugs.index.unique()]

In [None]:
# Build the SMILES-level model from the gene values of the selected cell line.
classifier = PaccMannSmiles(
    cell_lines.loc[selected_cell_line][genes].values
)

In [None]:
# Name of the interpreter backend handed to UWModel.
interpreter = 'lime'
# NOTE(review): labels=(1,) presumably restricts the explanation to class
# index 1 ('Effective') - confirm against the UWModel / LIME documentation.
explanation_configs = dict(labels=(1,))
# Extra keyword arguments forwarded to UWModel: the input is split with
# `list`, with bag-of-words disabled and character-level mode enabled.
interpreter_params = dict(
    class_names=class_names,
    split_expression=list,
    bow=False,
    char_level=True,
)
explainer = UWModel(
    interpreter,
    task,
    data_type,
    explanation_configs,
    **interpreter_params
)

In [None]:
# Explain the classifier's prediction for the single 'Embelin' entry of `drugs`.
explainer.interpret(
    classifier.predict,
    drugs.loc['Embelin'].item(),
)

## Interpretability on the cell line level for a drug of interest

In [None]:
# Cell-line-level explanations: a classification task over tabular input.
task, data_type = Task.CLASSIFICATION, DataType.TABULAR
# Human-readable names for class index 0 and class index 1.
class_names = ['Not Effective', 'Effective']

In [None]:
# Drug whose effect across cell lines is explained in this section.
selected_drug = 'Rapamycin'
# Sensitivity records measured for that drug.
selected_drug_sensitivity = drug_sensitivity[drug_sensitivity['drug'] == selected_drug]
# Keep only the cell lines that actually have an entry in `cell_lines`.
# `.loc` with a list containing missing labels raises KeyError on pandas >= 1.0;
# older versions returned all-NaN rows for them, which `dropna()` then removed.
# Filtering first yields the same result on both.
known_cell_lines = [
    cell_line for cell_line in selected_drug_sensitivity['cell_line'].tolist()
    if cell_line in cell_lines.index
]
selected_cell_lines = cell_lines.loc[known_cell_lines].dropna()
# Re-index the sensitivity records by cell line, restricted to those kept above.
selected_drug_sensitivity = selected_drug_sensitivity.set_index('cell_line').loc[selected_cell_lines.index.unique()]

In [None]:
# Build the cell-line-level model from the single `drugs` entry of the selected drug.
classifier = PaccMannCellLine(
    drugs.loc[selected_drug].item()
)

In [None]:
# Name of the interpreter backend handed to UWModel.
interpreter = 'lime'
# NOTE(review): labels=(1,) presumably restricts the explanation to class
# index 1 ('Effective') - confirm against the UWModel / LIME documentation.
explanation_configs = dict(labels=(1,))
# Extra keyword arguments forwarded to UWModel: gene values of the selected
# cell lines as training data, their 'effective' column as labels, and the
# gene names as feature names.
interpreter_params = dict(
    training_data=selected_cell_lines[genes].values,
    training_labels=selected_drug_sensitivity['effective'].values,
    feature_names=genes,
    class_names=class_names,
    discretize_continuous=False,
    sample_around_instance=True,
)
explainer = UWModel(
    interpreter,
    task,
    data_type,
    explanation_configs,
    **interpreter_params
)

In [None]:
# Explain the prediction for the gene values of `selected_cell_line`.
# That variable is assigned in the drug-level section earlier in the notebook,
# so that cell must have been run first.
explainer.interpret(
    classifier.predict,
    cell_lines.loc[selected_cell_line][genes].values,
)