In [None]:
import flexynesis 
import torch, os
# Cap PyTorch's CPU intra-op parallelism at 12 threads; adjust to the cores available on your machine.
torch.set_num_threads(12)

# Modeling Drug Response Using Publicly Available Pharmacogenomics Datasets

We build models using CCLE data and make predictions on GDSC data. 

# Download Data

In [None]:
# Download and unpack the benchmark archive only when ./dataset1 is not already present,
# so re-running the notebook skips the download.
# NOTE(review): assumes the archive extracts into a top-level dataset1/ directory — confirm.
if not os.path.exists("dataset1"):
    !wget -O ccle_gdsc.tgz "https://bimsbstatic.mdc-berlin.de/akalin/buyar/flexynesis-benchmark-datasets/dataset1.tgz" && tar -xzvf ccle_gdsc.tgz

## Define Data Importer 

In [None]:
# Set up the importer for the downloaded dataset directory, reading the 'gex' and 'cnv' data types.
# NOTE(review): concatenate=False presumably keeps the two layers as separate inputs, and
# min_features / top_percentile presumably control feature filtering — confirm the exact
# semantics (and the expected scale of top_percentile) against the flexynesis documentation.
data_importer = flexynesis.DataImporter(
    path='./dataset1/',
    data_types=['gex', 'cnv'],
    concatenate=False,
    min_features=1000,
    top_percentile=0.2,
)

In [None]:
# Run the import; import_data() returns a (train, test) pair of datasets.
# Per the introduction, training is presumably CCLE and testing GDSC — confirm against the dataset layout.
train_dataset, test_dataset = data_importer.import_data()

In [None]:
# Configure a hyperparameter search on the training dataset for a DirectPred model
# with "Erlotinib" as the target variable and no batch variables.
# NOTE(review): n_iter=5 is presumably the number of search iterations — confirm.
tuner = flexynesis.HyperparameterTuning(
    train_dataset,
    model_class=flexynesis.DirectPred,
    target_variables="Erlotinib",
    batch_variables=None,
    config_name="DirectPred",
    n_iter=5,
)

In [None]:
# Run the search; perform_tuning() yields the model used in the cells below and the chosen parameters.
model, best_params = tuner.perform_tuning()
# Persist the whole model object (pickled) so a later session can skip the tuning step.
torch.save(model, 'mymodel.pth')
# To reuse the saved model instead of re-tuning, uncomment the two lines below.
# NOTE: with PyTorch >= 2.6, torch.load defaults to weights_only=True; loading a fully pickled
# model requires weights_only=False — only do this for files you trust.
#model = torch.load('mymodel.pth')
#model.eval()

In [None]:
# Predict on the test dataset (presumably returns a dict keyed by target variable — confirm).
y_pred_dict = model.predict(test_dataset)

In [None]:
# Evaluate the predictions against the test dataset and display the resulting metrics.
metrics_df = flexynesis.evaluate_wrapper(y_pred_dict, test_dataset)
metrics_df

In [None]:
# Compute feature importances for each of the model's target variables (steps=50 is passed through as-is).
for target in model.target_variables:
    model.compute_feature_importance(target, steps=50)

In [None]:
# Retrieve the top 10 features for the "Erlotinib" target and display them.
# NOTE(review): presumably ranked by the importances computed in the previous cell — confirm.
top_features = flexynesis.get_important_features(model, "Erlotinib", top=10)
top_features

In [None]:
top_features = top_features.groupby('layer')['name'].apply(list).to_dict() #convert to dict

In [None]:
df = flexynesis.subset_assays_by_features(test_dataset, top_features)
df

In [None]:
# Derive response categories from the Erlotinib annotation of the test dataset via split_by_median
# (presumably a two-way split around the median value — confirm).
response_category = flexynesis.split_by_median(test_dataset.ann['Erlotinib'])

In [None]:
# Scatter plot of the 'cnv_EGFR' column against the Erlotinib annotation of the test dataset.
# NOTE(review): `df` is built from the top-10 features selected earlier; if it only contains those
# features and EGFR copy number is not among them in a given run, this lookup raises KeyError.
flexynesis.plot_scatter(df['cnv_EGFR'], test_dataset.ann['Erlotinib'])

In [None]:
# Box plot of the 'gex_EGFR' column against the Erlotinib response categories.
# NOTE(review): the last two arguments are presumably used as plot labels — confirm.
feature = 'gex_EGFR'
flexynesis.plot_boxplot(response_category, df[feature], 'Erlotinib', feature)