In [1]:
%load_ext autoreload
%autoreload 2

import contextlib
import importlib
import json
import os
import sys
from copy import deepcopy

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy
import seaborn as sns
import torch
from torch.utils.data import DataLoader
from torch.utils.data import TensorDataset
from tqdm import tqdm

import reservoir as rdl
from recover.utils.utils import get_tensor_dataset

In [2]:
#####################################
# Methods to get different types of loaders
#####################################


def get_regular_valid_loader(trainer):
    """Return the validation loader already held by the trainer.

    Reads ``trainer.valid_loader`` and hands it back unchanged.
    """
    loader = trainer.valid_loader
    return loader

def get_test_loader(trainer):
    """Build a loader over the examples selected by ``trainer.test_idxs``.

    The dataset is produced by ``get_tensor_dataset`` from ``trainer.data`` and
    ``trainer.test_idxs``, then wrapped in a ``DataLoader`` with a batch size of 128.
    """
    return DataLoader(
        get_tensor_dataset(trainer.data, trainer.test_idxs),
        batch_size=128,
    )


#####################################
# Iterator over trainers for a given config
#####################################


def trainer_iterator(config_file, path_to_conf):
    """Yield a trainer, with checkpoint weights loaded, for each complete run.

    Args:
        config_file: Name of the module under ``recover.config`` whose
            ``configuration`` dict describes the experiment. It is also the name of
            the sub-directory of ``path_to_conf`` that holds the runs.
        path_to_conf: Directory containing one sub-directory per configuration.
            A trailing path separator is optional.

    Yields:
        The object built by ``configuration["trainer"](configuration["trainer_config"])``
        once grid-searched entries have been replaced by the values stored in the
        run's ``params.json`` and ``trainer.model`` has been loaded (on CPU) from
        ``model.pth`` in the lowest-numbered checkpoint directory of the run.

    Runs whose directory name does not start with ``BasicTrainer`` are ignored, and
    so are runs that do not contain exactly two checkpoint directories.
    """
    configuration = importlib.import_module("recover.config." + config_file).configuration
    conf_dir = os.path.join(path_to_conf, config_file)

    # Loop over all runs for this configuration
    for run_dir in tqdm(os.listdir(conf_dir)):
        print(run_dir)
        if not run_dir.startswith('BasicTrainer'):
            continue

        run_path = os.path.join(conf_dir, run_dir)

        # Load params for that run
        with open(os.path.join(run_path, 'params.json')) as f:
            params = json.load(f)

        # Load configuration (can contain grid_search args); work on a copy so the
        # imported configuration is left untouched for the next run
        this_run_config = deepcopy(configuration)
        trainer_config = this_run_config['trainer_config']

        # Replace grid_search args by the actual parameter for that run.
        # Only existing keys are reassigned, so the dict size does not change
        # while it is being iterated.
        for key, spec in trainer_config.items():
            if not (isinstance(spec, dict) and 'grid_search' in spec):
                continue

            value = params[key]
            # If grid search over python classes, we need to load them: params.json
            # holds a string of the form "<class 'package.module.ClassName'>"
            if isinstance(value, str) and value.startswith('<class'):
                module_path, _, class_name = value.split("'")[1].rpartition('.')
                value = getattr(importlib.import_module(module_path), class_name)
            trainer_config[key] = value

        # Load trainer
        trainer = this_run_config["trainer"](trainer_config)

        # Only yield the trainer if 2 checkpoints have been saved (corresponding to
        # best and last epochs)
        checkpoints = [entry for entry in os.listdir(run_path)
                       if entry.startswith('checkpoint')]
        if len(checkpoints) != 2:
            continue

        # The checkpoint with the smaller numeric suffix is taken as the best epoch
        checkpoint = min(checkpoints, key=lambda name: int(name.split('_')[-1]))

        # Load model. os.path.join keeps this working whether or not path_to_conf
        # ends with a path separator.
        trainer.model.load_state_dict(torch.load(
            os.path.join(run_path, checkpoint, "model.pth"),
            map_location=torch.device('cpu')))
        print("Loaded model from", run_dir, checkpoint)

        yield trainer

#####################################
# Main evaluation method
#####################################


def evaluate_config(config_file, path_to_conf, get_eval_loader=get_regular_valid_loader):
    """Evaluate every trainer of a configuration and collect the metrics in one frame.

    Args:
        config_file: Configuration name, forwarded to ``trainer_iterator``.
        path_to_conf: Directory holding the runs, forwarded to ``trainer_iterator``.
        get_eval_loader: Callable taking a trainer and returning the loader passed
            to ``trainer.eval_epoch``. Defaults to ``get_regular_valid_loader``.

    Returns:
        A ``pandas.DataFrame`` with one row per yielded trainer and a fresh
        ``0..n-1`` index. Columns are the keys of the metrics dict returned by
        ``trainer.eval_epoch`` plus ``prop_of_shuffled_drugs`` (read from
        ``trainer.config``). An empty frame is returned when no trainer is yielded.
    """
    run_frames = []

    for trainer in trainer_iterator(config_file, path_to_conf):

        # Evaluate
        eval_metrics, _ = trainer.eval_epoch(trainer.data, get_eval_loader(trainer),
                                             trainer.model)
        print("eval metrics", eval_metrics)

        this_run_results = {**eval_metrics}
        this_run_results['prop_of_shuffled_drugs'] = trainer.config['prop_of_shuffled_drugs']
        print("this run results", this_run_results)

        # Wrap every value in a list so that it becomes one cell of a single-row frame
        run_frames.append(pd.DataFrame.from_dict(
            {key: [value] for key, value in this_run_results.items()}))

    # pd.concat raises on an empty list, so keep the empty-frame result explicit
    if not run_frames:
        return pd.DataFrame()

    # Concatenate once (instead of inside the loop) and renumber the rows; the
    # original reset_index() call discarded its result and had no effect
    return pd.concat(run_frames, ignore_index=True)
        

## Pair-level split (default)

In [3]:
# Silence the per-run prints emitted during evaluation. The context managers restore
# stdout and close the devnull handle even if the evaluation raises.
with open(os.devnull, "w") as devnull, contextlib.redirect_stdout(devnull):
    all_results = evaluate_config(
        config_file="model_evaluation_partially_shuffled",
        path_to_conf="/Users/paul/PycharmProjects/NewReservoir/"
                     "Recover/RayLogs/",
        get_eval_loader=get_test_loader)

# Mean and standard deviation over the runs of each shuffled proportion
for prop in all_results["prop_of_shuffled_drugs"].unique():
    prop_results = all_results[all_results["prop_of_shuffled_drugs"] == prop]
    print("Shuffled proportion", prop)

    # "\\pm" prints a literal backslash followed by "pm" without relying on an
    # invalid escape sequence
    print("R2", round(prop_results['comb_r_squared'].mean(), 3), "\\pm",
          round(prop_results['comb_r_squared'].std(), 3))
    print('spearman', round(prop_results['spearman'].mean(), 3), "\\pm",
          round(prop_results['spearman'].std(), 3))

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 17/17 [00:02<00:00,  7.64it/s]

Shuffled proportion 0.25
R2 0.206 \pm 0.014
spearman 0.46 \pm 0.036
Shuffled proportion 0.0
R2 0.24 \pm 0.003
spearman 0.464 \pm 0.005
Shuffled proportion 0.75
R2 0.22 \pm 0.016
spearman 0.452 \pm 0.026
Shuffled proportion 1.0
R2 0.258 \pm 0.003
spearman 0.48 \pm 0.022
Shuffled proportion 0.5
R2 0.229 \pm 0.018
spearman 0.464 \pm 0.021





In [4]:
# Silence the per-run prints emitted during evaluation. The context managers restore
# stdout and close the devnull handle even if the evaluation raises.
with open(os.devnull, "w") as devnull, contextlib.redirect_stdout(devnull):
    all_results = evaluate_config(
        config_file="model_evaluation_partially_shuffled",
        path_to_conf="/Users/paul/PycharmProjects/NewReservoir/"
                     "Recover/RayLogs/",
        get_eval_loader=get_regular_valid_loader)

# Mean and standard deviation over the runs of each shuffled proportion
for prop in all_results["prop_of_shuffled_drugs"].unique():
    prop_results = all_results[all_results["prop_of_shuffled_drugs"] == prop]
    print("Shuffled proportion", prop)

    # "\\pm" prints a literal backslash followed by "pm" without relying on an
    # invalid escape sequence
    print("R2", round(prop_results['comb_r_squared'].mean(), 3), "\\pm",
          round(prop_results['comb_r_squared'].std(), 3))
    print('spearman', round(prop_results['spearman'].mean(), 3), "\\pm",
          round(prop_results['spearman'].std(), 3))

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 17/17 [00:02<00:00,  5.96it/s]

Shuffled proportion 0.25
R2 0.319 \pm 0.025
spearman 0.484 \pm 0.037
Shuffled proportion 0.0
R2 0.346 \pm 0.051
spearman 0.474 \pm 0.021
Shuffled proportion 0.75
R2 0.323 \pm 0.061
spearman 0.452 \pm 0.014
Shuffled proportion 1.0
R2 0.349 \pm 0.047
spearman 0.478 \pm 0.02
Shuffled proportion 0.5
R2 0.337 \pm 0.051
spearman 0.461 \pm 0.02





## Drug-level split

In [5]:
from recover.datasets.drugcomb_matrix_data import DrugCombMatrixDrugLevelSplitTest

def get_drug_split_test_loader(trainer):
    """Build a loader over every example of ``DrugCombMatrixDrugLevelSplitTest``.

    ``trainer`` is not read; it is accepted so that the function can be passed as
    the ``get_eval_loader`` callback of ``evaluate_config``.

    The dataset is instantiated for cell line ``'MCF7'`` with ``fp_bits=1024`` and
    ``fp_radius=2``, and its ``ddi_edge_response`` is overwritten with
    ``ddi_edge_bliss_max`` before the tensors are extracted.
    """
    split = DrugCombMatrixDrugLevelSplitTest(cell_line='MCF7', fp_bits=1024, fp_radius=2)

    # Evaluate against ddi_edge_bliss_max as the response
    split.data.ddi_edge_response = split.data.ddi_edge_bliss_max

    # Every example of this dataset is used for testing
    every_idx = range(len(split.data.ddi_edge_response))

    return DataLoader(get_tensor_dataset(split.data, every_idx), batch_size=128)

In [6]:
# Silence the per-run prints emitted during evaluation. The context managers restore
# stdout and close the devnull handle even if the evaluation raises.
with open(os.devnull, "w") as devnull, contextlib.redirect_stdout(devnull):
    all_results = evaluate_config(
        config_file="model_drug_level_split_partially_shuffled",
        path_to_conf="/Users/paul/PycharmProjects/NewReservoir/"
                     "Recover/RayLogs/",
        get_eval_loader=get_drug_split_test_loader)

# Mean and standard deviation over the runs of each shuffled proportion
for prop in all_results["prop_of_shuffled_drugs"].unique():
    prop_results = all_results[all_results["prop_of_shuffled_drugs"] == prop]
    print("Shuffled proportion", prop)

    # "\\pm" prints a literal backslash followed by "pm" without relying on an
    # invalid escape sequence
    print("R2", round(prop_results['comb_r_squared'].mean(), 3), "\\pm",
          round(prop_results['comb_r_squared'].std(), 3))
    print('spearman', round(prop_results['spearman'].mean(), 3), "\\pm",
          round(prop_results['spearman'].std(), 3))

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 17/17 [00:02<00:00,  8.20it/s]

Shuffled proportion 0.75
R2 0.0 \pm 0.0
spearman 0.037 \pm 0.05
Shuffled proportion 0.25
R2 0.018 \pm 0.016
spearman 0.096 \pm 0.066
Shuffled proportion 0.0
R2 0.036 \pm 0.004
spearman 0.153 \pm 0.01
Shuffled proportion 0.5
R2 0.01 \pm 0.005
spearman -0.024 \pm 0.105
Shuffled proportion 1.0
R2 0.02 \pm 0.029
spearman -0.092 \pm 0.143





In [7]:
# Silence the per-run prints emitted during evaluation. The context managers restore
# stdout and close the devnull handle even if the evaluation raises.
with open(os.devnull, "w") as devnull, contextlib.redirect_stdout(devnull):
    all_results = evaluate_config(
        config_file="model_drug_level_split_partially_shuffled",
        path_to_conf="/Users/paul/PycharmProjects/NewReservoir/"
                     "Recover/RayLogs/",
        get_eval_loader=get_regular_valid_loader)

# Mean and standard deviation over the runs of each shuffled proportion
for prop in all_results["prop_of_shuffled_drugs"].unique():
    prop_results = all_results[all_results["prop_of_shuffled_drugs"] == prop]
    print("Shuffled proportion", prop)

    # "\\pm" prints a literal backslash followed by "pm" without relying on an
    # invalid escape sequence
    print("R2", round(prop_results['comb_r_squared'].mean(), 3), "\\pm",
          round(prop_results['comb_r_squared'].std(), 3))
    print('spearman', round(prop_results['spearman'].mean(), 3), "\\pm",
          round(prop_results['spearman'].std(), 3))

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 17/17 [00:01<00:00,  8.92it/s]

Shuffled proportion 0.75
R2 0.379 \pm 0.188
spearman 0.445 \pm 0.087
Shuffled proportion 0.25
R2 0.376 \pm 0.17
spearman 0.446 \pm 0.083
Shuffled proportion 0.0
R2 0.406 \pm 0.155
spearman 0.455 \pm 0.064
Shuffled proportion 0.5
R2 0.398 \pm 0.159
spearman 0.452 \pm 0.061
Shuffled proportion 1.0
R2 0.382 \pm 0.171
spearman 0.438 \pm 0.08





## One hidden drug

In [8]:
from recover.datasets.drugcomb_matrix_data import DrugCombMatrixOneHiddenDrugSplitTest

def get_one_hidden_drug_split_test_loader(trainer):
    """Build a loader over every example of ``DrugCombMatrixOneHiddenDrugSplitTest``.

    ``trainer`` is not read; it is accepted so that the function can be passed as
    the ``get_eval_loader`` callback of ``evaluate_config``.

    The dataset is instantiated for cell line ``'MCF7'`` with ``fp_bits=1024`` and
    ``fp_radius=2``, and its ``ddi_edge_response`` is overwritten with
    ``ddi_edge_bliss_max`` before the tensors are extracted.
    """
    split = DrugCombMatrixOneHiddenDrugSplitTest(cell_line='MCF7', fp_bits=1024, fp_radius=2)

    # Evaluate against ddi_edge_bliss_max as the response
    split.data.ddi_edge_response = split.data.ddi_edge_bliss_max

    # Every example of this dataset is used for testing
    every_idx = range(len(split.data.ddi_edge_response))

    return DataLoader(get_tensor_dataset(split.data, every_idx), batch_size=128)

In [9]:
# Silence the per-run prints emitted during evaluation. The context managers restore
# stdout and close the devnull handle even if the evaluation raises.
with open(os.devnull, "w") as devnull, contextlib.redirect_stdout(devnull):
    all_results = evaluate_config(
        config_file="one_hidden_drug_split_partially_shuffled",
        path_to_conf="/Users/paul/PycharmProjects/NewReservoir/"
                     "Recover/RayLogs/",
        get_eval_loader=get_one_hidden_drug_split_test_loader)

# Mean and standard deviation over the runs of each shuffled proportion
for prop in all_results["prop_of_shuffled_drugs"].unique():
    prop_results = all_results[all_results["prop_of_shuffled_drugs"] == prop]
    print("Shuffled proportion", prop)

    # "\\pm" prints a literal backslash followed by "pm" without relying on an
    # invalid escape sequence
    print("R2", round(prop_results['comb_r_squared'].mean(), 3), "\\pm",
          round(prop_results['comb_r_squared'].std(), 3))
    print('spearman', round(prop_results['spearman'].mean(), 3), "\\pm",
          round(prop_results['spearman'].std(), 3))

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 17/17 [00:14<00:00,  1.14it/s]

Shuffled proportion 0.75
R2 0.109 \pm 0.021
spearman 0.257 \pm 0.096
Shuffled proportion 0.25
R2 0.131 \pm 0.015
spearman 0.257 \pm 0.02
Shuffled proportion 0.5
R2 0.095 \pm 0.038
spearman 0.201 \pm 0.07
Shuffled proportion 0.0
R2 0.18 \pm 0.023
spearman 0.32 \pm 0.016
Shuffled proportion 1.0
R2 0.1 \pm 0.062
spearman 0.255 \pm 0.063





In [10]:
# Silence the per-run prints emitted during evaluation. The context managers restore
# stdout and close the devnull handle even if the evaluation raises.
with open(os.devnull, "w") as devnull, contextlib.redirect_stdout(devnull):
    all_results = evaluate_config(
        config_file="one_hidden_drug_split_partially_shuffled",
        path_to_conf="/Users/paul/PycharmProjects/NewReservoir/"
                     "Recover/RayLogs/",
        get_eval_loader=get_regular_valid_loader)

# Mean and standard deviation over the runs of each shuffled proportion
for prop in all_results["prop_of_shuffled_drugs"].unique():
    prop_results = all_results[all_results["prop_of_shuffled_drugs"] == prop]
    print("Shuffled proportion", prop)

    # "\\pm" prints a literal backslash followed by "pm" without relying on an
    # invalid escape sequence
    print("R2", round(prop_results['comb_r_squared'].mean(), 3), "\\pm",
          round(prop_results['comb_r_squared'].std(), 3))
    print('spearman', round(prop_results['spearman'].mean(), 3), "\\pm",
          round(prop_results['spearman'].std(), 3))

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 17/17 [00:09<00:00,  1.82it/s]

Shuffled proportion 0.75
R2 0.257 \pm 0.067
spearman 0.418 \pm 0.024
Shuffled proportion 0.25
R2 0.277 \pm 0.061
spearman 0.427 \pm 0.025
Shuffled proportion 0.5
R2 0.259 \pm 0.064
spearman 0.426 \pm 0.022
Shuffled proportion 0.0
R2 0.308 \pm 0.097
spearman 0.459 \pm 0.053
Shuffled proportion 1.0
R2 0.252 \pm 0.061
spearman 0.416 \pm 0.029



