In [1]:
%load_ext autoreload
%autoreload 2

import matplotlib.pyplot as plt
import seaborn as sns
import scipy
import importlib
import torch
import pandas as pd
import os
import sys
import json
from torch.utils.data import TensorDataset
from torch.utils.data import DataLoader
from recover.utils.utils import get_tensor_dataset
import reservoir as rdl
from copy import deepcopy
import numpy as np
from tqdm import tqdm

In [2]:
#####################################
# Methods to get different types of loaders
#####################################


def get_regular_valid_loader(trainer):
    """Return the validation loader stored on the trainer (``trainer.valid_loader``)."""
    valid_loader = trainer.valid_loader
    return valid_loader

def get_test_loader(trainer):
    """Build a DataLoader (batch size 128) over the trainer's test indices.

    The dataset is obtained from ``get_tensor_dataset`` applied to
    ``trainer.data`` restricted to ``trainer.test_idxs``.
    """
    return DataLoader(
        get_tensor_dataset(trainer.data, trainer.test_idxs),
        batch_size=128,
    )


#####################################
# Iterator over trainers for a given config
#####################################


def _load_class_from_repr(class_repr):
    """Import and return the class named in a ``"<class 'pkg.mod.Name'>"`` string."""
    qualified_name = class_repr.split("'")[1]
    module_path, _, class_name = qualified_name.rpartition('.')
    return getattr(importlib.import_module(module_path), class_name)


def _find_earliest_checkpoint(run_path):
    """Return ``(name, count)`` for the ``checkpoint*`` entries found in ``run_path``.

    ``name`` is the entry whose integer suffix (text after the last underscore)
    is the smallest, or ``None`` when there is no such entry; ``count`` is the
    number of ``checkpoint*`` entries.
    """
    checkpoints = [entry for entry in os.listdir(run_path) if entry.startswith('checkpoint')]
    if not checkpoints:
        return None, 0
    earliest = min(checkpoints, key=lambda entry: int(entry.split('_')[-1]))
    return earliest, len(checkpoints)


def trainer_iterator(config_file, path_to_conf):
    """Yield one trainer, with restored model weights, per completed run of a configuration.

    Args:
        config_file: name of the module under ``recover.config`` holding the
            ``configuration`` dict; also the name of the sub-directory of
            ``path_to_conf`` that contains the runs.
        path_to_conf: directory containing one sub-directory per configuration
            (a trailing separator is not required).

    Yields:
        A trainer built from the run's configuration (``grid_search`` entries of
        ``trainer_config`` replaced by the values stored in the run's
        ``params.json``) whose ``model`` has been loaded, on CPU, from the
        checkpoint with the smallest index.

    Only run directories whose name starts with ``BasicTrainer`` and that hold
    exactly two checkpoints are considered; other runs are skipped before any
    trainer is instantiated.
    """
    configuration = importlib.import_module("recover.config." + config_file).configuration
    config_dir = os.path.join(path_to_conf, config_file)

    # Loop over all runs for this configuration (sorted so the order is reproducible)
    for run_dir in tqdm(sorted(os.listdir(config_dir))):
        print(run_dir)
        if not run_dir.startswith('BasicTrainer'):
            continue

        run_path = os.path.join(config_dir, run_dir)

        # Find the checkpoint corresponding to the best epoch (always two checkpoints,
        # corresponding to best and last epochs). Checked first so that no trainer is
        # built for a run that is going to be skipped anyway.
        checkpoint, n_checkpoints = _find_earliest_checkpoint(run_path)
        if n_checkpoints != 2:
            continue

        # Load params for that run
        with open(os.path.join(run_path, 'params.json')) as f:
            params = json.load(f)

        # Load configuration (can contain grid_search args); deep copy so that the
        # imported module-level configuration is never modified
        this_run_config = deepcopy(configuration)
        trainer_config = this_run_config['trainer_config']

        # Replace grid_search args by the actual parameter for that run
        for key, value in list(trainer_config.items()):
            if isinstance(value, dict) and 'grid_search' in value:
                param = params[key]
                # If grid search over python classes, we need to load them
                if isinstance(param, str) and param.startswith('<class'):
                    param = _load_class_from_repr(param)
                trainer_config[key] = param

        # Load trainer
        trainer = this_run_config["trainer"](trainer_config)

        # Load model; os.path.join keeps this consistent with the other paths built
        # above and works whether or not path_to_conf ends with a separator
        model_path = os.path.join(run_path, checkpoint, "model.pth")
        trainer.model.load_state_dict(torch.load(model_path,
                                                 map_location=torch.device('cpu')))
        print("Loaded model from", run_dir, checkpoint)

        yield trainer

#####################################
# Main evaluation method
#####################################


def evaluate_config(config_file, path_to_conf, get_eval_loader=get_regular_valid_loader):
    """Evaluate every run of a configuration and gather the metrics in a DataFrame.

    Args:
        config_file: configuration name, forwarded to ``trainer_iterator``.
        path_to_conf: directory containing the run logs, forwarded to
            ``trainer_iterator``.
        get_eval_loader: callable taking a trainer and returning the loader to
            evaluate on. Defaults to the trainer's own validation loader.

    Returns:
        pd.DataFrame with one row per trainer yielded by ``trainer_iterator``
        and one column per key of the metrics dict returned by
        ``trainer.eval_epoch``. Rows are indexed 0..n-1. The frame is empty if
        no trainer was yielded.
    """
    per_run_metrics = []

    for trainer in trainer_iterator(config_file, path_to_conf):
        # Evaluate
        eval_metrics, _ = trainer.eval_epoch(trainer.data, get_eval_loader(trainer),
                                             trainer.model)
        print("eval metrics", eval_metrics)

        per_run_metrics.append(dict(eval_metrics))

    # Build the frame once from all records: DataFrame.append no longer exists in
    # pandas >= 2.0, and growing a frame row by row is quadratic anyway. This also
    # gives a clean 0..n-1 index.
    return pd.DataFrame(per_run_metrics)
        

## Pair level split (default)

In [3]:
# Evaluate the "model_evaluation" runs on their test split, with stdout silenced.
# The `with` block closes the devnull handle, and `finally` restores stdout even
# if the evaluation raises.
with open(os.devnull, "w") as devnull:
    old_stdout = sys.stdout  # backup current stdout
    sys.stdout = devnull
    try:
        all_results = evaluate_config(config_file="model_evaluation",
                                      path_to_conf="/Users/paul/PycharmProjects/RECOVERcoalition/"
                                                   "Recover/RayLogs/",
                                      get_eval_loader=get_test_loader)
    finally:
        sys.stdout = old_stdout  # reset old stdout

# Raw strings: "\p" is not a valid escape sequence in a regular string literal
print("R2", round(all_results['comb_r_squared'].mean(), 3), r"\pm",
      round(all_results['comb_r_squared'].std(), 3))
print('spearman', round(all_results['spearman'].mean(), 3), r"\pm",
      round(all_results['spearman'].std(), 3))

100%|██████████| 6/6 [00:00<00:00,  7.89it/s]

R2 0.242 \pm 0.006
spearman 0.466 \pm 0.007





In [4]:
# Evaluate the "model_evaluation" runs on their validation loader, with stdout silenced.
# The `with` block closes the devnull handle, and `finally` restores stdout even
# if the evaluation raises.
with open(os.devnull, "w") as devnull:
    old_stdout = sys.stdout  # backup current stdout
    sys.stdout = devnull
    try:
        all_results = evaluate_config(config_file="model_evaluation",
                                      path_to_conf="/Users/paul/PycharmProjects/RECOVERcoalition/"
                                                   "Recover/RayLogs/",
                                      get_eval_loader=get_regular_valid_loader)
    finally:
        sys.stdout = old_stdout  # reset old stdout

# Raw strings: "\p" is not a valid escape sequence in a regular string literal
print("R2", round(all_results['comb_r_squared'].mean(), 3), r"\pm",
      round(all_results['comb_r_squared'].std(), 3))
print('spearman', round(all_results['spearman'].mean(), 3), r"\pm",
      round(all_results['spearman'].std(), 3))

100%|██████████| 6/6 [00:01<00:00,  5.53it/s]

R2 0.343 \pm 0.053
spearman 0.474 \pm 0.021





## Drug Level Split

In [5]:
from recover.datasets.drugcomb_matrix_data import DrugCombMatrixDrugLevelSplitTest

def get_drug_split_test_loader(trainer):
    """Build a DataLoader (batch size 128) over the full drug-level-split test set.

    A fresh ``DrugCombMatrixDrugLevelSplitTest`` dataset is created for the MCF7
    cell line, its ``ddi_edge_bliss_max`` values are used as the response, and
    every entry of the response is included in the loader.

    ``trainer`` is not used; it is accepted so that this function can be passed
    wherever a ``get_eval_loader(trainer)`` callable is expected.
    """
    held_out = DrugCombMatrixDrugLevelSplitTest(
        fp_radius=2,
        fp_bits=1024,
        cell_line='MCF7',
    )
    # Use bliss_max as the response
    held_out.data.ddi_edge_response = held_out.data.ddi_edge_bliss_max

    n_examples = len(held_out.data.ddi_edge_response)
    every_idx = range(n_examples)

    return DataLoader(get_tensor_dataset(held_out.data, every_idx), batch_size=128)

In [6]:
# Evaluate the "model_drug_level_split" runs on the drug-level-split test loader,
# with stdout silenced. The `with` block closes the devnull handle, and `finally`
# restores stdout even if the evaluation raises.
with open(os.devnull, "w") as devnull:
    old_stdout = sys.stdout  # backup current stdout
    sys.stdout = devnull
    try:
        all_results = evaluate_config(config_file="model_drug_level_split",
                                      path_to_conf="/Users/paul/PycharmProjects/RECOVERcoalition/"
                                                   "Recover/RayLogs/",
                                      get_eval_loader=get_drug_split_test_loader)
    finally:
        sys.stdout = old_stdout  # reset old stdout

# Raw strings: "\p" is not a valid escape sequence in a regular string literal
print("R2", round(all_results['comb_r_squared'].mean(), 3), r"\pm",
      round(all_results['comb_r_squared'].std(), 3))
print('spearman', round(all_results['spearman'].mean(), 3), r"\pm",
      round(all_results['spearman'].std(), 3))

100%|██████████| 5/5 [00:00<00:00,  8.11it/s]


R2 0.038 \pm 0.002
spearman 0.157 \pm 0.012


In [7]:
# Evaluate the "model_drug_level_split" runs on their validation loader, with
# stdout silenced. The `with` block closes the devnull handle, and `finally`
# restores stdout even if the evaluation raises.
with open(os.devnull, "w") as devnull:
    old_stdout = sys.stdout  # backup current stdout
    sys.stdout = devnull
    try:
        all_results = evaluate_config(config_file="model_drug_level_split",
                                      path_to_conf="/Users/paul/PycharmProjects/RECOVERcoalition/"
                                                   "Recover/RayLogs/",
                                      get_eval_loader=get_regular_valid_loader)
    finally:
        sys.stdout = old_stdout  # reset old stdout

# Raw strings: "\p" is not a valid escape sequence in a regular string literal
print("R2", round(all_results['comb_r_squared'].mean(), 3), r"\pm",
      round(all_results['comb_r_squared'].std(), 3))
print('spearman', round(all_results['spearman'].mean(), 3), r"\pm",
      round(all_results['spearman'].std(), 3))

100%|██████████| 5/5 [00:00<00:00,  7.61it/s]

R2 0.401 \pm 0.147
spearman 0.459 \pm 0.069





## Multi Cell Line

In [8]:
def get_mcf7_test_loader(trainer):
    """Build a DataLoader (batch size 128) over the MCF7 examples of the test split.

    The selected indices are those present in ``trainer.test_idxs`` whose entry
    in ``trainer.data.ddi_edge_classes`` equals
    ``trainer.data.cell_line_to_idx_dict['MCF7']``. They are passed to
    ``get_tensor_dataset`` as a sorted ``torch.long`` tensor.
    """
    mcf7_class = trainer.data.cell_line_to_idx_dict['MCF7']
    mcf7_idxs = np.where(np.array(trainer.data.ddi_edge_classes) == mcf7_class)[0]

    # intersect1d returns the common values sorted, so the order is deterministic
    # (unlike iterating over a set)
    mcf7_test_idxs = np.intersect1d(np.array(trainer.test_idxs), mcf7_idxs)
    # Convert straight to int64: going through torch.Tensor(...) would build a
    # float32 tensor first, which cannot represent every integer above 2**24
    mcf7_test_idxs = torch.as_tensor(mcf7_test_idxs, dtype=torch.long)

    test_dataset = get_tensor_dataset(trainer.data, mcf7_test_idxs)
    test_loader = DataLoader(test_dataset, batch_size=128)

    return test_loader

In [9]:
# Evaluate the "model_evaluation_multi_cell_line" runs on the MCF7 part of their
# test split, with stdout silenced. The `with` block closes the devnull handle,
# and `finally` restores stdout even if the evaluation raises.
with open(os.devnull, "w") as devnull:
    old_stdout = sys.stdout  # backup current stdout
    sys.stdout = devnull
    try:
        all_results = evaluate_config(config_file="model_evaluation_multi_cell_line",
                                      path_to_conf="/Users/paul/PycharmProjects/RECOVERcoalition/"
                                                   "Recover/RayLogs/",
                                      get_eval_loader=get_mcf7_test_loader)
    finally:
        sys.stdout = old_stdout  # reset old stdout

# Raw strings: "\p" is not a valid escape sequence in a regular string literal
print("R2", round(all_results['comb_r_squared'].mean(), 3), r"\pm",
      round(all_results['comb_r_squared'].std(), 3))
print('spearman', round(all_results['spearman'].mean(), 3), r"\pm",
      round(all_results['spearman'].std(), 3))

100%|██████████| 6/6 [00:03<00:00,  1.58it/s]

R2 0.282 \pm 0.017
spearman 0.448 \pm 0.021





In [10]:
# Evaluate the "model_evaluation_multi_cell_line" runs on their validation loader,
# with stdout silenced. The `with` block closes the devnull handle, and `finally`
# restores stdout even if the evaluation raises.
with open(os.devnull, "w") as devnull:
    old_stdout = sys.stdout  # backup current stdout
    sys.stdout = devnull
    try:
        all_results = evaluate_config(config_file="model_evaluation_multi_cell_line",
                                      path_to_conf="/Users/paul/PycharmProjects/RECOVERcoalition/"
                                                   "Recover/RayLogs/",
                                      get_eval_loader=get_regular_valid_loader)
    finally:
        sys.stdout = old_stdout  # reset old stdout

# Raw strings: "\p" is not a valid escape sequence in a regular string literal
print("R2", round(all_results['comb_r_squared'].mean(), 3), r"\pm",
      round(all_results['comb_r_squared'].std(), 3))
print('spearman', round(all_results['spearman'].mean(), 3), r"\pm",
      round(all_results['spearman'].std(), 3))

100%|██████████| 6/6 [01:34<00:00, 15.81s/it]

R2 0.387 \pm 0.032
spearman 0.518 \pm 0.021





## Cell Line Transfer

In [11]:
# Evaluate the "cell_line_transfer" runs on their test split, with stdout silenced.
# The `with` block closes the devnull handle, and `finally` restores stdout even
# if the evaluation raises.
with open(os.devnull, "w") as devnull:
    old_stdout = sys.stdout  # backup current stdout
    sys.stdout = devnull
    try:
        all_results = evaluate_config(config_file="cell_line_transfer",
                                      path_to_conf="/Users/paul/PycharmProjects/RECOVERcoalition/"
                                                   "Recover/RayLogs/",
                                      get_eval_loader=get_test_loader)
    finally:
        sys.stdout = old_stdout  # reset old stdout

# Raw strings: "\p" is not a valid escape sequence in a regular string literal
print("R2", round(all_results['comb_r_squared'].mean(), 3), r"\pm",
      round(all_results['comb_r_squared'].std(), 3))
print('spearman', round(all_results['spearman'].mean(), 3), r"\pm",
      round(all_results['spearman'].std(), 3))

100%|██████████| 6/6 [01:25<00:00, 14.26s/it]

R2 0.382 \pm 0.017
spearman 0.378 \pm 0.015





In [12]:
# Evaluate the "cell_line_transfer" runs on their validation loader, with stdout
# silenced. The `with` block closes the devnull handle, and `finally` restores
# stdout even if the evaluation raises.
with open(os.devnull, "w") as devnull:
    old_stdout = sys.stdout  # backup current stdout
    sys.stdout = devnull
    try:
        all_results = evaluate_config(config_file="cell_line_transfer",
                                      path_to_conf="/Users/paul/PycharmProjects/RECOVERcoalition/"
                                                   "Recover/RayLogs/",
                                      get_eval_loader=get_regular_valid_loader)
    finally:
        sys.stdout = old_stdout  # reset old stdout

# Raw strings: "\p" is not a valid escape sequence in a regular string literal
print("R2", round(all_results['comb_r_squared'].mean(), 3), r"\pm",
      round(all_results['comb_r_squared'].std(), 3))
print('spearman', round(all_results['spearman'].mean(), 3), r"\pm",
      round(all_results['spearman'].std(), 3))

100%|██████████| 6/6 [12:10<00:00, 121.67s/it]

R2 0.278 \pm 0.027
spearman 0.299 \pm 0.047





## Study Transfer

In [13]:
from recover.datasets.drugcomb_matrix_data import DrugCombMatrixTestAlmanac

def get_trimmed_Almanac_test_loader(trainer):
    """Build a DataLoader (batch size 128) over the full ``DrugCombMatrixTestAlmanac`` set.

    A fresh dataset is created with ``cell_line=None``, its
    ``ddi_edge_bliss_max`` values are used as the response, and every entry of
    the response is included in the loader.

    ``trainer`` is not used; it is accepted so that this function can be passed
    wherever a ``get_eval_loader(trainer)`` callable is expected.
    """
    almanac = DrugCombMatrixTestAlmanac(
        fp_radius=2,
        fp_bits=1024,
        cell_line=None,
    )
    # Use bliss_max as the response
    almanac.data.ddi_edge_response = almanac.data.ddi_edge_bliss_max

    n_examples = len(almanac.data.ddi_edge_response)
    every_idx = range(n_examples)

    return DataLoader(get_tensor_dataset(almanac.data, every_idx), batch_size=128)

In [14]:
# Evaluate the "pretrain_ONEIL" runs on the Almanac test loader, with stdout
# silenced. The `with` block closes the devnull handle, and `finally` restores
# stdout even if the evaluation raises.
with open(os.devnull, "w") as devnull:
    old_stdout = sys.stdout  # backup current stdout
    sys.stdout = devnull
    try:
        all_results = evaluate_config(config_file="pretrain_ONEIL",
                                      path_to_conf="/Users/paul/PycharmProjects/RECOVERcoalition/"
                                                   "Recover/RayLogs/",
                                      get_eval_loader=get_trimmed_Almanac_test_loader)
    finally:
        sys.stdout = old_stdout  # reset old stdout

# Raw strings: "\p" is not a valid escape sequence in a regular string literal
print("R2", round(all_results['comb_r_squared'].mean(), 3), r"\pm",
      round(all_results['comb_r_squared'].std(), 3))
print('spearman', round(all_results['spearman'].mean(), 3), r"\pm",
      round(all_results['spearman'].std(), 3))

100%|██████████| 6/6 [00:03<00:00,  1.88it/s]

R2 0.014 \pm 0.016
spearman 0.147 \pm 0.075





In [15]:
# Evaluate the "pretrain_ONEIL" runs on their validation loader, with stdout
# silenced. The `with` block closes the devnull handle, and `finally` restores
# stdout even if the evaluation raises.
with open(os.devnull, "w") as devnull:
    old_stdout = sys.stdout  # backup current stdout
    sys.stdout = devnull
    try:
        all_results = evaluate_config(config_file="pretrain_ONEIL",
                                      path_to_conf="/Users/paul/PycharmProjects/RECOVERcoalition/"
                                                   "Recover/RayLogs/",
                                      get_eval_loader=get_regular_valid_loader)
    finally:
        sys.stdout = old_stdout  # reset old stdout

# Raw strings: "\p" is not a valid escape sequence in a regular string literal
print("R2", round(all_results['comb_r_squared'].mean(), 3), r"\pm",
      round(all_results['comb_r_squared'].std(), 3))
print('spearman', round(all_results['spearman'].mean(), 3), r"\pm",
      round(all_results['spearman'].std(), 3))

100%|██████████| 6/6 [00:22<00:00,  3.72s/it]

R2 0.304 \pm 0.021
spearman 0.589 \pm 0.032



