# Predictive validity tutorial #2

### Sparse data set containing multiple endpoints

In [1]:
import yaml
import os
import pandas as pd
from mosses.predictive_validity import evaluate_pv

The arguments required by `evaluate_pv()` can be passed as a dictionary of keyword arguments.\
Here we show that the arguments can be built directly in Jupyter.\
As part of preparing the arguments, the dataframe needed by the function is loaded and passed as `input_df`.

In [2]:
# Read the tutorial's validation data from the local "data" directory
data_dir = os.path.join("data")
config_dict = {}
data = pd.read_csv(
    os.path.join(data_dir, "multi_objective_validation_1.csv")
)

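Arguments here are generated by a factory which fills in the `{model_name}` placeholder of a shared template, producing one set of keyword arguments per model in the data set.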

In [3]:
from copy import deepcopy

# Models (endpoints) present in the data set
models = (
    "logd",
    "hlm_clint",
    "herg",
    "rh_clint",
    "solubility",
    "caco2_efflux_ratio",
)

# Keyword-argument template shared by all models; the "{model_name}"
# placeholder in the string values is filled in per model.
dict_template = dict(
    input_df=data,
    observed_column="{model_name}_exp",
    predicted_column="{model_name}_pred",
    model_version="{model_name}_model_version",
    training_set_column="{model_name}_set_split",
    plot_title="{model_name}",
    sample_registration_date="cpd_reg_date",
    # User-defined parameters
    pos_class="<=",
    current_threshold=3,
    plot_scale="linear",
)

class ModelDictFactory:
    """Builds per-model keyword-argument dictionaries from a shared template."""

    @staticmethod
    def make_config_dict(model_name, template=None):
        """Return a copy of the template with ``{model_name}`` filled in.

        Declared as a static method so it can be called on the class
        (``ModelDictFactory.make_config_dict(name)``) as well as on an
        instance; without the decorator an instance call would pass the
        instance as ``model_name`` and fail.

        Args:
            model_name: Value substituted for the ``{model_name}`` placeholder
                in every string value of the template.
            template: Optional mapping to use instead of the module-level
                ``dict_template``. Defaults to ``dict_template``.

        Returns:
            A new dictionary with the same keys as the template. String
            values are formatted with ``model_name``; all other values are
            deep copies of the template's values.
        """
        source = dict_template if template is None else template
        # Deep copy so the returned dict never shares mutable values with the
        # template (or with configurations built for other models).
        config_dict = deepcopy(source)
        for key, value in config_dict.items():
            if isinstance(value, str):
                # Only existing keys are reassigned, so the dict's size does
                # not change while it is being iterated.
                config_dict[key] = value.format(model_name=model_name)
        return config_dict

In [4]:
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages

def capture_plots_to_pdf(func, filename, *args, **kwargs):
    """Run ``func`` and save every figure it creates into one multi-page PDF.

    While ``func`` runs, ``plt.figure`` and ``plt.subplots`` are temporarily
    replaced by wrappers that remember each figure they return. The original
    functions are restored afterwards, even if ``func`` raises. Only figures
    created through these two pyplot entry points are captured.

    Args:
        func: Callable that produces the plots.
        filename: Path of the PDF file to write.
        *args: Positional arguments forwarded to ``func``.
        **kwargs: Keyword arguments forwarded to ``func``.

    Raises:
        Any exception raised by ``func`` is propagated; in that case no PDF
        is written.
    """
    captured_figures = []
    original_figure = plt.figure
    original_subplots = plt.subplots

    def remember(fig):
        # plt.subplots() obtains its figure through pyplot's module-level
        # `figure`, which is patched while `func` runs, so the same figure is
        # reported by both wrappers. plt.figure(num) can also return an
        # already-captured figure. Record each figure only once so the PDF
        # does not contain duplicate pages.
        if not any(fig is seen for seen in captured_figures):
            captured_figures.append(fig)

    def wrapped_figure(*f_args, **f_kwargs):
        fig = original_figure(*f_args, **f_kwargs)
        remember(fig)
        return fig

    def wrapped_subplots(*f_args, **f_kwargs):
        fig, ax = original_subplots(*f_args, **f_kwargs)
        remember(fig)
        return fig, ax

    plt.figure = wrapped_figure
    plt.subplots = wrapped_subplots

    try:
        func(*args, **kwargs)
    finally:
        # Restore the original functions
        plt.figure = original_figure
        plt.subplots = original_subplots

    # Save captured figures, one page each, in creation order
    with PdfPages(filename) as pdf:
        for fig in captured_figures:
            pdf.savefig(fig)
            # Close after saving so figures do not accumulate in memory
            plt.close(fig)

In [None]:
def evaluate_pv_batch():
    """Call ``evaluate_pv`` once per entry in ``models`` with that model's arguments."""
    # map() is lazy, so each configuration is built right before it is used
    per_model_kwargs = map(ModelDictFactory.make_config_dict, models)
    for model_kwargs in per_model_kwargs:
        evaluate_pv(**model_kwargs)

# Capture all images into a PDF
capture_plots_to_pdf(evaluate_pv_batch, "multi_model_validation_plots.pdf")