In [None]:
# Start with the imports.
import sklearn
from sklearn.calibration import calibration_curve
import seaborn as sns
from collections import defaultdict
from optunaz.three_step_opt_build_merge import (
    optimize,
    buildconfig_best,
    build_best,
    build_merged,
)
from optunaz.config import ModelMode, OptimizationDirection
from optunaz.config.optconfig import (
    CalibratedClassifierCVWithVA, 
    OptimizationConfig,
    SVR,
    Ridge,
    Lasso,
    PLSRegression,
    KNeighborsRegressor,
    RandomForestClassifier,
    LogisticRegression,
    SVC,
    KNeighborsClassifier,
    PRFClassifier,
    AdaBoostClassifier,
    ChemPropClassifier,
    ChemPropHyperoptClassifier
    
    
)
from optunaz.datareader import Dataset
from optunaz.descriptors import ECFP, MACCS_keys, ECFP_counts, PathFP
from optunaz.config.optconfig import CalibratedClassifierCVWithVA, RandomForestClassifier
from sklearn.calibration import calibration_curve
import seaborn as sns

from collections import defaultdict

import pandas as pd

from sklearn.metrics import (
    precision_score,
    recall_score,
    f1_score,
    brier_score_loss,
    log_loss,
    roc_auc_score,
)


# --- Optimization set-up ----------------------------------------------------
# The four ingredients of the search are built one by one and then assembled
# into a single OptimizationConfig.

# Train and test CSV files plus the columns to read from them.
mdm2_dataset = Dataset(
    input_column="canonical_smiles",  # Typically "SMILES".
    # NOTE(review): presumably a binary label (pChEMBL > 6) - confirm in the CSV.
    response_column="pChEMBL_gt6",
    training_dataset_file="./Data/MDM2_scaffold_train_set_20.csv",
    test_dataset_file="./Data/MDM2_scaffold_test_set_20.csv",
)

# Molecular descriptors offered to the search, each with default parameters.
candidate_descriptors = [
    ECFP.new(),
    ECFP_counts.new(),
    MACCS_keys.new(),
    PathFP.new(),
]

# Classifiers offered to the search. Only the random forest overrides a
# default: its number of trees is searched between 10 and 100.
# NOTE(review): the ChemProp algorithms may need SMILES-based descriptors
# rather than the fingerprints listed above - verify against the optunaz docs.
candidate_algorithms = [
    RandomForestClassifier.new(n_estimators={"low": 10, "high": 100}),
    SVC.new(),
    KNeighborsClassifier.new(),
    AdaBoostClassifier.new(),
    LogisticRegression.new(),
    ChemPropClassifier.new(),
    ChemPropHyperoptClassifier.new(),
]

# How the study itself is run.
study_settings = OptimizationConfig.Settings(
    mode=ModelMode.CLASSIFICATION,
    cross_validation=3,
    n_trials=100,  # Total number of trials.
    n_startup_trials=50,  # Number of startup ("random") trials.
    random_seed=42,  # Seed for reproducibility.
    direction=OptimizationDirection.MAXIMIZATION,
    scoring="roc_auc",  # Metric maximized during hyperparameter optimization.
)

config = OptimizationConfig(
    data=mdm2_dataset,
    descriptors=candidate_descriptors,
    algorithms=candidate_algorithms,
    settings=study_settings,
)

# --- Run the study, build the models, and load them back --------------------
# Imported once here; the original imported pickle twice in mid-script.
import os
import pickle

# Output locations for the two serialized models, named once so the build
# call and the matching load call cannot drift apart.
BEST_MODEL_PATH = "./target/best.pkl"
MERGED_MODEL_PATH = "./target/merged.pkl"

# Make sure the output directory exists before anything is written to it.
os.makedirs(os.path.dirname(BEST_MODEL_PATH), exist_ok=True)
os.makedirs(os.path.dirname(MERGED_MODEL_PATH), exist_ok=True)

# Run Optuna Study.
study = optimize(config, study_name="MDM2_my_studyR1")

# Get the best Trial from the Study and make a Build (Training) configuration for it.
buildconfig = buildconfig_best(study)

# Build (Train) the best configuration and save the resulting model.
best_build = build_best(buildconfig, BEST_MODEL_PATH)

# SECURITY: pickle.load executes arbitrary code embedded in the file. Only
# load pickles produced by this script or another trusted source.
with open(BEST_MODEL_PATH, "rb") as f:
    model = pickle.load(f)

# Build (Train) and save the model on the merged train+test data.
build_merged(buildconfig, MERGED_MODEL_PATH)

# NOTE(review): the name suggests a calibrated model, but no calibration
# wrapper (e.g. CalibratedClassifierCVWithVA) is part of `config` in this
# script - confirm whether calibration was intended.
with open(MERGED_MODEL_PATH, "rb") as f:
    calibrated_model = pickle.load(f)
print(calibrated_model)