In [8]:
from typing import *

from sklearn.base import BaseEstimator
from sklearn.metrics import accuracy_score, balanced_accuracy_score, precision_score, roc_auc_score
from sklearn.model_selection import KFold, StratifiedKFold, GridSearchCV

# Automatic model training

### Prototype 1

In [None]:
def use_gridsearch(training_dataset_x, training_dataset_y, testing_dataset_x, testing_dataset_y, estimator, param_grid, cross_val, scorers, scorer_to_use, **kwargs):
    """Fit a ``GridSearchCV`` on the training data and score it on the test data.

    Parameters
    ----------
    training_dataset_x, training_dataset_y
        Features and targets passed to ``GridSearchCV.fit``.
    testing_dataset_x, testing_dataset_y
        Held-out features and targets used for the metrics returned below.
    estimator
        Forwarded to ``GridSearchCV(estimator=...)``.
    param_grid
        Forwarded to ``GridSearchCV(param_grid=...)``.
    cross_val
        Forwarded to ``GridSearchCV(cv=...)``.
    scorers
        Forwarded to ``GridSearchCV(scoring=...)``.
    scorer_to_use
        Forwarded to ``GridSearchCV(refit=...)``.
    **kwargs
        Any additional keyword arguments for ``GridSearchCV``.

    Returns
    -------
    tuple of (dict, dict)
        ``gridsearch_results_dict`` with keys ``gs_cv_results``,
        ``gs_best_score``, ``gs_best_params`` and ``gs_best_estimator``, and
        ``scores_for_default_metrics`` with keys ``accuracy``,
        ``balanced_accuracy``, ``precision`` and ``roc_auc_score``.
    """
    gridsearch_instance = GridSearchCV(estimator=estimator, param_grid=param_grid, cv=cross_val, scoring=scorers, refit=scorer_to_use, **kwargs)

    # ----- Starting the model training -----
    print("[+] Starting to train the model")
    gridsearch_instance.fit(training_dataset_x, training_dataset_y)

    # ----- Placing the training results into dictionaries -----
    print("[+] Model finished training -> {}".format(estimator))

    gridsearch_results_dict = {
        "gs_cv_results": gridsearch_instance.cv_results_,
        "gs_best_score": gridsearch_instance.best_score_,
        "gs_best_params": gridsearch_instance.best_params_,
        "gs_best_estimator": gridsearch_instance.best_estimator_,
    }

    # Predict once and reuse the result for every label-based metric.
    test_predictions = gridsearch_instance.predict(testing_dataset_x)

    # ROC AUC is a ranking metric, so feed it scores when the estimator offers
    # them. For a two-column probability matrix only the second column (the
    # class with the greater label) is what roc_auc_score expects.
    # Estimators without predict_proba fall back to the hard predictions.
    roc_auc_input = test_predictions
    if hasattr(gridsearch_instance, "predict_proba"):
        roc_auc_input = gridsearch_instance.predict_proba(testing_dataset_x)
        if getattr(roc_auc_input, "ndim", None) == 2 and roc_auc_input.shape[1] == 2:
            roc_auc_input = roc_auc_input[:, 1]

    scores_for_default_metrics = {
        "accuracy": accuracy_score(testing_dataset_y, test_predictions),
        "balanced_accuracy": balanced_accuracy_score(testing_dataset_y, test_predictions),
        "precision": precision_score(testing_dataset_y, test_predictions),
        # sklearn.metrics has no `roc_auc_curve`; the scalar metric is roc_auc_score.
        "roc_auc_score": roc_auc_score(testing_dataset_y, roc_auc_input),
    }

    return gridsearch_results_dict, scores_for_default_metrics

### Prototype 2

In [3]:
class TrainUsingGridSearch:
    """Wrap a ``GridSearchCV`` run and collect its results in three dictionaries.

    After ``start_gridsearch_training`` succeeds the instance holds:

    * ``gridsearch_attributes`` - ``cv_results_``, ``best_score_``,
      ``best_params_`` and ``scorer_`` of the fitted search;
    * ``default_metrics_scores`` - accuracy, balanced accuracy, precision and
      ROC AUC computed on the test data;
    * ``model_prediction_results`` - the raw predictions and the full
      ``predict_proba`` output for the test data.
    """

    def __init__(self, test_dataset_y, estimator, param_grid, scoring_dictionary, cross_val_method, scorer_to_use, **kwargs):
        """Store the ``GridSearchCV`` configuration.

        Parameters
        ----------
        test_dataset_y
            Not used by the constructor; kept so existing callers keep working.
        estimator, param_grid
            Forwarded to ``GridSearchCV`` under the same names.
        scoring_dictionary
            Forwarded as ``scoring``.
        cross_val_method
            Forwarded as ``cv``.
        scorer_to_use
            Forwarded as ``refit``.
        **kwargs
            Extra ``GridSearchCV`` keyword arguments. They override the
            defaults below (``n_jobs=110``, ``verbose=3``).
        """
        self.standard_params_dict = {
            "estimator": estimator,
            "param_grid": param_grid,
            "scoring": scoring_dictionary,
            "refit": scorer_to_use,
            "cv": cross_val_method,
            "n_jobs": 110,
            "verbose": 3,
        }
        # Previously **kwargs was accepted but dropped, so n_jobs/verbose could
        # never be changed; merging keeps the old defaults when nothing is passed.
        self.standard_params_dict.update(kwargs)

        self.gridsearch_attributes = {}
        self.default_metrics_scores = {}
        self.model_prediction_results = {}

    @staticmethod
    def _scores_for_roc_auc(probability_predictions):
        """Return the input to hand to ``roc_auc_score``.

        A two-column probability matrix is reduced to its second column, the
        probability of the class with the greater label, which is the shape
        ``roc_auc_score`` expects for binary targets. Anything else is
        returned unchanged.
        """
        is_two_column_matrix = (
            getattr(probability_predictions, "ndim", None) == 2
            and probability_predictions.shape[1] == 2
        )
        if is_two_column_matrix:
            return probability_predictions[:, 1]
        return probability_predictions

    def _distribute_attributes_and_metrics(self, gridsearch_instance, testing_dataset_x, testing_dataset_y):
        """Fill the three result dictionaries from a fitted search.

        Requires ``gridsearch_instance`` to expose both ``predict`` and
        ``predict_proba``.
        """
        # ----- Predict once; every metric below reuses these arrays -----
        model_predictions = gridsearch_instance.predict(testing_dataset_x)
        model_probability_predictions = gridsearch_instance.predict_proba(testing_dataset_x)

        # ----- Storing metric scores in self.default_metrics_scores -----
        # The "roc_auc_curve" key is kept for compatibility with existing
        # consumers; the value is computed with sklearn's roc_auc_score.
        metric_specs = [
            ("accuracy_score", accuracy_score, model_predictions),
            ("balanced_accuracy_score", balanced_accuracy_score, model_predictions),
            ("precision_score", precision_score, model_predictions),
            ("roc_auc_curve", roc_auc_score, self._scores_for_roc_auc(model_probability_predictions)),
        ]
        for metric_key, metric_function, model_output in metric_specs:
            self.default_metrics_scores[metric_key] = metric_function(testing_dataset_y, model_output)

        # ----- Distributing attributes in self.gridsearch_attributes dictionary -----
        self.gridsearch_attributes.update({
            "gridsearch_cv_results": gridsearch_instance.cv_results_,
            "best_score": gridsearch_instance.best_score_,
            "best_params": gridsearch_instance.best_params_,
            "scorer": gridsearch_instance.scorer_,
        })

        # ----- Storing model predictions and probabilities in self.model_prediction_results -----
        self.model_prediction_results.update({
            "raw_model_predictions": model_predictions,
            "model_probabilities_predictions": model_probability_predictions,
        })

    def start_gridsearch_training(self, training_dataset_x, training_dataset_y, test_dataset_x, test_dataset_y):
        """Fit the grid search and return the collected results.

        Returns
        -------
        tuple of (dict, dict, dict) or None
            ``(gridsearch_attributes, default_metrics_scores,
            model_prediction_results)`` on success. If any dataset is
            ``None`` an error message is printed and ``None`` is returned.

        Errors raised by ``GridSearchCV`` itself (including ``ValueError``)
        propagate to the caller instead of being printed and discarded.
        """
        datasets = (training_dataset_x, training_dataset_y, test_dataset_x, test_dataset_y)
        # Identity check: `== None` is elementwise on arrays/DataFrames and makes
        # any() raise an "ambiguous truth value" ValueError.
        if any(dataset is None for dataset in datasets):
            print("[-] Error: One of the datasets passed is has the value type of None")
            return None

        # ----- Creating an instance of gridsearch -----
        print("[+] Instantiating GridSearch")
        gridsearch_instance = GridSearchCV(**self.standard_params_dict)

        # ----- Starting GridSearch training -----
        print("[+] Starting GridSearch training")
        gridsearch_instance.fit(training_dataset_x, training_dataset_y)

        # ----- Putting scores into dictionaries then returning the attributes, scores, and predictions -----
        self._distribute_attributes_and_metrics(gridsearch_instance, test_dataset_x, test_dataset_y)
        return self.gridsearch_attributes, self.default_metrics_scores, self.model_prediction_results

    def reset_model_predictions(self, dictionary_predictions_variable):
        """Return the stored ``model_prediction_results`` dictionary.

        NOTE(review): the argument is ignored and nothing is reset; the method
        only hands back the instance's current predictions dictionary. Confirm
        the intended behaviour before relying on the name.
        """
        return self.model_prediction_results

# Model evaluation


In [None]:
class ModelEvaluation:
    """Skeleton for model-evaluation helpers; no method is implemented yet."""

    def __init__(self, test_dataset_x, test_dataset_y):
        """Accept the test features and targets; nothing is stored at present."""

    def display_learning_curve(self):
        """Placeholder (per its name, for a learning-curve display); does nothing."""

    def display_roc_curve(self):
        """Placeholder (per its name, for a ROC-curve display); does nothing."""

    def precision_recall_display(self):
        """Placeholder (per its name, for a precision-recall display); does nothing."""
