<a href="https://colab.research.google.com/github/SKawsar/game_python/blob/main/testing.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# to remove unnecessary warnings
import warnings
import pandas as pd
import numpy as np

# from R_configuration import parameters_DT
from ml_scripts.helpers.console_log import send_console_log

from ml_scripts.regression.AB_regressor import adaboost_regressor
from ml_scripts.regression.DT_regressor import decision_tree_regressor
from ml_scripts.regression.EN_regressor import elastic_net_regressor
from ml_scripts.regression.R_function_lib_ML import cv_result
from ml_scripts.regression.R_function_lib_validation import test_error
from ml_scripts.regression.GB_regressor import gradient_boosting_regressor
from ml_scripts.regression.L_regressor import linear_regressor
from ml_scripts.regression.Lasso_regressor import lasso_regressor
from ml_scripts.regression.MLP_regressor import multi_layer_perceptron_regressor
from ml_scripts.regression.RF_regressor import random_forest_regressor
from ml_scripts.regression.Ridge_regressor import ridge_regressor
from ml_scripts.regression.SV_regressor import support_vector_regressor
from ml_scripts.regression.XGB_regressor import x_gradient_boosting_regressor
from ml_scripts.common.model_insight_functions import (feature_importance,
                                                       permutation_feature_importance,
                                                       linear_model_feature_importance)
warnings.filterwarnings('ignore')


class MlAlgo:
    """Train, tune and score the regression models of one experiment.

    Every public ``run_*`` method follows the same recipe:

    1. send a console-log line announcing the model and the number of folds,
    2. call the model-specific search helper from ``ml_scripts.regression``
       (described as a grid search in the original comments) with the
       hyperparameter candidates found in ``parameter``,
    3. build the cross-validation result table, predict on the test set and
       attach the test score plus the ``item`` / ``model`` tags,
    4. compute a feature-importance result for the fitted model.

    Each ``run_*`` method returns a 4-tuple
    ``(y_pred, cv_row, df_fi, best_params)``:

    * ``y_pred``      - test-set predictions rounded to 2 decimals,
    * ``cv_row``      - ``.head(1)`` of the cross-validation table,
    * ``df_fi``       - whatever the feature-importance helper returns,
    * ``best_params`` - ``best_params_`` of the fitted search object.
    """

    def __init__(self, experiment_id):
        """Remember the experiment id used as the console-log target."""
        self.experiment_id = experiment_id

    # ------------------------------------------------------------------
    # private helpers shared by all run_* methods
    # ------------------------------------------------------------------
    def _announce(self, model_label, cv, ellipsis="…"):
        """Send the "training started" console-log line for one model.

        ``ellipsis`` exists only because the historical messages are not
        uniform (some end in the single character "…", others in "...");
        the emitted text is kept identical to what was sent before.
        """
        send_console_log(self.experiment_id,
                         "$ Training and validating " + model_label + " with "
                         + str(cv) + "-fold cross-validation " + ellipsis)

    @staticmethod
    def _evaluate(model, x_test, y_test, kpi, uni_val, model_name):
        """Score a fitted search object on the test set.

        Returns ``(y_pred, cv_row)`` where ``y_pred`` is the rounded test
        prediction and ``cv_row`` is the first row of the cross-validation
        table, extended with ``test_score``, ``item`` and ``model`` columns.
        """
        # table holding the cross-validation results of the search
        cv_table = cv_result(model)

        # prediction on the test data
        y_pred = np.round(model.predict(x_test), 2)

        # error on the test set, measured with the requested KPI
        cv_table['test_score'] = np.round(test_error(y_test, y_pred, kpi), 2)

        # tag the rows with the unique value and the model name
        cv_table['item'] = uni_val
        cv_table['model'] = model_name

        return y_pred, cv_table.head(1)

    @staticmethod
    def _categoricals_as_int(x_train, x_test):
        """Return train/test frames with categorical columns cast to int.

        Columns are selected by their dtype in ``x_train``; the same columns
        are cast in ``x_test``. The inputs are never modified: when a cast is
        needed the work is done on copies, otherwise the original objects are
        returned unchanged.
        """
        categorical_cols = [col for col in x_train.columns
                            if x_train[col].dtype.name == 'category']
        if not categorical_cols:
            return x_train, x_test

        # Copy first so the caller's frames keep their categorical dtypes.
        x_train = x_train.copy()
        x_test = x_test.copy()
        for col in categorical_cols:
            x_train[col] = x_train[col].astype('int')
            x_test[col] = x_test[col].astype('int')
        return x_train, x_test

    # ------------------------------------------------------------------
    # tree based models
    # ------------------------------------------------------------------
    def run_DT(self, x_train, y_train, kpi, uni_val, x_test, y_test, cv, parameter):
        """Tune and score a Decision Tree regressor.

        ``parameter`` must provide the keys ``criterion``, ``max_features``,
        ``max_depth``, ``min_samples_leaf`` and ``min_samples_split``.
        """
        self._announce("Decision Tree Regressor", cv)

        # hyperparameter search
        model_dt = decision_tree_regressor(x_train,
                                           y_train,
                                           parameter["criterion"],
                                           parameter["max_features"],
                                           parameter["max_depth"],
                                           parameter["min_samples_leaf"],
                                           parameter["min_samples_split"],
                                           kpi,
                                           cv)

        y_pred_dt, cv_row = self._evaluate(model_dt, x_test, y_test, kpi,
                                           uni_val, 'Decision Tree')

        # feature importance of the model
        df_fi = feature_importance(model_dt, x_train, 'DT', uni_val)

        return y_pred_dt, cv_row, df_fi, model_dt.best_params_

    def run_RF(self, x_train, y_train, kpi, uni_val, x_test, y_test, cv, parameter):
        """Tune and score a Random Forest regressor.

        ``parameter`` must provide the keys ``criterion``, ``n_estimators``,
        ``bootstrap``, ``max_features`` and ``max_depth``.
        """
        self._announce("Random Forest Regressor", cv, ellipsis="...")

        # hyperparameter search
        model_rf = random_forest_regressor(x_train,
                                           y_train,
                                           parameter["criterion"],
                                           parameter["n_estimators"],
                                           parameter["bootstrap"],
                                           parameter["max_features"],
                                           parameter["max_depth"],
                                           kpi,
                                           cv)

        y_pred_rf, cv_row = self._evaluate(model_rf, x_test, y_test, kpi,
                                           uni_val, 'Random Forest')

        # feature importance of the model
        df_fi = feature_importance(model_rf, x_train, 'RF', uni_val)

        return y_pred_rf, cv_row, df_fi, model_rf.best_params_

    def run_AB(self, x_train, y_train, kpi, uni_val, x_test, y_test, cv, parameter):
        """Tune and score an AdaBoost regressor.

        ``parameter`` must provide the keys ``base_estimator``,
        ``n_estimators``, ``learning_rate`` and ``loss``.
        """
        self._announce("AdaBoost Regressor", cv)

        # hyperparameter search
        model_ab = adaboost_regressor(x_train,
                                      y_train,
                                      parameter["base_estimator"],
                                      parameter["n_estimators"],
                                      parameter["learning_rate"],
                                      parameter["loss"],
                                      kpi,
                                      cv)

        y_pred_ab, cv_row = self._evaluate(model_ab, x_test, y_test, kpi,
                                           uni_val, 'AdaBoost')

        # feature importance of the model
        df_fi = feature_importance(model_ab, x_train, 'AB', uni_val)

        return y_pred_ab, cv_row, df_fi, model_ab.best_params_

    def run_GB(self, x_train, y_train, kpi, uni_val, x_test, y_test, cv, parameter):
        """Tune and score a Gradient Boosting regressor.

        ``parameter`` must provide the keys ``criterion``, ``max_depth``,
        ``n_estimators`` and ``learning_rate``.
        """
        self._announce("Gradient Boosting Regressor", cv)

        # hyperparameter search
        model_gb = gradient_boosting_regressor(x_train,
                                               y_train,
                                               parameter["criterion"],
                                               parameter["max_depth"],
                                               parameter["n_estimators"],
                                               parameter["learning_rate"],
                                               kpi,
                                               cv)

        y_pred_gb, cv_row = self._evaluate(model_gb, x_test, y_test, kpi,
                                           uni_val, 'Gradient Boosting')

        # feature importance of the model
        df_fi = feature_importance(model_gb, x_train, 'GB', uni_val)

        return y_pred_gb, cv_row, df_fi, model_gb.best_params_

    def run_XGB(self, x_train, y_train, kpi, uni_val, x_test, y_test, cv, parameter):
        """Tune and score an Extreme Gradient Boosting regressor.

        ``parameter`` must provide the keys ``max_depth``, ``n_estimators``
        and ``learning_rate``. Categorical columns are cast to ``int`` before
        training; the cast is applied to private copies, so the frames passed
        in by the caller keep their original dtypes.
        """
        self._announce("Extreme Gradient Boosting Regressor", cv)

        # work on int-encoded copies instead of mutating the caller's data
        x_train, x_test = self._categoricals_as_int(x_train, x_test)

        # hyperparameter search
        model_xgb = x_gradient_boosting_regressor(x_train,
                                                  y_train,
                                                  parameter["max_depth"],
                                                  parameter["n_estimators"],
                                                  parameter["learning_rate"],
                                                  kpi,
                                                  cv)

        y_pred_xgb, cv_row = self._evaluate(model_xgb, x_test, y_test, kpi,
                                            uni_val, 'XGB')

        # feature importance of the model
        df_fi = feature_importance(model_xgb, x_train, "XGB", uni_val)

        return y_pred_xgb, cv_row, df_fi, model_xgb.best_params_

    # ------------------------------------------------------------------
    # neural network / kernel models (permutation feature importance)
    # ------------------------------------------------------------------
    def run_MLP(self, x_train, y_train, kpi, uni_val, x_test, y_test, cv, parameter):
        """Tune and score a multi-layer perceptron regressor.

        ``parameter`` must provide the keys ``activation``, ``solver``,
        ``learning_rate_init``, ``max_iter`` and ``hidden_layer_sizes``.
        """
        self._announce("Artificial Neural Network", cv)

        # hyperparameter search
        model_mlp = multi_layer_perceptron_regressor(x_train,
                                                     y_train,
                                                     parameter["activation"],
                                                     parameter["solver"],
                                                     parameter["learning_rate_init"],
                                                     parameter["max_iter"],
                                                     parameter["hidden_layer_sizes"],
                                                     kpi,
                                                     cv)

        y_pred_mlp, cv_row = self._evaluate(model_mlp, x_test, y_test, kpi,
                                            uni_val, 'Neural Network')

        # feature importance of the model
        df_fi = permutation_feature_importance(model_mlp, x_train, y_train,
                                               kpi, 'MLP', uni_val)

        return y_pred_mlp, cv_row, df_fi, model_mlp.best_params_

    def run_SV(self, x_train, y_train, kpi, uni_val, x_test, y_test, cv, parameter):
        """Tune and score a Support Vector regressor.

        ``parameter`` must provide the keys ``kernel`` and ``c``.
        """
        self._announce("Support Vector Regressor", cv, ellipsis="...")

        # hyperparameter search
        model_sv = support_vector_regressor(x_train,
                                            y_train,
                                            parameter["kernel"],
                                            parameter["c"],
                                            kpi,
                                            cv)

        y_pred_sv, cv_row = self._evaluate(model_sv, x_test, y_test, kpi,
                                           uni_val, 'Support Vector')

        # feature importance of the model
        df_fi = permutation_feature_importance(model_sv, x_train, y_train,
                                               kpi, "SV", uni_val)

        return y_pred_sv, cv_row, df_fi, model_sv.best_params_

    # ------------------------------------------------------------------
    # linear models (linear-model feature importance)
    # ------------------------------------------------------------------
    def run_LR(self, x_train, y_train, kpi, uni_val, x_test, y_test, cv, parameter):
        """Tune and score a plain linear regression.

        ``parameter`` is read with the empty-string key ``""``.
        """
        self._announce("Linear regression", cv)

        # hyperparameter search
        # NOTE(review): the empty key "" looks unintended (the sibling linear
        # models read "fit_intercept"); kept as-is because the expected key
        # cannot be confirmed from this file - verify against the caller.
        model_lr = linear_regressor(x_train,
                                    y_train,
                                    parameter[""],
                                    kpi,
                                    cv)

        y_pred_lr, cv_row = self._evaluate(model_lr, x_test, y_test, kpi,
                                           uni_val, 'Linear regression')

        # feature importance of the model
        df_fi = linear_model_feature_importance(model_lr, x_train, "LR", uni_val)

        return y_pred_lr, cv_row, df_fi, model_lr.best_params_

    def run_Ri(self, x_train, y_train, kpi, uni_val, x_test, y_test, cv, parameter):
        """Tune and score a Ridge regression.

        ``parameter`` must provide the keys ``alpha`` and ``fit_intercept``.
        """
        self._announce("Ridge regression", cv)

        # hyperparameter search
        model_ri = ridge_regressor(x_train,
                                   y_train,
                                   parameter["alpha"],
                                   parameter["fit_intercept"],
                                   kpi,
                                   cv)

        y_pred_ri, cv_row = self._evaluate(model_ri, x_test, y_test, kpi,
                                           uni_val, 'Ridge regression')

        # feature importance of the model
        df_fi = linear_model_feature_importance(model_ri, x_train, "Ri", uni_val)

        return y_pred_ri, cv_row, df_fi, model_ri.best_params_

    def run_La(self, x_train, y_train, kpi, uni_val, x_test, y_test, cv, parameter):
        """Tune and score a Lasso regression.

        ``parameter`` must provide the keys ``alpha`` and ``fit_intercept``.
        """
        self._announce("Lasso regression", cv)

        # hyperparameter search
        model_la = lasso_regressor(x_train,
                                   y_train,
                                   parameter["alpha"],
                                   parameter["fit_intercept"],
                                   kpi,
                                   cv)

        y_pred_la, cv_row = self._evaluate(model_la, x_test, y_test, kpi,
                                           uni_val, 'Lasso regression')

        # feature importance of the model
        df_fi = linear_model_feature_importance(model_la, x_train, "La", uni_val)

        return y_pred_la, cv_row, df_fi, model_la.best_params_

    def run_EN(self, x_train, y_train, kpi, uni_val, x_test, y_test, cv, parameter):
        """Tune and score an ElasticNet regression.

        ``parameter`` must provide the keys ``alpha``, ``l1_ratio`` and
        ``fit_intercept``.
        """
        self._announce("ElasticNet regression", cv)

        # hyperparameter search
        model_en = elastic_net_regressor(x_train,
                                         y_train,
                                         parameter["alpha"],
                                         parameter["l1_ratio"],
                                         parameter["fit_intercept"],
                                         kpi,
                                         cv)

        y_pred_en, cv_row = self._evaluate(model_en, x_test, y_test, kpi,
                                           uni_val, 'ElasticNet')

        # feature importance of the model
        df_fi = linear_model_feature_importance(model_en, x_train, "EN", uni_val)

        return y_pred_en, cv_row, df_fi, model_en.best_params_
