# Model Utilities

> Functions Used In Modeling Efforts

In [None]:
#| default_exp modeling.custom_utils

In [None]:
#| hide
from nbdev.showdoc import *
from fastcore.test import *

In [None]:
#| export

from data_system_utilities.azure.storage import FileHandling
from data_system_utilities.file_parsers import yaml
from data_system_utilities.snowflake.utils import make_stage_query_generator

from machine_learning_utilities import preprocessing

from LTBP.data.utils import snowflake_query, get_yaml_dicts, generate_data_lake_query

from sklearn import metrics
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from rfpimp import *
from datetime import datetime

import sklearn.preprocessing as y_transform
import os
import logging
import pickle
import datetime as dt

  warn_incompatible_dep(


In [None]:
#| export


def evaluate(model,
             X_valid,
             y_valid,
             y_var,
             feature_importance=True):
    """
    Score a fitted binary classifier on a validation set and log the results.

    This utility feeds the experiment table. It is written for one specific
    use case (binary classification scored with AUC, accuracy and balanced
    accuracy); for another problem type, e.g. regression with RMSE / R2 / MSE,
    the metrics and the `columns` list must be rewritten together.

    Args:
    * model (classifier): fitted model exposing `predict` and `predict_proba`
    * X_valid: validation features
    * y_valid: validation actuals
    * y_var (str): name of the variable being predicted (only used in the log line)
    * feature_importance (bool): when truthy, also compute feature importances
      on the validation set

    Returns:
    * tuple: `(auc, acc, bacc, columns, fi_permutation)` where `columns` is the
      list of metric names `['auc', 'acc', 'bacc']` and `fi_permutation` is the
      importance DataFrame with columns renamed to `COLS` / `IMP`, or `None`
      when `feature_importance` is falsy
    """
    y_pred_proba = model.predict_proba(X_valid)
    y_pred = model.predict(X_valid)
    # Column 1 of predict_proba is used as the positive-class score.
    auc = metrics.roc_auc_score(y_valid, y_pred_proba[:, 1])
    acc = metrics.accuracy_score(y_valid, y_pred)
    bacc = metrics.balanced_accuracy_score(y_valid, y_pred)
    columns = ['auc', 'acc', 'bacc']
    logging.info(f'Variable(s) of interest {y_var} AUC: {auc:.3f}    Accuracy: {acc:.3f}    Balanced Accuracy: {bacc:.3f}')

    # Always bind the name, and gate on truthiness in one place only. The
    # previous `is True` check disagreed with the truthiness check in the
    # return, so a truthy non-bool flag (e.g. 1 or numpy.bool_) raised
    # UnboundLocalError.
    fi_permutation = None
    if feature_importance:
        # `importances` is presumably provided by the `rfpimp` star import.
        fi_permutation = importances(model, X_valid, y_valid)  # noqa: F405
        fi_permutation = (fi_permutation
                          .reset_index()
                          .rename({'Feature': 'COLS', 'Importance': 'IMP'}, axis=1))
        logging.info(f'Feature Importance df: \n {fi_permutation}')
    return auc, acc, bacc, columns, fi_permutation

In [None]:
#| export


def send_holdout_results_to_sf(sf,
                               id_list: list,
                               probs,
                               experiment,
                               experiment_name,
                               etl_dict,
                               model_dict,
                               drop_table: bool = False
                               ):
    """
    Write hold-out predictions to Snowflake and archive them as a CSV in Azure.

    Builds a DataFrame from `id_list`, adds the predicted probability, a
    creation timestamp and an experiment/commit tag, loads it into the
    hold-out table, then uploads the same data as `holdout_<experiment_name>.csv`
    to the data lake. The local CSV is always removed afterwards.

    Args:
    * sf: Snowflake connection object exposing `run_sql_str` and
      `infer_to_snowflake`; the caller's connection is used as-is
    * id_list (list): identifiers for the hold-out rows, passed to `pd.DataFrame`
    * probs: 2-D array-like indexable as `probs[:, 1]`; column 1 is stored as
      `PROBABILITY`
    * experiment: when truthy the file is stored under
      `<data_lake_path>/experiments/<experiment_name>/`, otherwise under
      `<data_lake_path>/<CI_COMMIT_SHA>/`
    * experiment_name (str): used in the commit tag, the CSV name and the path
    * etl_dict (dict): must contain `data_lake_path` and `azure_container`
    * model_dict (dict): must contain `hold_out_table` and `connection_str`
      (the NAME of the environment variable holding the Azure connection string)
    * drop_table (bool): drop the hold-out table before loading

    Returns:
    * None
    """
    # Local import: pandas is not imported explicitly at module level.
    import pandas as pd

    commit_sha = os.environ.get('CI_COMMIT_SHA', 'LocalRunNBS')

    hold_out_df = pd.DataFrame(id_list)
    hold_out_df['PROBABILITY'] = probs[:, 1]
    hold_out_df['DATECREATED'] = dt.datetime.today().strftime('%Y-%m-%d %H:%M:%S')
    hold_out_df['EXP_COMMIT_CI_SHA'] = experiment_name + '_' + commit_sha
    logging.info(f'hold out data preview going to snowflake {hold_out_df.head(3)}')

    # The previous body rebound `sf` from an undefined `sfSchema` and read the
    # table name from an undefined `models`; both now come from the arguments.
    table_name = model_dict['hold_out_table']
    if drop_table:
        # IF EXISTS keeps the first run against a fresh schema from failing.
        sf.run_sql_str(f"DROP TABLE IF EXISTS {table_name}")
    sf.infer_to_snowflake(hold_out_df,
                          table_name=table_name)

    logging.info('saving test prediction file')
    local_file = f'holdout_{experiment_name}.csv'
    try:
        hold_out_df.to_csv(local_file, index=False)
        if experiment:
            run_root = os.path.join(etl_dict['data_lake_path'], 'experiments', experiment_name)
        else:
            run_root = os.path.join(etl_dict['data_lake_path'], commit_sha)
        adls_path = os.path.join(run_root, 'holdout_results/')
        logging.info(f'sending prediction file to azure to {adls_path}')
        az = FileHandling(os.environ[model_dict['connection_str']])
        _ = az.upload_file(
            azure_file_path=adls_path,
            local_file_path=local_file,
            container_name=etl_dict["azure_container"],
            overwrite=True,
        )
    finally:
        # Remove the temporary CSV even when writing or uploading fails.
        if os.path.exists(local_file):
            os.unlink(local_file)

In [None]:
#| export


def move_dev_holdout_table_to_prod_location(sf,
                                            exp):
    """
    Replace the `ltbp` schema hold-out table with a copy of the `DEV` one.

    Args:
    * sf: connection object exposing `run_str_query`
    * exp (dict): must contain `holdout_tb_name`, the table name used in both
      the `MACHINELEARNINGOUTPUTS.DEV` source and `MACHINELEARNINGOUTPUTS.ltbp`
      target

    Returns:
    * None
    """
    logging.info('Replacing Prod HoldOut With Newest Promoted')
    holdout_table = exp['holdout_tb_name']
    promote_query = f"""
                      CREATE OR REPLACE TABLE MACHINELEARNINGOUTPUTS.ltbp.{holdout_table} AS
                      SELECT * FROM MACHINELEARNINGOUTPUTS.DEV.{holdout_table};
                      """
    # NOTE(review): send_holdout_results_to_sf calls `sf.run_sql_str`; confirm
    # that `run_str_query` exists on the same connection object.
    sf.run_str_query(promote_query)

In [None]:
#| hide
# Run the nbdev export step for this notebook project.
import nbdev

nbdev.nbdev_export()