### Model Training

Before running this notebook, make sure to run the following command in Git Bash from the `04_models` directory: <br>
> `mlflow ui`

##### Notebook Setup

In [1]:
# Import libraries

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import mlflow

from sklearn.tree import DecisionTreeClassifier

from sklearn.model_selection import train_test_split

from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from sklearn.metrics import roc_auc_score
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
from sklearn.metrics import RocCurveDisplay


##### Data Pull

In [2]:
# Read the processed credit-card dataset (path is relative to this notebook)
# and preview the first rows as the cell output.
data = pd.read_csv(
    '../02_data/02_processed/credit_card_data_processed.csv'
)
data.head()

Unnamed: 0,ID,LIMIT_BAL,SEX,EDUCATION,MARRIAGE,AGE,PAY_1,PAY_2,PAY_3,PAY_4,...,BILL_AMT4,BILL_AMT5,BILL_AMT6,PAY_AMT1,PAY_AMT2,PAY_AMT3,PAY_AMT4,PAY_AMT5,PAY_AMT6,DEFAULT_IND
0,1,20000,2,2,1,24,2,2,-1,-1,...,0,0,0,0,689,0,0,0,0,1
1,2,120000,2,2,2,26,-1,2,0,0,...,3272,3455,3261,0,1000,1000,1000,0,2000,1
2,3,90000,2,2,2,34,0,0,0,0,...,14331,14948,15549,1518,1500,1000,1000,1000,5000,0
3,4,50000,2,2,1,37,0,0,0,0,...,28314,28959,29547,2000,2019,1200,1100,1069,1000,0
4,5,50000,1,2,1,57,-1,0,-1,0,...,20940,19146,19131,2000,36681,10000,9000,689,679,0


##### Create Functions

In [3]:
# FUNCTION 1

def train_model(X_train, y_train, model_nm, parameters=None):
    """Instantiate and fit an estimator with the chosen hyperparameters.

    Parameters
    ----------
    X_train, y_train
        Training features and labels, forwarded unchanged to ``fit``.
    model_nm
        Estimator class (not an instance). It is called with the keyword
        arguments ``max_depth``, ``min_samples_split`` and ``max_features``.
    parameters : dict, optional
        Mapping with the keys ``'max_depth'``, ``'min_samples_split'`` and
        ``'max_features'``. When omitted, the values are requested
        interactively through ``get_parameters()`` (the original behaviour).
        Passing them explicitly lets the notebook run without prompts.

    Returns
    -------
    tuple
        ``(fitted_model, parameters)`` where ``parameters`` is the mapping
        that was actually used to build the model.
    """
    # Only prompt the user when the caller did not supply hyperparameters.
    if parameters is None:
        parameters = get_parameters()

    model = model_nm(
        max_depth=parameters['max_depth'],
        min_samples_split=parameters['min_samples_split'],
        max_features=parameters['max_features'],
    )
    model.fit(X_train, y_train)

    return model, parameters


def get_parameters():
    """Prompt for the three tree hyperparameters and return them as a dict.

    ``min_samples_split`` may be entered either as a sample count (``2``,
    ``50``) or as a fraction (``0.05``). A whole number of 2 or more is
    returned as ``int``; any other value is returned as ``float``.
    scikit-learn rejects a float such as ``2.0`` for this parameter, so
    always casting to ``float`` made integer counts unusable.

    Returns
    -------
    dict
        ``{'max_depth': int, 'min_samples_split': int | float,
        'max_features': int}``

    Raises
    ------
    ValueError
        If any answer cannot be parsed as a number.
    """
    max_depth = int(input('Max Depth: '))

    min_samples = float(input('Min Samples: '))
    # Whole numbers >= 2 are sample counts and must be ints; values such as
    # 1 or 0.25 stay floats so they keep their "fraction of samples" meaning.
    if min_samples >= 2 and min_samples.is_integer():
        min_samples = int(min_samples)

    max_features = int(input('Max Features: '))

    return {'max_depth' : max_depth, 'min_samples_split' : min_samples, 'max_features' : max_features}


# FUNCTION 2

def score_model(model_obj, X_test):
    """Return the predictions of ``model_obj`` for ``X_test``.

    ``model_obj`` only needs to expose a ``predict`` method; its result is
    passed back unchanged.
    """
    return model_obj.predict(X_test)


# FUNCTION 3

def evaluate_model(y_true, y_pred):
    """Compute accuracy, precision and recall for a set of predictions.

    Returns a dict keyed by ``'accuracy'``, ``'precision'`` and ``'recall'``
    holding the results of the corresponding scikit-learn metric functions.
    """
    return {
        'accuracy': accuracy_score(y_true, y_pred),
        'precision': precision_score(y_true, y_pred),
        'recall': recall_score(y_true, y_pred),
    }


# FUNCTION 4

# def create_evaluation_plots(model_obj, y_test, y_pred):
#     cm = confusion_matrix(y_test, y_pred, labels = model_obj.classes_)
#     ConfusionMatrixDisplay(cm, display_labels = model_obj.classes_)
#     plt.savefig('confusion_matrix.png')

    # RocCurveDisplay(model_obj, X_test, y_test)
    # plt.savefig('roc_curve.png')

##### Configure Parameters

In [4]:
# Convention used here: the last column of `data` is the target and every
# column before it is a predictor. `X` is a list of column names, `y` a single
# column name.
# NOTE(review): this makes 'ID' a predictor (see the printed list) - confirm
# that a row identifier is really intended as a model feature.
*X, y = data.columns

print(f"Predictors: Count: {len(X)} | {X}")
print(f"Target:                 ['{y}']")

Predictors: Count: 24 | ['ID', 'LIMIT_BAL', 'SEX', 'EDUCATION', 'MARRIAGE', 'AGE', 'PAY_1', 'PAY_2', 'PAY_3', 'PAY_4', 'PAY_5', 'PAY_6', 'BILL_AMT1', 'BILL_AMT2', 'BILL_AMT3', 'BILL_AMT4', 'BILL_AMT5', 'BILL_AMT6', 'PAY_AMT1', 'PAY_AMT2', 'PAY_AMT3', 'PAY_AMT4', 'PAY_AMT5', 'PAY_AMT6']
Target:                 ['DEFAULT_IND']


##### Model Building

In [5]:
# Hold out 30% of the rows for testing; the fixed random_state keeps the
# split identical between runs.
X_train, X_test, y_train, y_test = train_test_split(data[X], data[y], test_size = .3, random_state = 0)

# Report the size of each partition. The y_test line previously printed
# len(X_test), which would have hidden a feature/label length mismatch.
print(f'X_train: {len(X_train)}')
print(f'y_train: {len(y_train)}')
print(f'X_test: {len(X_test)}')
print(f'y_test: {len(y_test)}')

X_train: 21000
y_train: 21000
X_test: 9000
y_test: 9000


##### MLFlow

In [10]:
# Step 1: Get tracking URI to know the current directory where mlruns folder will be saved
#mlflow.get_tracking_uri()

# Step 2: Set tracking URI
# The original cell had a dangling `remote_server_uri = ` (a SyntaxError).
# The store actually in use is the local mlruns folder; swap in a remote
# (e.g. DagsHub) URI here if one is needed.
remote_server_uri = '../04_models/mlruns'
mlflow.set_tracking_uri(remote_server_uri)

# Step 3: Create MLFlow Experiment (only if it does not exist yet)
# An explicit lookup replaces the previous bare `except: pass`, so real
# failures (bad URI, permissions, ...) surface instead of being swallowed.
experimentv1 = mlflow.get_experiment_by_name("Credit Card Defaults Experiment_Manual Tuning")
if experimentv1 is None:
    mlflow.create_experiment("Credit Card Defaults Experiment_Manual Tuning")
    experimentv1 = mlflow.get_experiment_by_name("Credit Card Defaults Experiment_Manual Tuning")

# Step 4: Get MLFlow Experiment details
experimentv1_id = experimentv1.experiment_id
experimentv1_name = experimentv1.name
print(experimentv1_id)
print(experimentv1_name)

890392801274749997
Credit Card Defaults Experiment_Manual Tuning


In [None]:
# One tracked MLflow run: train, score and evaluate a decision tree, then log
# its hyperparameters, metrics and the fitted model. The run name is asked
# for interactively.
with mlflow.start_run(run_name=input('Run Name: '), experiment_id=experimentv1_id):

    # Train, score and evaluate
    model, parameters = train_model(X_train, y_train, DecisionTreeClassifier)
    y_pred = score_model(model, X_test)
    eval_metrics = evaluate_model(y_test, y_pred)

    # Plot (disabled)
    # create_evaluation_plots(model, X_test, y_test)

    # MLflow: log parameters
    for param_name in ('max_depth', 'min_samples_split', 'max_features'):
        mlflow.log_param(param_name, parameters[param_name])

    # MLflow: log metrics
    for metric_name in ('accuracy', 'precision', 'recall'):
        mlflow.log_metric(metric_name, eval_metrics[metric_name])

    # MLflow: log artifacts (disabled until the evaluation plots are produced)
    # mlflow.log_artifact('confusion_matrix.png', 'confusion_matrix')
    # mlflow.log_artifact('roc_curve.png', 'confusion_matrix')

    # MLflow: log the fitted model
    mlflow.sklearn.log_model(model, "model")