In [7]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn import datasets
from sklearn.metrics import mean_absolute_error, mean_squared_error, accuracy_score, f1_score, recall_score
import sys
import warnings
from itertools import cycle
warnings.filterwarnings("ignore")
np.random.seed(40)
import mlflow
import mlflow.sklearn
from mlflow.models import infer_signature
import pickle
# Load the preprocessor object stored in prepic.pkl.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load this pickle if it comes from a trusted source.
with open('prepic.pkl', 'rb') as pickle_file:
    pre = pickle.load(pickle_file)

# Point every MLflow tracking call at the server on localhost:5000.
mlflow.set_tracking_uri('http://127.0.0.1:5000')

# Read the training CSV.
train = pd.read_csv(r"D:\mlopset\train_set_ml.csv")

# Read the test CSV.
test = pd.read_csv(r"D:\mlopset\test_set_ml.csv")

import xgboost as xg
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import AdaBoostClassifier


def eval_metrics(actual, pred):
    """Score predicted labels against the true labels.

    Args:
        actual: Ground-truth labels.
        pred: Predicted labels, aligned with ``actual``.

    Returns:
        A tuple ``(accuracy, f1, recall)`` produced by ``accuracy_score``,
        ``f1_score`` and ``recall_score``, each called with its default
        arguments.
    """
    # Order matters: callers unpack the result as (accuracy, f1, recall).
    scorers = (accuracy_score, f1_score, recall_score)
    return tuple(score(actual, pred) for score in scorers)

# Features are every column except "status".
train_x, test_x = train.drop(columns=["status"]), test.drop(columns=["status"])
# Double brackets keep each target as a single-column DataFrame.
train_y, test_y = train[["status"]], test[["status"]]

# Run both feature frames through the loaded preprocessor.
train_x_tf, test_x_tf = pre.transform(train_x), pre.transform(test_x)

# Name of the MLflow experiment that collects the runs
experiment_name = "loan_status_experiment_1"

# Look the experiment up by name and create it when the lookup finds nothing
experiment = mlflow.get_experiment_by_name(experiment_name)
if experiment is None:
    mlflow.create_experiment(experiment_name)

# Make it the active experiment for the runs started afterwards
mlflow.set_experiment(experiment_name)

# Tree depths to compare; each value gets its own MLflow run that holds both
# the XGBoost and the scikit-learn gradient-boosting results.
max_depth_v = [6, 8, 10]

# train_y is a single-column frame, but the estimators expect 1-D labels.
# Flatten it once, outside the loop, instead of relying on the implicit
# column-vector conversion (whose warning the global filter hides).
train_labels = np.ravel(train_y)

# NOTE: variable names inside the loop are kept as they were, since they stay
# in the module namespace after the loop and other cells may refer to them.
for max_depth in max_depth_v:
    # XGBoost classifier at this depth, scored on the test features.
    xgb = xg.XGBClassifier(max_depth=max_depth, random_state=42)
    xgb.fit(train_x_tf, train_labels)
    predicted_qualities = xgb.predict(test_x_tf)
    accuracy, f1, recall = eval_metrics(test_y, predicted_qualities)

    # Gradient boosting at the same depth, scored the same way.
    gb = GradientBoostingClassifier(max_depth=max_depth, random_state=42)
    gb.fit(train_x_tf, train_labels)
    predicted_qualities_gb = gb.predict(test_x_tf)
    accuracy_gb, f1_gb, recall_gb = eval_metrics(test_y, predicted_qualities_gb)

    # max_depth is an int, so print it as-is (":f" rendered it as 6.000000)
    # and close the parenthesis that the message previously left open.
    print(f"XGBoost Model (max_depth={max_depth})")
    print(f"  accuracy: {accuracy}")
    print(f"  f1: {f1}")
    print(f"  recall: {recall}")

    print(f"GBoost Model (max_depth={max_depth})")
    print(f"  accuracygb: {accuracy_gb}")
    print(f"  f1gb: {f1_gb}")
    print(f"  recallgb: {recall_gb}")

    # Model signature for the XGBoost model, inferred from the training
    # features and the model's predictions on them.
    predictions = xgb.predict(train_x_tf)
    signature = infer_signature(train_x_tf, predictions)

    # Same for the gradient-boosting model.
    predictionsgb = gb.predict(train_x_tf)
    signaturegb = infer_signature(train_x_tf, predictionsgb)

    with mlflow.start_run(run_name=f"max_depth_{max_depth}"):
        # XGBoost: parameter, metrics and the fitted model.
        mlflow.log_param("max_depth", max_depth)
        mlflow.log_metric("accuracy", accuracy)
        mlflow.log_metric("f1", f1)
        mlflow.log_metric("recall", recall)
        mlflow.sklearn.log_model(xgb, "model", signature=signature)

        # Gradient boosting shares the run, so its keys carry a "gb" suffix
        # and its model goes to a separate artifact path.
        mlflow.log_param("max_depthgb", max_depth)
        mlflow.log_metric("accuracygb", accuracy_gb)
        mlflow.log_metric("f1gb", f1_gb)
        mlflow.log_metric("recallgb", recall_gb)
        mlflow.sklearn.log_model(gb, "model1", signature=signaturegb)

# The `with` block above already closes each run on exit; this call is kept
# only as a safeguard and does nothing when no run is active.
mlflow.end_run()

# Resolve the experiment's ID from its name.
experiment_id = mlflow.get_experiment_by_name(experiment_name).experiment_id

# search_runs documents experiment_ids as a list of IDs, so wrap the single
# ID in a list rather than passing a bare string.
runs = mlflow.search_runs(experiment_ids=[experiment_id])

# Bare expression: as the last line of a notebook cell it displays the runs
# table.
runs

XGBoost Model (max_depth=6.000000
  accuracy: 0.7849462365591398
  f1: 0.6551724137931033
  recall: 0.59375
GBoost Model (max_depth=6.000000
  accuracygb: 0.7204301075268817
  f1gb: 0.5357142857142857
  recallgb: 0.46875
XGBoost Model (max_depth=8.000000
  accuracy: 0.7311827956989247
  f1: 0.5762711864406779
  recall: 0.53125
GBoost Model (max_depth=8.000000
  accuracygb: 0.7956989247311828
  f1gb: 0.6545454545454547
  recallgb: 0.5625
XGBoost Model (max_depth=10.000000
  accuracy: 0.7849462365591398
  f1: 0.6428571428571429
  recall: 0.5625
GBoost Model (max_depth=10.000000
  accuracygb: 0.7311827956989247
  f1gb: 0.5098039215686275
  recallgb: 0.40625


Unnamed: 0,run_id,experiment_id,status,artifact_uri,start_time,end_time,metrics.recall,metrics.f1,metrics.accuracygb,metrics.f1gb,metrics.recallgb,metrics.accuracy,params.max_depth,params.max_depthgb,tags.mlflow.user,tags.mlflow.log-model.history,tags.mlflow.source.type,tags.mlflow.source.name,tags.mlflow.runName
0,5eb807ea7ce84777b3a0e5436b38eb33,694569894209175367,FINISHED,mlflow-artifacts:/694569894209175367/5eb807ea7...,2024-01-17 09:59:15.019000+00:00,2024-01-17 09:59:26.500000+00:00,0.5625,0.642857,0.731183,0.509804,0.40625,0.784946,10,10,admin,"[{""run_id"": ""5eb807ea7ce84777b3a0e5436b38eb33""...",LOCAL,C:\Users\admin\anaconda3\Lib\site-packages\ipy...,max_depth_10
1,1459510869c94e0f8cf333e7ff491370,694569894209175367,FINISHED,mlflow-artifacts:/694569894209175367/145951086...,2024-01-17 09:59:00.390000+00:00,2024-01-17 09:59:11.725000+00:00,0.53125,0.576271,0.795699,0.654545,0.5625,0.731183,8,8,admin,"[{""run_id"": ""1459510869c94e0f8cf333e7ff491370""...",LOCAL,C:\Users\admin\anaconda3\Lib\site-packages\ipy...,max_depth_8
2,736b4e11d83f4303802a03846dfad27c,694569894209175367,FINISHED,mlflow-artifacts:/694569894209175367/736b4e11d...,2024-01-17 09:58:47.871000+00:00,2024-01-17 09:58:58.645000+00:00,0.59375,0.655172,0.72043,0.535714,0.46875,0.784946,6,6,admin,"[{""run_id"": ""736b4e11d83f4303802a03846dfad27c""...",LOCAL,C:\Users\admin\anaconda3\Lib\site-packages\ipy...,max_depth_6
