In [1]:
import os
import tempfile

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import pycaret
from pycaret.classification import *
import mlflow

In [2]:
# MLflow tracking server URI. Honors the MLFLOW_TRACKING_URI environment variable
# when it is set; otherwise falls back to the local development server.
MLFLOW_PATH = os.environ.get("MLFLOW_TRACKING_URI", "http://localhost:5001/")

In [3]:
# Point the MLflow client at the tracking server configured in MLFLOW_PATH.
mlflow.set_tracking_uri(MLFLOW_PATH)
# Turn on MLflow autologging; the recorded output shows it being enabled for
# sklearn and statsmodels in this environment.
mlflow.autolog()

2025/04/05 14:30:46 INFO mlflow.tracking.fluent: Autologging successfully enabled for sklearn.
2025/04/05 14:30:46 INFO mlflow.tracking.fluent: Autologging successfully enabled for statsmodels.


In [4]:
# Select the MLflow experiment for this notebook; the returned Experiment object is
# the cell's displayed value.
mlflow.set_experiment("Certificate of Deposit Campaign")

<Experiment: artifact_location='mlflow-artifacts:/4', creation_time=1743757036931, experiment_id='4', last_update_time=1743757036931, lifecycle_stage='active', name='Certificate of Deposit Campaign', tags={}>

In [5]:
# Location of the bank.xlsx workbook. Set the BANK_DATA_PATH environment variable to
# run the notebook on another machine; the default is the original local path.
DATA_PATH = os.environ.get(
    "BANK_DATA_PATH",
    r"C:\Users\admin\Documents\Projects\MLE\data\bank.xlsx",
)
data = pd.read_excel(DATA_PATH)

In [44]:
# Step 4: Setup PyCaret with MLflow integration
def _log_frame_as_csv_artifact(frame, filename):
    """Upload ``frame`` to the active MLflow run as a CSV artifact named ``filename``.

    The CSV is staged inside a throwaway temporary directory, so nothing is written
    to the working directory and the staged file is removed even if the upload fails.

    Args:
        frame: DataFrame to serialize with ``DataFrame.to_csv``.
        filename: Base name of the staged file (and therefore of the artifact).
    """
    with tempfile.TemporaryDirectory() as staging_dir:
        staged_path = os.path.join(staging_dir, filename)
        frame.to_csv(staged_path)
        mlflow.log_artifact(staged_path)


print("Setting up PyCaret environment...")
with mlflow.start_run(run_name="model_selection"):
    # Initialize setup
    clf = setup(
        data=data,

        target='y',
        session_id=12,  # PyCaret's random seed for this session
        ignore_features=['CIF', 'date'],  # CIF is likely just an ID number

        # Preprocessing setup
        # NOTE(review): several values below sit next to a switch that is turned off
        # (normalize, imputation_type, polynomial_features, remove_multicollinearity,
        # feature_selection); they look like placeholders for quick toggling —
        # confirm against the PyCaret setup() docs that they are inert as written.
        preprocess= True,

        normalize=False,
        normalize_method="zscore",

        imputation_type=None,
        numeric_imputation=0,
        categorical_imputation="UNKNOWN",

        max_encoding_ohe=10,
        encoding_method=None,

        rare_value="OTHER",
        rare_to_value=0.05,

        polynomial_features=False,
        polynomial_degree=2,

        remove_multicollinearity=False,
        multicollinearity_threshold=0.9,

        remove_outliers=False,

        fix_imbalance=False,  # Address class imbalance
        pca=False,

        feature_selection=False,
        n_features_to_select=0.5,

        fold=5,
        fold_strategy="kfold",
        fold_shuffle=True,

        n_jobs=4,

        # NOTE(review): log_experiment is off, so the experiment name, tags and
        # log_plots below presumably have no effect — confirm with the PyCaret docs.
        log_experiment=False,
        experiment_name="Certificate of Deposit Campaign",
        experiment_custom_tags={
            "Model_Type": "Pycaret Model"
        },
        log_plots=True,
        verbose=False
    )

    # Log PyCaret setup details to MLflow only, without permanent local file
    setup_details = pull()
    _log_frame_as_csv_artifact(setup_details, 'temp_pycaret_setup.csv')

    # Step 5: Compare Models
    # NOTE(review): in the recorded output the table is ordered by Accuracy and the
    # top models are within ~0.3 points of the Dummy Classifier with recall under
    # 0.25 — consider whether accuracy is the right selection metric for this target.
    print("Comparing different ML models...")
    models_comparison = compare_models()

    # Log comparison results to MLflow without local file
    comparison_results = pull()
    _log_frame_as_csv_artifact(comparison_results, 'temp_model_comparison.csv')

Setting up PyCaret environment...


2025/04/05 15:06:33 INFO mlflow.tracking._tracking_service.client: 🏃 View run Session Initialized 8990 at: http://localhost:5001/#/experiments/4/runs/036f1d0b830b483dbc16638970783df6.
2025/04/05 15:06:33 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://localhost:5001/#/experiments/4.


Comparing different ML models...


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
gbc,Gradient Boosting Classifier,0.8875,0.7127,0.133,0.5377,0.2127,0.1758,0.2268,0.276
ada,Ada Boost Classifier,0.8872,0.7063,0.11,0.577,0.1816,0.1501,0.2131,0.234
lightgbm,Light Gradient Boosting Machine,0.8862,0.6758,0.1317,0.5403,0.2111,0.173,0.2251,0.176
lr,Logistic Regression,0.8859,0.7103,0.0697,0.5215,0.1215,0.0988,0.1582,1.238
ridge,Ridge Classifier,0.8853,0.705,0.0205,0.5,0.0387,0.0319,0.0837,0.078
dummy,Dummy Classifier,0.8846,0.5,0.0,0.0,0.0,0.0,0.0,0.092
rf,Random Forest Classifier,0.8843,0.6713,0.1135,0.5003,0.1835,0.1474,0.1962,0.284
knn,K Neighbors Classifier,0.8761,0.5058,0.0059,0.0622,0.0106,-0.0073,-0.0149,0.146
lda,Linear Discriminant Analysis,0.8748,0.7049,0.2246,0.4294,0.2918,0.2309,0.2471,0.114
et,Extra Trees Classifier,0.8679,0.6452,0.1333,0.3235,0.1879,0.1295,0.1452,0.28


2025/04/05 15:06:59 INFO mlflow.tracking._tracking_service.client: 🏃 View run model_selection at: http://localhost:5001/#/experiments/4/runs/fb55f00c1cf040928e015a61d1f656c2.
2025/04/05 15:06:59 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://localhost:5001/#/experiments/4.


In [None]:
# Finalize the model returned by compare_models() and keep the result in `final_lr`.
# NOTE(review): the "_lr" suffix suggests logistic regression, but the comparison table
# recorded above ranks Gradient Boosting Classifier first — confirm which model this holds.
final_lr = finalize_model(models_comparison)

2025/04/05 15:01:25 INFO mlflow.tracking._tracking_service.client: 🏃 View run Gradient Boosting Classifier at: http://localhost:5001/#/experiments/4/runs/edd61dcfe30349089cbd961dcc9ab98b.
2025/04/05 15:01:25 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://localhost:5001/#/experiments/4.


In [40]:
# Display the finalized model.
final_lr

In [18]:
# Fetch PyCaret's leaderboard table for the current session and keep it as `lb`.
lb = get_leaderboard()

Processing:   0%|          | 0/15 [00:00<?, ?it/s]

In [20]:
# Show the leaderboard table.
lb

Unnamed: 0_level_0,Model Name,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
0,Logistic Regression,"(TransformerWrapper(exclude=None,\n ...",0.8859,0.7103,0.0697,0.5215,0.1215,0.0988,0.1582
1,K Neighbors Classifier,"(TransformerWrapper(exclude=None,\n ...",0.8761,0.5058,0.0059,0.0622,0.0106,-0.0073,-0.0149
2,Naive Bayes,"(TransformerWrapper(exclude=None,\n ...",0.817,0.7104,0.3592,0.2862,0.3154,0.2136,0.2162
3,Decision Tree Classifier,"(TransformerWrapper(exclude=None,\n ...",0.8044,0.5572,0.2359,0.1975,0.2136,0.1033,0.1045
4,SVM - Linear Kernel,"(TransformerWrapper(exclude=None,\n ...",0.8331,0.4498,0.0405,0.0664,0.0501,-0.025,-0.0286
5,Ridge Classifier,"(TransformerWrapper(exclude=None,\n ...",0.8853,0.705,0.0205,0.5,0.0387,0.0319,0.0837
6,Random Forest Classifier,"(TransformerWrapper(exclude=None,\n ...",0.8843,0.6713,0.1135,0.5003,0.1835,0.1474,0.1962
7,Quadratic Discriminant Analysis,"(TransformerWrapper(exclude=None,\n ...",0.676,0.6839,0.5627,0.1943,0.2853,0.1385,0.1733
8,Ada Boost Classifier,"(TransformerWrapper(exclude=None,\n ...",0.8872,0.7063,0.11,0.577,0.1816,0.1501,0.2131
9,Gradient Boosting Classifier,"(TransformerWrapper(exclude=None,\n ...",0.8875,0.7127,0.133,0.5377,0.2127,0.1758,0.2268


In [19]:
# Display the 'Model' entry of the first leaderboard row (positional index 0).
# NOTE(review): in the recorded leaderboard output row 0 is Logistic Regression, which is
# not the highest-accuracy row — confirm that positional row 0 is the model intended here.
lb.iloc[0]['Model']

In [46]:
# URI of the MLflow-logged model to score with, in the form runs:/<run_id>/<artifact_path>.
# NOTE(review): this run id is hard-coded and does not match any run shown in the
# outputs recorded above — replace it with the run you intend to load.
logged_model = 'runs:/4bbca3f2aabd45229479b5d8f7e94664/model'

# Load model as a PyFuncModel (mlflow is already imported in the first cell).
loaded_model = mlflow.pyfunc.load_model(logged_model)

# Predict on a Pandas DataFrame; `data` is already a DataFrame, so it is passed as-is.
loaded_model.predict(data)

Downloading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]

 - mlflow (current: 2.16.0, required: mlflow==2.21.2)
To fix the mismatches, call `mlflow.pyfunc.get_model_dependencies(model_uri)` to fetch the model's environment and install dependencies using the resulting environment file.


array([0, 0, 0, ..., 0, 0, 0], dtype=int64)