### **Setup & Installation**

In [None]:
# Install MLflow and dependencies
!pip install -q mlflow scikit-learn pandas matplotlib hyperopt

# If you're using Google Drive to store logs/artifacts
from google.colab import drive
drive.mount('/content/drive')

# Create a directory for MLflow logs (optional, can change path as needed)
import os
os.environ['MLFLOW_TRACKING_URI'] = 'file:/content/drive/MyDrive/mlruns'

# Verify setup
import mlflow
print("MLflow version:", mlflow.__version__)
print("Tracking URI:", mlflow.get_tracking_uri())


Mounted at /content/drive
MLflow version: 2.22.0
Tracking URI: file:/content/drive/MyDrive/mlruns


In [None]:
import warnings

In [None]:
warnings.filterwarnings('ignore')

### **Loading and Preprocessing**

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split

# Load and rename columns
df = pd.read_csv('/content/dataset_.csv')
df.columns = [
    'age',
    'income',
    'home_ownership',
    'emp_length',
    'loan_purpose',
    'loan_grade',
    'loan_amount',
    'interest_rate',
    'loan_status',
    'percent_income',
    'defaulted',
    'credit_history_length'
]

# Remove '?' rows
df = df[(df['emp_length'] != '?') & (df['interest_rate'] != '?')]

# Convert to proper types
df['emp_length'] = df['emp_length'].astype(int)
df['interest_rate'] = df['interest_rate'].astype(float)

# Map target to 0/1 and drop original
df['label'] = df['defaulted'].map({'Y': 1, 'N': 0})
df.drop(columns=['defaulted'], inplace=True)

# One-hot encode categoricals
categorical_cols = ['home_ownership', 'loan_purpose', 'loan_grade', 'loan_status']
df = pd.get_dummies(df, columns=categorical_cols, drop_first=True)

# Split features/labels
X = df.drop(columns=['label'])
y = df['label']

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

print("Training shape:", X_train.shape)

Training shape: (22910, 22)


### **Logistic Regression**

In [None]:
import mlflow
import mlflow.sklearn
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report

# Train and log Logistic Regression
with mlflow.start_run(run_name="logreg_final_model"):
    logreg_model = LogisticRegression(max_iter=1000, class_weight='balanced', random_state=42)
    logreg_model.fit(X_train, y_train)

    y_pred = logreg_model.predict(X_test)

    # Compute evaluation metrics
    acc = accuracy_score(y_test, y_pred)
    prec = precision_score(y_test, y_pred)
    rec = recall_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)

    # Log model parameters and metrics
    mlflow.log_params({
        "model_type": "LogisticRegression",
        "class_weight": "balanced",
        "max_iter": 1000,
        "random_state": 42
    })

    mlflow.log_metrics({
        "accuracy": acc,
        "precision": prec,
        "recall": rec,
        "f1_score": f1
    })

    # Log model
    mlflow.sklearn.log_model(logreg_model, "model")

    # Display results
    print("\nLogistic Regression Evaluation Metrics:")
    print(f"Accuracy:  {acc:.4f}")
    print(f"Precision: {prec:.4f}")
    print(f"Recall:    {rec:.4f}")
    print(f"F1 Score:  {f1:.4f}")
    print("\nClassification Report:\n", classification_report(y_test, y_pred, target_names=["No Default", "Default"]))





Logistic Regression Evaluation Metrics:
Accuracy:  0.8158
Precision: 0.4916
Recall:    0.9706
F1 Score:  0.6526

Classification Report:
               precision    recall  f1-score   support

  No Default       0.99      0.78      0.87      4707
     Default       0.49      0.97      0.65      1021

    accuracy                           0.82      5728
   macro avg       0.74      0.88      0.76      5728
weighted avg       0.90      0.82      0.84      5728



In [None]:
from hyperopt import fmin, tpe, hp, STATUS_OK, Trials
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import f1_score, classification_report

def objective(params):
    with mlflow.start_run(nested=True):
        params['max_iter'] = int(params['max_iter'])  # convert float to int

        model = LogisticRegression(
            C=params['C'],
            max_iter=params['max_iter'],
            solver=params['solver'],
            class_weight='balanced',
            random_state=42
        )
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)

        f1 = f1_score(y_test, y_pred)
        report = classification_report(y_test, y_pred, target_names=["No Default", "Default"])

        # MLflow logging
        mlflow.log_params(params)
        mlflow.log_metric("f1_score", f1)

        print("\nClassification Report:\n", report)
        print(f"Params: {params} | F1: {f1:.4f}")

        return {'loss': -f1, 'status': STATUS_OK}

# Search space
search_space = {
    'C': hp.loguniform('C', -4, 2),  # ~0.018 to ~7.3
    'max_iter': hp.quniform('max_iter', 100, 1000, 50),
    'solver': hp.choice('solver', ['liblinear', 'lbfgs', 'saga'])
}

# Run Hyperopt
with mlflow.start_run(run_name="logreg_hyperopt_tuning"):
    trials = Trials()
    best_result = fmin(
        fn=objective,
        space=search_space,
        algo=tpe.suggest,
        max_evals=20,
        trials=trials
    )

print("\nBest hyperparameters found by Hyperopt:", best_result)



Classification Report:

              precision    recall  f1-score   support

  No Default       1.00      0.78      0.87      4707
     Default       0.49      0.98      0.66      1021

    accuracy                           0.82      5728
   macro avg       0.74      0.88      0.77      5728
weighted avg       0.91      0.82      0.84      5728

Params: {'C': 0.14584999264014506, 'max_iter': 950, 'solver': 'lbfgs'} | F1: 0.6558

Classification Report:

              precision    recall  f1-score   support

  No Default       0.99      0.79      0.88      4707
     Default       0.49      0.96      0.65      1021

    accuracy                           0.82      5728
   macro avg       0.74      0.87      0.76      5728
weighted avg       0.90      0.82      0.84      5728

Params: {'C': 0.40893671984515656, 'max_iter': 650, 'solver': 'lbfgs'} | F1: 0.6513

Classification Report:

              precision    recall  f1-score   support

  No Default       1.00      0.78      0.87     

In [None]:
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report

# Get best parameters from Hyperopt
best_params = {
    'C': 0.04486897242869308,
    'max_iter': 850,
    'solver': 'liblinear'  # index 0 from ['liblinear', 'lbfgs', 'saga']
}

# Train final model with best params
final_model = LogisticRegression(
    C=best_params['C'],
    max_iter=int(best_params['max_iter']),
    solver=best_params['solver'],
    class_weight='balanced',
    random_state=42
)
final_model.fit(X_train, y_train)

# Log final model to MLflow
with mlflow.start_run(run_name="final_deployment_model"):
    mlflow.log_params(best_params)
    mlflow.sklearn.log_model(final_model, "model")
    print("Final model trained and logged to MLflow.")

    # Evaluate again
    y_pred = final_model.predict(X_test)
    report = classification_report(y_test, y_pred, target_names=["No Default", "Default"])
    print("\nFinal Classification Report:\n", report)

# Create a prediction function
def predict_loan_default(new_data_df):
    # Simulated API for predicting default
    prediction = final_model.predict(new_data_df)
    proba = final_model.predict_proba(new_data_df)
    return prediction, proba




Final model trained and logged to MLflow.

Final Classification Report:
               precision    recall  f1-score   support

  No Default       1.00      0.78      0.87      4707
     Default       0.49      1.00      0.66      1021

    accuracy                           0.82      5728
   macro avg       0.75      0.89      0.77      5728
weighted avg       0.91      0.82      0.84      5728



In [None]:
# Simulate new input (first row of test data)
sample = X_test.iloc[[0]]  # double brackets to keep as DataFrame
pred, prob = predict_loan_default(sample)

print("Prediction:", "Default" if pred[0] == 1 else "No Default")
print("Probability of Default:", prob[0][1])


Prediction: No Default
Probability of Default: 0.0076806451772255735


In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Original baseline
original_preds = final_model.predict(X_test)
original_f1 = f1_score(y_test, original_preds)

print(f"Original Test F1-score: {original_f1:.4f}")

# Simulate drift by adding noise to features (e.g. age, income)
X_test_drifted = X_test.copy()
X_test_drifted['age'] = X_test_drifted['age'] + np.random.randint(5, 15, size=X_test_drifted.shape[0])
X_test_drifted['income'] = X_test_drifted['income'] * np.random.uniform(0.5, 1.5, size=X_test_drifted.shape[0])

# Predict on drifted data
drifted_preds = final_model.predict(X_test_drifted)
drifted_f1 = f1_score(y_test, drifted_preds)

print(f"Drifted Test F1-score: {drifted_f1:.4f}")

# Compare
print("\nPerformance Change:")
print(f"→ F1 dropped from {original_f1:.4f} to {drifted_f1:.4f} due to data drift.")


Original Test F1-score: 0.6604
Drifted Test F1-score: 0.6510

Performance Change:
→ F1 dropped from 0.6604 to 0.6510 due to data drift.


### **Random Forest**

In [None]:
import mlflow
import mlflow.sklearn
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report

# Train and log Random Forest
with mlflow.start_run(run_name="rf_final_model"):
    rf_model = RandomForestClassifier(n_estimators=100, class_weight='balanced', random_state=42)
    rf_model.fit(X_train, y_train)

    y_pred = rf_model.predict(X_test)

    # Compute metrics
    acc = accuracy_score(y_test, y_pred)
    prec = precision_score(y_test, y_pred)
    rec = recall_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)

    # Log parameters and metrics
    mlflow.log_params({
        "model_type": "RandomForest",
        "n_estimators": 100,
        "class_weight": "balanced",
        "random_state": 42
    })

    mlflow.log_metrics({
        "accuracy": acc,
        "precision": prec,
        "recall": rec,
        "f1_score": f1
    })

    mlflow.sklearn.log_model(rf_model, "model")

    # Display output
    print("\nRandom Forest Evaluation Metrics:")
    print(f"Accuracy:  {acc:.4f}")
    print(f"Precision: {prec:.4f}")
    print(f"Recall:    {rec:.4f}")
    print(f"F1 Score:  {f1:.4f}")
    print("\nClassification Report:\n", classification_report(y_test, y_pred, target_names=["No Default", "Default"]))





Random Forest Evaluation Metrics:
Accuracy:  0.8242
Precision: 0.5077
Recall:    0.4545
F1 Score:  0.4796

Classification Report:
               precision    recall  f1-score   support

  No Default       0.88      0.90      0.89      4707
     Default       0.51      0.45      0.48      1021

    accuracy                           0.82      5728
   macro avg       0.70      0.68      0.69      5728
weighted avg       0.82      0.82      0.82      5728



In [None]:
# Hyperparameter Tuning with Hyperopt
from hyperopt import fmin, tpe, hp, STATUS_OK, Trials
from sklearn.ensemble import RandomForestClassifier

# Define options explicitly for later decoding
max_features_options = ['sqrt', 'log2', None]
max_depth_options = [None, 10, 20, 30, 50]

def rf_objective(params):
    # Convert discrete choices from indices to actual values
    params['max_features'] = max_features_options[params['max_features']]
    params['max_depth'] = max_depth_options[params['max_depth']]
    params['n_estimators'] = int(params['n_estimators'])

    with mlflow.start_run(nested=True):
        rf = RandomForestClassifier(
            n_estimators=params['n_estimators'],
            max_depth=params['max_depth'],
            max_features=params['max_features'],
            class_weight='balanced',
            random_state=42
        )
        rf.fit(X_train, y_train)
        preds = rf.predict(X_test)
        f1 = f1_score(y_test, preds)

        mlflow.log_params(params)
        mlflow.log_metric("f1_score", f1)

        print("\nParams:", params, "| F1:", round(f1, 4))
        print(classification_report(y_test, preds, target_names=["No Default", "Default"]))

        return {'loss': -f1, 'status': STATUS_OK}

# Define search space using index-based choices for categorical options
rf_search_space = {
    'n_estimators': hp.quniform('n_estimators', 100, 500, 50),
    'max_depth': hp.choice('max_depth', list(range(len(max_depth_options)))),
    'max_features': hp.choice('max_features', list(range(len(max_features_options))))
}

with mlflow.start_run(run_name="rf_hyperopt_tuning"):
    rf_trials = Trials()
    best_rf_raw = fmin(fn=rf_objective, space=rf_search_space, algo=tpe.suggest, max_evals=20, trials=rf_trials)

# Decode best_rf_raw to readable values
best_rf = {
    'n_estimators': int(best_rf_raw['n_estimators']),
    'max_depth': max_depth_options[best_rf_raw['max_depth']],
    'max_features': max_features_options[best_rf_raw['max_features']]
}

print("\nBest RF Hyperparameters:", best_rf)



Params:
{'max_depth': 20, 'max_features': None, 'n_estimators': 150}
| F1:
0.6218
              precision    recall  f1-score   support

  No Default       0.96      0.80      0.88      4707
     Default       0.49      0.86      0.62      1021

    accuracy                           0.81      5728
   macro avg       0.73      0.83      0.75      5728
weighted avg       0.88      0.81      0.83      5728


Params:
{'max_depth': 50, 'max_features': 'sqrt', 'n_estimators': 100}
| F1:
0.4796
              precision    recall  f1-score   support

  No Default       0.88      0.90      0.89      4707
     Default       0.51      0.45      0.48      1021

    accuracy                           0.82      5728
   macro avg       0.70      0.68      0.69      5728
weighted avg       0.82      0.82      0.82      5728


Params:
{'max_depth': 20, 'max_features': 'sqrt', 'n_estimators': 150}
| F1:
0.6169
              precision    recall  f1-score   support

  No Default       0.95      0.82     

In [None]:
# Final Random Forest Model Logging
best_rf_params = {
    'n_estimators': 300,
    'max_depth': 20,
    'max_features': 'sqrt'
}

final_rf = RandomForestClassifier(
    n_estimators=best_rf_params['n_estimators'],
    max_depth=best_rf_params['max_depth'],
    max_features=best_rf_params['max_features'],
    class_weight='balanced',
    random_state=42
)
final_rf.fit(X_train, y_train)

with mlflow.start_run(run_name="final_rf_model"):
    mlflow.log_params(best_rf_params)
    mlflow.sklearn.log_model(final_rf, "model")
    print("Final RF model logged to MLflow.")

    y_pred = final_rf.predict(X_test)
    print("\nFinal Classification Report:\n", classification_report(y_test, y_pred, target_names=["No Default", "Default"]))





Final RF model logged to MLflow.

Final Classification Report:
               precision    recall  f1-score   support

  No Default       0.95      0.82      0.88      4707
     Default       0.50      0.82      0.62      1021

    accuracy                           0.82      5728
   macro avg       0.73      0.82      0.75      5728
weighted avg       0.87      0.82      0.84      5728



In [None]:
# Data Drift Simulation for Random Forest
def rf_predict_drift(new_data_df):
    return final_rf.predict(new_data_df), final_rf.predict_proba(new_data_df)

original_f1_rf = f1_score(y_test, final_rf.predict(X_test))

X_test_drifted_rf = X_test.copy()
X_test_drifted_rf['age'] += np.random.randint(5, 15, size=X_test_drifted_rf.shape[0])
X_test_drifted_rf['income'] *= np.random.uniform(0.5, 1.5, size=X_test_drifted_rf.shape[0])

f1_drifted_rf = f1_score(y_test, final_rf.predict(X_test_drifted_rf))

print(f"\nOriginal RF F1-score: {original_f1_rf:.4f} | Drifted F1-score: {f1_drifted_rf:.4f}")
print("Performance Change due to Drift:")
print(f"→ F1 dropped from {original_f1_rf:.4f} to {f1_drifted_rf:.4f}")


Original RF F1-score: 0.6178 | Drifted F1-score: 0.5679
Performance Change due to Drift:
→ F1 dropped from 0.6178 to 0.5679


### **XGBoost**

In [None]:
import xgboost as xgb
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report
import mlflow
import mlflow.xgboost

# Train and log XGBoost model
with mlflow.start_run(run_name="xgboost_final_model"):
    xgb_model = xgb.XGBClassifier(
        use_label_encoder=False,
        eval_metric='logloss',
        random_state=42
    )
    xgb_model.fit(X_train, y_train)

    y_pred = xgb_model.predict(X_test)

    # Compute metrics
    acc = accuracy_score(y_test, y_pred)
    prec = precision_score(y_test, y_pred)
    rec = recall_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)

    # Log parameters and metrics
    mlflow.log_params({
        "model_type": "XGBoost",
        "eval_metric": "logloss",
        "random_state": 42
    })

    mlflow.log_metrics({
        "accuracy": acc,
        "precision": prec,
        "recall": rec,
        "f1_score": f1
    })

    mlflow.xgboost.log_model(xgb_model, "model")

    # Display output
    print("\nXGBoost Evaluation Metrics:")
    print(f"Accuracy:  {acc:.4f}")
    print(f"Precision: {prec:.4f}")
    print(f"Recall:    {rec:.4f}")
    print(f"F1 Score:  {f1:.4f}")
    print("\nClassification Report:\n", classification_report(y_test, y_pred, target_names=["No Default", "Default"]))





XGBoost Evaluation Metrics:
Accuracy:  0.8170
Precision: 0.4874
Recall:    0.5132
F1 Score:  0.5000

Classification Report:
               precision    recall  f1-score   support

  No Default       0.89      0.88      0.89      4707
     Default       0.49      0.51      0.50      1021

    accuracy                           0.82      5728
   macro avg       0.69      0.70      0.69      5728
weighted avg       0.82      0.82      0.82      5728



In [None]:
# Hyperparameter tuning

def xgb_objective(params):
    with mlflow.start_run(nested=True):
        clf = xgb.XGBClassifier(
            use_label_encoder=False,
            eval_metric='logloss',
            random_state=42,
            max_depth=int(params['max_depth']),
            learning_rate=params['learning_rate'],
            n_estimators=int(params['n_estimators'])
        )
        clf.fit(X_train, y_train)
        y_pred = clf.predict(X_test)
        f1 = f1_score(y_test, y_pred)

        mlflow.log_params(params)
        mlflow.log_metric("f1_score", f1)

        print(f"\nParams: {params} | F1: {f1:.4f}")
        print(classification_report(y_test, y_pred))

        return {'loss': -f1, 'status': STATUS_OK}

xgb_search_space = {
    'max_depth': hp.quniform('max_depth', 3, 10, 1),
    'learning_rate': hp.uniform('learning_rate', 0.01, 0.3),
    'n_estimators': hp.quniform('n_estimators', 100, 1000, 50)
}

with mlflow.start_run(run_name="xgboost_hyperopt_tuning"):
    trials = Trials()
    best_result = fmin(
        fn=xgb_objective,
        space=xgb_search_space,
        algo=tpe.suggest,
        max_evals=20,
        trials=trials
    )
    print("\nBest hyperparameters found:", best_result)


Params: {'learning_rate': 0.033319549020834165, 'max_depth': 6.0, 'n_estimators': 600.0} | F1: 0.5107
              precision    recall  f1-score   support

           0       0.90      0.88      0.89      4707
           1       0.49      0.54      0.51      1021

    accuracy                           0.82      5728
   macro avg       0.69      0.71      0.70      5728
weighted avg       0.82      0.82      0.82      5728


Params: {'learning_rate': 0.02625159580985007, 'max_depth': 6.0, 'n_estimators': 750.0} | F1: 0.5191
              precision    recall  f1-score   support

           0       0.90      0.88      0.89      4707
           1       0.49      0.55      0.52      1021

    accuracy                           0.82      5728
   macro avg       0.70      0.71      0.70      5728
weighted avg       0.83      0.82      0.82      5728


Params: {'learning_rate': 0.19247173933903416, 'max_depth': 7.0, 'n_estimators': 300.0} | F1: 0.5123
              precision    recall  f1-s

In [None]:
# Train final model with best params
best_xgb_params = {
    'max_depth': int(best_result['max_depth']),
    'learning_rate': best_result['learning_rate'],
    'n_estimators': int(best_result['n_estimators'])
}

final_xgb = xgb.XGBClassifier(
    use_label_encoder=False,
    eval_metric='logloss',
    random_state=42,
    **best_xgb_params
)
final_xgb.fit(X_train, y_train)

with mlflow.start_run(run_name="xgboost_final_model"):
    mlflow.log_params(best_xgb_params)
    mlflow.xgboost.log_model(final_xgb, "model", registered_model_name="LoanDefaultXGBModel")

    y_pred = final_xgb.predict(X_test)
    print("\nFinal XGBoost Classification Report:\n")
    print(classification_report(y_test, y_pred))




Final XGBoost Classification Report:

              precision    recall  f1-score   support

           0       0.90      0.88      0.89      4707
           1       0.49      0.55      0.52      1021

    accuracy                           0.82      5728
   macro avg       0.70      0.71      0.70      5728
weighted avg       0.83      0.82      0.82      5728



Registered model 'LoanDefaultXGBModel' already exists. Creating a new version of this model...
Created version '2' of model 'LoanDefaultXGBModel'.


In [None]:
# Drift Simulation
original_f1 = f1_score(y_test, final_xgb.predict(X_test))
X_test_drifted = X_test.copy()
X_test_drifted['age'] += np.random.randint(5, 15, size=X_test_drifted.shape[0])
X_test_drifted['income'] *= np.random.uniform(0.5, 1.5, size=X_test_drifted.shape[0])
drifted_f1 = f1_score(y_test, final_xgb.predict(X_test_drifted))
print(f"\nOriginal F1: {original_f1:.4f} | Drifted F1: {drifted_f1:.4f}")


Original F1: 0.5191 | Drifted F1: 0.4507


In [None]:
!pip freeze > requirements.txt

from google.colab import files
files.download("requirements.txt")


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>