In [1]:
import sys
import os

# Add the project root (one level up from src) to sys.path
sys.path.append(os.path.abspath(".."))
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier

In [2]:
# Load and Split Data
from src.train import ModelTrainer
trainer = ModelTrainer("../data/processed/proxy_target.csv")
trainer.load_data()
trainer.split_data()

Data loaded. Shape: (3742, 21)
Data split completed.


In [3]:
# Logistic Regression Tuning
log_params = {
    "C": [0.01, 0.1, 1, 10],
    "penalty": ["l2"],
    "solver": ["liblinear"]
}
best_log_model = trainer.tune_model(LogisticRegression(random_state=42), log_params)

# Random Forest Tuning
rf_params = {
    "n_estimators": [50, 100, 200],
    "max_depth": [None, 10, 20],
    "min_samples_split": [2, 5],
}
best_rf_model = trainer.tune_model(RandomForestClassifier(random_state=42), rf_params)

# Log best models
trainer.log_model(best_log_model, "logistic_regression_tuned")
trainer.log_model(best_rf_model, "random_forest_tuned")

Best params: {'C': 1, 'penalty': 'l2', 'solver': 'liblinear'}
Best score: 0.5942067736185384




Best params: {'max_depth': None, 'min_samples_split': 2, 'n_estimators': 200}
Best score: 0.8108359133126936




Evaluation Metrics:
Accuracy: 0.9879839786381842
Precision: 0.45454545454545453
Recall: 0.625
F1 Score: 0.5263157894736842
ROC AUC: 0.9936740890688259




Evaluation Metrics:
Accuracy: 0.9893190921228304
Precision: 0.5
Recall: 0.375
F1 Score: 0.42857142857142855
ROC AUC: 0.9914811066126856


In [4]:
import mlflow

# End any currently active run
if mlflow.active_run() is not None:
    mlflow.end_run()

with mlflow.start_run() as run:
    print("Run ID:", run.info.run_id)

    # Your training, tuning, and logging goes here
    trainer.log_model(best_log_model, "logistic_regression_tuned")



Run ID: c8d1d57fd54c4f43bfaf1d4b3568aedc




Evaluation Metrics:
Accuracy: 0.9879839786381842
Precision: 0.45454545454545453
Recall: 0.625
F1 Score: 0.5263157894736842
ROC AUC: 0.9936740890688259


In [5]:
run_id = "27abc8cbf6274a0babc99b7af60bccfb"
model_name = "credit_risk_model"

# Update this to match how you logged your logistic regression model artifact
model_uri = f"runs:/{run_id}/logistic_regression_tuned"  

result = mlflow.register_model(model_uri=model_uri, name=model_name)

print(f"Registered model name: {result.name}")
print(f"Model version: {result.version}")


Successfully registered model 'credit_risk_model'.
Created version '1' of model 'credit_risk_model'.


Registered model name: credit_risk_model
Model version: 1


In [6]:
from mlflow.tracking import MlflowClient

client = MlflowClient()
print(client.list_registered_models())  # Should work now


AttributeError: 'MlflowClient' object has no attribute 'list_registered_models'