In [22]:
import mlflow
import os
from utils import *
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path
from sklearn.linear_model import ElasticNet, LogisticRegression
from sklearn.metrics import f1_score, roc_auc_score, accuracy_score
from mlflow.models import infer_signature

In [15]:
# Locations of the two project YAML files (hyperparameters and pipeline config).
params_yaml_path = Path("artifacts/ConfigFiles/params.yaml")
config_yaml_path = Path("artifacts/ConfigFiles/config.yaml")

# Parse both files with the project's YAML helper from `utils`.
param_box = read_yaml_file(params_yaml_path)
config_box = read_yaml_file(config_yaml_path)

# Keep a handle on the ElasticNet section of the hyperparameter file.
params_elastic_net = param_box.ElasticNet

yaml file: artifacts\ConfigFiles\params.yaml loaded successfully
yaml file: artifacts\ConfigFiles\config.yaml loaded successfully


In [16]:
# Read in transformed data: the data_transformation section of the config
# holds the paths to the transformed train/test CSV files.
config = config_box.data_transformation

# Load as DataFrame
income_train = pd.read_csv(config.transformed_train_data)
income_test = pd.read_csv(config.transformed_test_data)

# Get the features and y label for train set.
# The target is selected by name, so the features are selected by name too
# (every column except 'label'). Slicing positionally with [:-1] would leak
# the label into X and drop a real feature if 'label' were not the last column.
y_train = income_train['label']
train_columns = income_train.columns
feature_columns = train_columns.drop('label')
X_train = income_train[feature_columns]

# Get the features and y label for test set.
# Reusing feature_columns guarantees the test matrix has the same columns,
# in the same order, as the training matrix (a missing column raises KeyError).
y_test = income_test['label']
X_test = income_test[feature_columns]

In [32]:
# Hyperparameters of the elastic-net-penalised logistic regression, defined
# once so the estimator is built from a single, inspectable definition.
# random_state is fixed because the 'saga' solver shuffles the data; without
# a seed two runs of this cell can produce slightly different models.
# NOTE(review): params.yaml also exposes an ElasticNet section (loaded earlier
# as `params_elastic_net`); its schema is not visible here — consider sourcing
# these values from it instead of hard-coding them.
logreg_params = {
    "penalty": "elasticnet",
    "max_iter": 2000,
    "l1_ratio": 0.2,
    "solver": "saga",
    "random_state": 42,
}

# Train the model
logistic_regression = LogisticRegression(**logreg_params)
logistic_regression.fit(X_train, y_train)

# Predict on the test set: hard class labels for accuracy / F1 ...
y_pred = logistic_regression.predict(X_test)
# ... and the predicted probability of the positive class for ROC AUC.
# ROC AUC is a ranking metric; feeding it 0/1 labels collapses the curve to a
# single operating point and misreports the model's discriminative power.
# Column 1 of predict_proba corresponds to classes_[1], the positive class of
# this binary problem.
y_score = logistic_regression.predict_proba(X_test)[:, 1]

# Calculate metrics
accuracy = accuracy_score(y_test, y_pred)
f_score = f1_score(y_test, y_pred)
roc_auc = roc_auc_score(y_test, y_score)
y_pred

array([0, 0, 0, ..., 1, 1, 1], dtype=int64)

In [33]:
# Point the MLflow client at the local tracking server.
mlflow.set_tracking_uri(uri="http://127.0.0.1:8080")

# Create a new MLflow Experiment (or reuse it if it already exists)
mlflow.set_experiment("MLflow LogisticRegression")

# Start an MLflow run
with mlflow.start_run():
    # Log the hyperparameters as actually configured on the estimator, so the
    # tracked values cannot drift from the model that is being registered.
    fitted_params = logistic_regression.get_params()
    mlflow.log_params(
        {
            name: fitted_params[name]
            for name in ("max_iter", "l1_ratio", "penalty", "solver", "random_state")
        }
    )

    # Log the evaluation metrics computed on the held-out test set
    mlflow.log_metrics(
        {
            "accuracy": accuracy,
            "f1_score": f_score,
            "roc_auc_score": roc_auc,
        }
    )

    # Set a tag that we can use to remind ourselves what this run was for
    mlflow.set_tag("Training Info", "Basic Logistic_regression model for census income dataset")

    # Infer the model signature (input/output schema) from the training data
    signature = infer_signature(X_train, logistic_regression.predict(X_train))

    # Log the model.
    # input_example is meant to be a handful of representative rows; passing
    # the whole training frame would serialise the full dataset into the model
    # artifact on every run, so only the first few rows are stored.
    model_info = mlflow.sklearn.log_model(
        sk_model=logistic_regression,
        artifact_path="logistic_model",
        signature=signature,
        input_example=X_train.head(5),
        registered_model_name="tracking-logistic",
    )

Registered model 'tracking-logistic' already exists. Creating a new version of this model...
2024/03/04 16:44:17 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: tracking-logistic, version 2
Created version '2' of model 'tracking-logistic'.
