In [1]:
!pip install mlflow



In [2]:
import pandas as pd
import numpy as np
import mlflow
from sklearn.datasets import load_iris
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn import set_config
from sklearn.metrics import accuracy_score
from mlflow.models import infer_signature
from mlflow.sklearn import log_model

## Load the data

In [3]:
# Load the iris dataset as pandas objects: X is the feature table, y the target column
X, y = load_iris(
    return_X_y=True,
    as_frame=True,
)

# show the feature table
X

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2
...,...,...,...,...
145,6.7,3.0,5.2,2.3
146,6.3,2.5,5.0,1.9
147,6.5,3.0,5.2,2.0
148,6.2,3.4,5.4,2.3


In [4]:
# Display the target column returned by load_iris
y

0      0
1      0
2      0
3      0
4      0
      ..
145    2
146    2
147    2
148    2
149    2
Name: target, Length: 150, dtype: int32

Split the data

In [5]:
# Settings for a reproducible hold-out split (20% of the rows go to the test set)
random_state = 42
test_size = 0.2

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=test_size,
    random_state=random_state,
)

# report how many rows/columns ended up in each split
print('The shape of the train data is', X_train.shape)
print('The shape of the test data is ', X_test.shape)

The shape of the train data is (120, 4)
The shape of the test data is  (30, 4)


Feature Transformation

In [6]:
# Ask scikit-learn transformers to return pandas DataFrames (keeping column names and index)
# instead of NumPy arrays.
set_config(transform_output='pandas')

In [7]:
# Features: fit the scaler on the training split only, then reuse it on the test split
scaler = StandardScaler()
X_train_trans = scaler.fit_transform(X_train)
X_test_trans = scaler.transform(X_test)

# Target: fit the encoder on the training labels only, then reuse it on the test labels
label_encoder = LabelEncoder()
y_train = label_encoder.fit_transform(y_train)
y_test = label_encoder.transform(y_test)

In [8]:
# Display the training features after standard scaling
X_train_trans

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
22,-1.473937,1.203658,-1.562535,-1.312603
15,-0.133071,2.992376,-1.276006,-1.045633
65,1.085898,0.085709,0.385858,0.289218
11,-1.230143,0.756479,-1.218701,-1.312603
42,-1.717731,0.309299,-1.390618,-1.312603
...,...,...,...,...
71,0.354517,-0.585060,0.156636,0.155733
106,-1.108246,-1.255829,0.443164,0.689673
14,-0.011174,2.098017,-1.447923,-1.312603
92,-0.011174,-1.032239,0.156636,0.022248


In [9]:
# Display the training labels after label encoding
y_train

array([0, 0, 1, 0, 0, 2, 1, 0, 0, 0, 2, 1, 1, 0, 0, 1, 2, 2, 1, 2, 1, 2,
       1, 0, 2, 1, 0, 0, 0, 1, 2, 0, 0, 0, 1, 0, 1, 2, 0, 1, 2, 0, 2, 2,
       1, 1, 2, 1, 0, 1, 2, 0, 0, 1, 1, 0, 2, 0, 0, 1, 1, 2, 1, 2, 2, 1,
       0, 0, 2, 2, 0, 0, 0, 1, 2, 0, 2, 2, 0, 1, 1, 2, 1, 2, 0, 2, 1, 2,
       1, 1, 1, 0, 1, 1, 0, 1, 2, 2, 0, 1, 2, 2, 0, 2, 0, 1, 2, 2, 1, 2,
       1, 1, 2, 2, 0, 1, 2, 0, 1, 2], dtype=int64)

Model Training

In [10]:
# Hyperparameters for the classifier; kept in a dict so the same values can be logged later
model_params = dict(
    penalty='l2',
    C=0.1,
    max_iter=200,
    random_state=30,
    n_jobs=-1,
)

# build the (unfitted) estimator from the parameters
log_reg = LogisticRegression(**model_params)

# train on the scaled features and encoded labels
log_reg.fit(X_train_trans, y_train)

In [11]:
# Predict classes for the scaled hold-out features
y_pred = log_reg.predict(X_test_trans)

# fraction of hold-out samples classified correctly
score = accuracy_score(y_test, y_pred)

print(f'The model accuracy is {score:.2f}')

The model accuracy is 0.97


In [12]:
# Point the MLflow client at the local tracking server
mlflow.set_tracking_uri("http://127.0.0.1:8080")

# choose the experiment that this run is recorded under
mlflow.set_experiment('MLflow Demo')

# Record the training configuration, the result and the model in a single run
with mlflow.start_run(run_name='logistic demo') as run:
    # hyperparameters used to build the classifier
    mlflow.log_params(model_params)

    # hold-out accuracy computed earlier in the notebook
    mlflow.log_metric('accuracy_score', score)

    # free-text tag describing the run
    mlflow.set_tag('Demo Training', 'Logistic Regression model trained on iris data')

    # derive the input/output schema from the scaled training features
    # and the model's predictions on them
    signature = infer_signature(
        model_input=X_train_trans,
        model_output=log_reg.predict(X_train_trans),
    )

    # log the fitted classifier and register it in the model registry
    # NOTE(review): only the classifier is logged; the fitted scaler is not part of the
    # artifact, so consumers of 'iris-model' must supply already-scaled features — confirm
    # this is intended.
    sklearn_model = log_model(
        sk_model=log_reg,
        artifact_path='logistic-regression-model',
        registered_model_name='iris-model',
        signature=signature,
    )

2024/08/26 12:52:01 INFO mlflow.tracking.fluent: Experiment with name 'MLflow Demo' does not exist. Creating a new experiment.
Successfully registered model 'iris-model'.
2024/08/26 12:52:09 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: iris-model, version 1
Created version '1' of model 'iris-model'.
2024/08/26 12:52:09 INFO mlflow.tracking._tracking_service.client: 🏃 View run logistic demo at: http://127.0.0.1:8080/#/experiments/1/runs/b8d782861cba432497e9e360037d5581.
2024/08/26 12:52:09 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://127.0.0.1:8080/#/experiments/1.
