> ### **MLFLow common functions**: 
           
> 1- .is_tracking_uri_set()

> 2- .get_tracking_uri()

> 3- .set_tracking_uri()

> 4- .set_experiment()

> 5- .start_run() as run:

> 6- .models.infer_signature()

> 7- .log_params() &emsp;&emsp;&emsp;&emsp;&emsp;&emsp; --> .log_param()

> 8- .log_metrics() &emsp;&emsp;&emsp;&emsp;&emsp;&emsp; --> .log_metric()

> 9- .sklearn.log_model() &emsp;&emsp;&emsp;&emsp;&emsp;&emsp; --> logging a record of all models but not registering it until found best one

> 10- .sklearn.autolog() &emsp;&emsp;&emsp;&emsp;&emsp;&emsp;&emsp;&emsp; --> used before model training and this fn gets automatically invoked when .fit() method is called

> 11- .log_artifact() &emsp;&emsp;&emsp;&emsp;&emsp;&emsp;&emsp;&emsp; --> artifacts means plot, code, data

> 12- .get_artifact_uri("model.pkl")

> 13- .log_figure()

> 14- .log_input()

> 15- .data.from_pandas() &emsp;&emsp;&emsp;&emsp;&emsp;&emsp; --> .data.from_numpy() &emsp;&emsp;&emsp;&emsp;&emsp;&emsp; --> .data.from_huggingface()

> 16- .set_tag()

In [21]:
import pandas as pd
import mlflow as mfl
from sklearn import datasets
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from mlflow.models import infer_signature

In [22]:
data_set = datasets.load_wine()
data_set.data.shape                                     # (178, 13)

(178, 13)

#### Load wine dataset

In [23]:
X, y = datasets.load_wine(return_X_y = True)
print(X)
print(y)

[[1.423e+01 1.710e+00 2.430e+00 ... 1.040e+00 3.920e+00 1.065e+03]
 [1.320e+01 1.780e+00 2.140e+00 ... 1.050e+00 3.400e+00 1.050e+03]
 [1.316e+01 2.360e+00 2.670e+00 ... 1.030e+00 3.170e+00 1.185e+03]
 ...
 [1.327e+01 4.280e+00 2.260e+00 ... 5.900e-01 1.560e+00 8.350e+02]
 [1.317e+01 2.590e+00 2.370e+00 ... 6.000e-01 1.620e+00 8.400e+02]
 [1.413e+01 4.100e+00 2.740e+00 ... 6.100e-01 1.600e+00 5.600e+02]]
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2]


> #### 1> Split the train & test data sets

> #### 2> Define the model hyperparameters

In [24]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2)

params = {
    'penalty': 'l2',
    'solver': 'lbfgs',
    'max_iter': 1000,
    'multi_class': 'auto',
    'random_state': 8888
}

> #### Train the model

In [25]:
lr = LogisticRegression(**params)
lr.fit(X_train, y_train)

STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


> #### Perform predictions on test data

In [28]:
y_pred = lr.predict(X_test)
y_pred

array([1, 0, 2, 1, 2, 1, 0, 1, 1, 2, 0, 0, 1, 2, 1, 2, 1, 2, 2, 1, 0, 1,
       0, 1, 1, 0, 0, 1, 1, 2, 0, 1, 0, 1, 1, 0])

> #### Calculate metrics

In [27]:
accuracy = accuracy_score(y_test, y_pred)
accuracy

1.0

#### Set mlflow tracking uri

In [30]:
if not mfl.is_tracking_uri_set():
    mfl.set_tracking_uri(uri = 'http://127.0.0.1:5000')

> ### mlflow start server with port -command line 

In [None]:
!mlflow server --port 5000

#### Set mlflow new experiment

In [32]:
from mlflow.models.signature import ModelSignature

try:
	mfl.set_experiment('Logistic Regression - Wine data')
      
except mfl.exceptions.MlflowException as exception:
	print(f'Failed to set experiment: {exception}')

try:
    with mfl.start_run() as lr_run:
        # log the hyperparams
        mfl.log_params(params)
        
        # log the accuracy_metrics
        mfl.log_metric('accuracy is:', accuracy)

        # Assign a tag that indicates the purpose of this run
        mfl.set_tag('Training Info', 'Basic LR model for wine dataset')

        # Infer the model signature
        signature: ModelSignature = infer_signature(X_train, lr.predict(X_train))
        print(signature)

        # log the model
        model_info = mfl.sklearn.log_model(
              sk_model = lr,
              artifact_path = 'wine_data',
              signature = signature,
              input_example = X_train,
              registered_model_name = 'Logistic_Reg_Wine_data_model'
        )

except mfl.exceptions.MlflowException as exception:
    print(f'Failed to start run or log metrics: {exception}')	

2025/03/05 15:48:27 INFO mlflow.tracking.fluent: Experiment with name 'Logistic Regression - Wine data' does not exist. Creating a new experiment.


inputs: 
  [Tensor('float64', (-1, 13))]
outputs: 
  [Tensor('int64', (-1,))]
params: 
  None



Registered model 'Logistic_Reg_Wine_data_model' already exists. Creating a new version of this model...
2025/03/05 15:48:29 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: Logistic_Reg_Wine_data_model, version 2


🏃 View run painted-steed-956 at: http://127.0.0.1:5000/#/experiments/905897711914752523/runs/a31b78f8df8042419721313373c7b077
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/905897711914752523


Created version '2' of model 'Logistic_Reg_Wine_data_model'.


> #### Inferencing & validating model

- way 1: via json serving payload

- way 2: via pyfunc.load_model function

In [33]:
# Way 1
model_uri = model_info.model_uri            # gets the model_uri

"""
 The model is logged with an input example. MLflow converts it into the serving payload format for the deployed model endpoint and saves it to serving input payload.json under the /mlruns folder'
"""

import json
json_file_path = '../notebooks/mlruns/832023975994486814/b52972fdff24404790bc642ded7c1662/artifacts/wine_data/serving_input_example.json'   # on each run, modify the path and <run_number> under mlruns folder

with open(json_file_path, 'r') as file:
  serving_payload: dict = json.load(file)

In [34]:
serving_payload

{'inputs': [[13.05,
   1.77,
   2.1,
   17.0,
   107.0,
   3.0,
   3.0,
   0.28,
   2.03,
   5.04,
   0.88,
   3.35,
   885.0],
  [13.39,
   1.77,
   2.62,
   16.1,
   93.0,
   2.85,
   2.94,
   0.34,
   1.45,
   4.8,
   0.92,
   3.22,
   1195.0],
  [13.69,
   3.26,
   2.54,
   20.0,
   107.0,
   1.83,
   0.56,
   0.5,
   0.8,
   5.88,
   0.96,
   1.82,
   680.0],
  [11.61,
   1.35,
   2.7,
   20.0,
   94.0,
   2.74,
   2.92,
   0.29,
   2.49,
   2.65,
   0.96,
   3.26,
   680.0],
  [12.37,
   1.63,
   2.3,
   24.5,
   88.0,
   2.22,
   2.45,
   0.4,
   1.9,
   2.12,
   0.89,
   2.78,
   342.0],
  [12.45,
   3.03,
   2.64,
   27.0,
   97.0,
   1.9,
   0.58,
   0.63,
   1.14,
   7.5,
   0.67,
   1.73,
   880.0],
  [12.77,
   2.39,
   2.28,
   19.5,
   86.0,
   1.39,
   0.51,
   0.48,
   0.64,
   9.899999,
   0.57,
   1.63,
   470.0],
  [12.0,
   1.51,
   2.42,
   22.0,
   86.0,
   1.45,
   1.25,
   0.5,
   1.63,
   3.6,
   1.05,
   2.65,
   450.0],
  [14.06,
   1.63,
   2.28,
   16.0,
 

#### Validate the serving payload

In [35]:
from mlflow.models import validate_serving_input
validate_serving_input(model_uri, serving_payload)

  from .autonotebook import tqdm as notebook_tqdm
Downloading artifacts: 100%|██████████| 7/7 [00:00<00:00, 858.91it/s]  


array([0, 0, 2, 1, 1, 2, 2, 1, 0, 2, 0, 0, 0, 2, 2, 0, 2, 0, 1, 0, 0, 2,
       1, 2, 1, 0, 0, 2, 1, 1, 0, 2, 1, 1, 0, 2, 0, 2, 2, 2, 0, 2, 2, 0,
       1, 2, 0, 1, 0, 1, 0, 2, 1, 2, 1, 2, 1, 2, 1, 2, 0, 2, 0, 1, 0, 0,
       1, 0, 2, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 2, 0, 2, 2, 1, 0, 0,
       1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 2, 1, 2, 2, 1, 0, 1,
       0, 0, 1, 1, 2, 1, 1, 1, 2, 1, 1, 0, 2, 1, 1, 1, 1, 2, 1, 1, 1, 2,
       1, 1, 1, 2, 2, 2, 0, 0, 1, 2])

In [36]:
# Way 2
loaded_model = mfl.pyfunc.load_model(model_uri)
predictions = loaded_model.predict(X_test)
wine_data_features = datasets.load_wine().feature_names

final_result_df = pd.DataFrame(X_test, columns = wine_data_features) 
final_result_df['actual_class'] = y_test 
final_result_df['predcited_class'] = predictions

Downloading artifacts: 100%|██████████| 7/7 [00:00<00:00, 260.03it/s]


In [37]:
final_result_df[:10]      # display top 10 rows

Unnamed: 0,alcohol,malic_acid,ash,alcalinity_of_ash,magnesium,total_phenols,flavanoids,nonflavanoid_phenols,proanthocyanins,color_intensity,hue,od280/od315_of_diluted_wines,proline,actual_class,predcited_class
0,12.08,1.33,2.3,23.6,70.0,2.2,1.59,0.42,1.38,1.74,1.07,3.21,625.0,1,1
1,13.63,1.81,2.7,17.2,112.0,2.85,2.91,0.3,1.46,7.3,1.28,2.88,1310.0,0,0
2,13.4,3.91,2.48,23.0,102.0,1.8,0.75,0.43,1.41,7.3,0.7,1.56,750.0,2,2
3,12.29,2.83,2.22,18.0,88.0,2.45,2.25,0.25,1.99,2.15,1.15,3.3,290.0,1,1
4,13.36,2.56,2.35,20.0,89.0,1.4,0.5,0.37,0.64,5.6,0.7,2.47,780.0,2,2
5,12.07,2.16,2.17,21.0,85.0,2.6,2.65,0.37,1.35,2.76,0.86,3.28,378.0,1,1
6,12.93,3.8,2.65,18.6,102.0,2.41,2.41,0.25,1.98,4.5,1.03,3.52,770.0,0,0
7,12.52,2.43,2.17,21.0,88.0,2.55,2.27,0.26,1.22,2.0,0.9,2.78,325.0,1,1
8,12.08,1.39,2.5,22.5,84.0,2.56,2.29,0.43,1.04,2.9,0.93,3.19,385.0,1,1
9,13.48,1.67,2.64,22.5,89.0,2.6,1.1,0.52,2.29,11.75,0.57,1.78,620.0,2,2
