> ### **MLFLow common functions**: 
           
> 1- .is_tracking_uri_set()

> 2- .get_tracking_uti()

> 3- .set_tracking_uri()

> 4- .set_experiment()

> 5- .start_run() as run:

> 6- .models.infer_signature()

> 7- .log_params() &emsp;&emsp;&emsp;&emsp;&emsp;&emsp; --> .log_param()

> 8- .log_metrics() &emsp;&emsp;&emsp;&emsp;&emsp;&emsp; --> .log_metric()

> 9- .sklearn.log_model() &emsp;&emsp;&emsp;&emsp;&emsp;&emsp; --> logging a record of all models but not registering it until found best one

> 10- .sklearn.autolog() &emsp;&emsp;&emsp;&emsp;&emsp;&emsp;&emsp;&emsp; --> used before model training and this fn gets automatically invoked when .fit() method is called

> 11- .log_artifact() &emsp;&emsp;&emsp;&emsp;&emsp;&emsp;&emsp;&emsp; --> artifacts means plot, code, data

> 12- .get_artifact_uri("model.pkl")

> 13- .log_figure()

> 14- .log_input()

> 15- .data.from_pandas() &emsp;&emsp;&emsp;&emsp;&emsp;&emsp; --> .data.from_numpy() &emsp;&emsp;&emsp;&emsp;&emsp;&emsp; --> .data.from_huggingface()

> 16- .set_tag()

In [1]:
import pandas as pd
import mlflow as mfl
from sklearn import datasets
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from mlflow.models import infer_signature

In [2]:
data_set = datasets.load_wine()
data_set.data.shape                                     # (178, 13)

(178, 13)

#### Load wine dataset

In [3]:
X, y = datasets.load_wine(return_X_y = True)
print(X)
print(y)

[[1.423e+01 1.710e+00 2.430e+00 ... 1.040e+00 3.920e+00 1.065e+03]
 [1.320e+01 1.780e+00 2.140e+00 ... 1.050e+00 3.400e+00 1.050e+03]
 [1.316e+01 2.360e+00 2.670e+00 ... 1.030e+00 3.170e+00 1.185e+03]
 ...
 [1.327e+01 4.280e+00 2.260e+00 ... 5.900e-01 1.560e+00 8.350e+02]
 [1.317e+01 2.590e+00 2.370e+00 ... 6.000e-01 1.620e+00 8.400e+02]
 [1.413e+01 4.100e+00 2.740e+00 ... 6.100e-01 1.600e+00 5.600e+02]]
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2]


> #### 1> Split the train & test data sets

> #### 2> Define the model hyperparameters

In [4]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2)

params = {
    'penalty': 'l2',
    'solver': 'lbfgs',
    'max_iter': 1000,
    'multi_class': 'auto',
    'random_state': 8888
}

> #### Train the model

In [5]:
lr = LogisticRegression(**params)
lr.fit(X_train, y_train)

STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


> #### Perform predictions on test data

In [6]:
y_pred = lr.predict(X_test)
y_pred

array([1, 0, 1, 2, 2, 2, 2, 1, 0, 0, 2, 0, 2, 0, 2, 2, 0, 1, 2, 0, 2, 0,
       2, 2, 0, 1, 1, 2, 0, 1, 1, 1, 0, 0, 0, 1])

> #### Calculate metrics

In [13]:
accuracy = accuracy_score(y_test, y_pred)
accuracy

1.0

#### Set mlflow tracking uri

In [16]:
if not mfl.is_tracking_uri_set():
    mfl.set_tracking_uri(uri = 'http://127.0.0.1:5000')

#### Set mlflow new experiment

In [21]:
from mlflow.models.signature import ModelSignature

try:
	mfl.set_experiment('Logistic Regression - Wine data')
      
except mfl.exceptions.MlflowException as exception:
	print(f'Failed to set experiment: {exception}')

try:
    with mfl.start_run() as lr_run:
        # log the hyperparams
        mfl.log_params(params)
        
        # log the accuracy_metrics
        mfl.log_metric('accuracy is:', accuracy)

        # Assign a tag that indicates the purpose of this run
        mfl.set_tag('Training Info', 'Basic LR model for wine dataset')

        # Infer the model signature
        signature: ModelSignature = infer_signature(X_train, lr.predict(X_train))
        print(signature)

        # log the model
        model_info = mfl.sklearn.log_model(
              sk_model = lr,
              artifact_path = 'wine_data',
              signature = signature,
              input_example = X_train,
              registered_model_name = 'Logistic_Reg_Wine_data_model'
        )

except mfl.exceptions.MlflowException as exception:
    print(f'Failed to start run or log metrics: {exception}')	

Failed to set experiment: API request to endpoint /api/2.0/mlflow/experiments/get-by-name failed with error code 403 != 200. Response body: ''
Failed to start run or log metrics: API request to endpoint /api/2.0/mlflow/runs/create failed with error code 403 != 200. Response body: ''


: 

> ### mlflow start server with port -command line 

In [10]:
!mlflow server --port 5000

> #### Inferencing & validating model

- way 1: via json serving payload

- way 2: via pyfunc.load_model function

In [11]:
import json
from mlflow.models import validate_serving_input

In [None]:
# Way 1
model_uri = model_info.model.uri            # gets the model_uri

"""
 The model is logged with an input example. MLflow converts it into the serving payload format for the deployed model endpoint and saves it to serving input payload.json under the /mlruns folder'
"""

json_file_path = '/content/mlruns/570590257539470643/be5da416ded642db8cad356fb5843bdd/artifacts/wine_data/serving_input_example.json'   # on each run, modify the path and <run_number> under mlruns folder

with open(json_file_path, 'r') as file:
  serving_payload: dict = json.load(file)

In [None]:
serving_payload

#### Validate the serving payload

In [None]:
validate_serving_input(model_uri, serving_payload)

In [None]:
# Way 2
loaded_model = mfl.pyfunc.load_model(model_uri)
predictions = loaded_model.predict(X_test)
wine_data_features = datasets.load_wine().feature_names

final_result_df = pd.DataFrame(X_test, columns = wine_data_features) 
final_result_df['actual_class'] = y_test 
final_result_df['predcited_class'] = predictions

In [None]:
final_result_df[:10]      # display top 10 rows