# **MLOps with MLFlow**

In [2]:
import mlflow
import torch 
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn import set_config

import warnings
warnings.filterwarnings("ignore")
set_config(display="diagram")

## **DataSet** 

In [3]:
columns = ["Length (mm)", "Depth (mm)"]
target = "Species"

data_path = "./data/penguins_classification.csv"
data = pd.read_csv(data_path)

data.head()


Unnamed: 0,Length (mm),Depth (mm),Species
0,39.1,18.7,Adelie
1,39.5,17.4,Adelie
2,40.3,18.0,Adelie
3,36.7,19.3,Adelie
4,39.3,20.6,Adelie


In [4]:
data, target = data[columns], data[target]
data_train, data_test, target_train, target_test = train_test_split(data, target, random_state=0)


In [5]:
classifier = RandomForestClassifier(max_depth=3)
classifier.fit(data_train, target_train)
classifier

In [6]:
test_score = classifier.score(data_test, target_test)
test_score

0.9651162790697675

## **MLFlow Track Experiment**

In [10]:
!mlflow server --backend-store-uri sqlite:///mflow.db --default-artifact-root mlruns/ --host 127.0.0.1 --port 5000

[2022-12-13 21:38:09 +0530] [12644] [INFO] Starting gunicorn 20.1.0
[2022-12-13 21:38:09 +0530] [12644] [ERROR] Connection in use: ('127.0.0.1', 5000)
[2022-12-13 21:38:09 +0530] [12644] [ERROR] Retrying in 1 second.
[2022-12-13 21:38:10 +0530] [12644] [ERROR] Connection in use: ('127.0.0.1', 5000)
[2022-12-13 21:38:10 +0530] [12644] [ERROR] Retrying in 1 second.
^C


In [15]:
remote_server_uri = "http://127.0.0.1:5000"
mlflow.set_tracking_uri(remote_server_uri)
mlflow

<module 'mlflow' from '/home/mnk/python/envs/pytorch/lib/python3.10/site-packages/mlflow/__init__.py'>

### **Create New Experiment**

In [16]:
exp = "Penguin Classification"
mlflow.create_experiment(exp)

'1'

### **Signature the Model**

In [30]:
from mlflow.models.signature import ModelSignature
from mlflow.types.schema import Schema, ColSpec

input_schema = Schema([
    ColSpec("float", "Length (mm)"),
    ColSpec("float", "Depth (mm)")
])

output_schema = Schema([
    ColSpec("string", "Species")
])

signature = ModelSignature(inputs=input_schema, outputs=output_schema)


### **Start Experiment**

In [31]:
mlflow.set_experiment(exp)
with mlflow.start_run() as run:
    print(f"Started Run {run.info}")
    
    columns = ["Length (mm)", "Depth (mm)"]
    target = "Species"
    data_path = "./data/penguins_classification.csv"
    data = pd.read_csv(data_path)

    data, target = data[columns], data[target]
    data_train, data_test, target_train, target_test = train_test_split(data, target, random_state=0)

    max_depth = 3 
    max_leaf_nodes = 5

    classifier = RandomForestClassifier(max_depth=max_depth, max_leaf_nodes=max_leaf_nodes)
    classifier.fit(data_train, target_train)

    test_score = classifier.score(data_test, target_test)
    
    mlflow.log_params({
        "max_depth" : max_depth,
        "max_leaf_nodes" : max_leaf_nodes
    })
    mlflow.log_metrics({
        "test_score" : test_score
        })
    mlflow.log_artifact("./mlops_basics.ipynb")
    mlflow.sklearn.log_model(classifier, "RFC_model", signature=signature)
            

Started Run <RunInfo: artifact_uri='mlruns/1/cbbf98531b2f418e9256f77d1e0a1660/artifacts', end_time=None, experiment_id='1', lifecycle_stage='active', run_id='cbbf98531b2f418e9256f77d1e0a1660', run_name='fearless-fish-423', run_uuid='cbbf98531b2f418e9256f77d1e0a1660', start_time=1670950147211, status='RUNNING', user_id='mnk'>


### **Predict the Model**

In [33]:
logged_model = 'runs:/cbbf98531b2f418e9256f77d1e0a1660/RFC_model'
loaded_model = mlflow.pyfunc.load_model(logged_model)

#change datatype of the pandas
data_test = data_test.astype('float32')
loaded_model.predict(data_test)

array(['Adelie', 'Chinstrap', 'Adelie', 'Chinstrap', 'Adelie',
       'Chinstrap', 'Gentoo', 'Adelie', 'Gentoo', 'Adelie', 'Adelie',
       'Gentoo', 'Gentoo', 'Adelie', 'Adelie', 'Gentoo', 'Gentoo',
       'Gentoo', 'Chinstrap', 'Adelie', 'Chinstrap', 'Adelie',
       'Chinstrap', 'Gentoo', 'Adelie', 'Adelie', 'Gentoo', 'Chinstrap',
       'Gentoo', 'Gentoo', 'Adelie', 'Adelie', 'Adelie', 'Gentoo',
       'Gentoo', 'Adelie', 'Chinstrap', 'Gentoo', 'Chinstrap', 'Adelie',
       'Adelie', 'Adelie', 'Adelie', 'Gentoo', 'Adelie', 'Adelie',
       'Chinstrap', 'Gentoo', 'Gentoo', 'Chinstrap', 'Adelie', 'Adelie',
       'Adelie', 'Adelie', 'Adelie', 'Adelie', 'Gentoo', 'Adelie',
       'Adelie', 'Chinstrap', 'Adelie', 'Adelie', 'Adelie', 'Gentoo',
       'Adelie', 'Adelie', 'Adelie', 'Gentoo', 'Adelie', 'Gentoo',
       'Adelie', 'Chinstrap', 'Adelie', 'Adelie', 'Gentoo', 'Adelie',
       'Chinstrap', 'Adelie', 'Adelie', 'Gentoo', 'Adelie', 'Gentoo',
       'Chinstrap', 'Adelie', 'Chinstrap

## **ML Deployment & Manage**

In [35]:
from mlflow.tracking import MlflowClient
from mlflow.exceptions import RestException

model_name = "RFC_model"
client = MlflowClient()
try:
    registered_model = client.create_registered_model(model_name)
    print(registered_model)
except RestException:
    print(f"Model {model_name} already exists")

<RegisteredModel: creation_timestamp=1670953927964, description='', last_updated_timestamp=1670953927964, latest_versions=[], name='RFC_model', tags={}>


In [36]:
client.create_model_version(
    name=model_name,
    source=logged_model,
    run_id=run.info.run_id
)


2022/12/13 23:28:42 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for model version to finish creation.                     Model name: RFC_model, version 1


<ModelVersion: creation_timestamp=1670954322126, current_stage='None', description='', last_updated_timestamp=1670954322126, name='RFC_model', run_id='cbbf98531b2f418e9256f77d1e0a1660', run_link='', source='runs:/cbbf98531b2f418e9256f77d1e0a1660/RFC_model', status='READY', status_message='', tags={}, user_id='', version='1'>

In [37]:
result = mlflow.register_model(
    f"runs:/{run.info.run_id}/RFC_model",
    model_name
)

result

Registered model 'RFC_model' already exists. Creating a new version of this model...
2022/12/13 23:30:37 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for model version to finish creation.                     Model name: RFC_model, version 2
Created version '2' of model 'RFC_model'.


<ModelVersion: creation_timestamp=1670954437654, current_stage='None', description='', last_updated_timestamp=1670954437654, name='RFC_model', run_id='cbbf98531b2f418e9256f77d1e0a1660', run_link='', source='mlruns/1/cbbf98531b2f418e9256f77d1e0a1660/artifacts/RFC_model', status='READY', status_message='', tags={}, user_id='', version='2'>

In [39]:
!MLFLOW_TRACKING_URI=http://localhost:5000 mlflow models serve --no-conda -m "models:/RFC_model/2" -p 4242

2022/12/13 23:34:22 INFO mlflow.models.flavor_backend_registry: Selected backend for flavor 'python_function'
2022/12/13 23:34:22 INFO mlflow.pyfunc.backend: === Running command 'exec gunicorn --timeout=60 -b 127.0.0.1:4242 -w 1 ${GUNICORN_CMD_ARGS} -- mlflow.pyfunc.scoring_server.wsgi:app'
[2022-12-13 23:34:22 +0530] [28209] [INFO] Starting gunicorn 20.1.0
[2022-12-13 23:34:22 +0530] [28209] [ERROR] Connection in use: ('127.0.0.1', 4242)
[2022-12-13 23:34:22 +0530] [28209] [ERROR] Retrying in 1 second.
^C


### **Requesting API**

In [97]:
import json
import requests
from requests.structures import CaseInsensitiveDict

url = "http://localhost:4242/invocations"
headers = CaseInsensitiveDict()
headers["Content-Type"] = "application/json; format=pandas-split"


In [98]:
data = [{"Length (mm)": 1.2,"Depth (mm)": 3.3}
]

In [99]:
response = requests.post(url, headers=headers, data=json.dumps(data))
response.status_code

415

In [67]:
response.text

"Unrecognized content type parameters: format. IMPORTANT: The MLflow Model scoring protocol has changed in MLflow version 2.0. If you are seeing this error, you are likely using an outdated scoring request format. To resolve the error, either update your request format or adjust your MLflow Model's requirements file to specify an older version of MLflow (for example, change the 'mlflow' requirement specifier to 'mlflow==1.30.0'). If you are making a request using the MLflow client (e.g. via `mlflow.pyfunc.spark_udf()`), upgrade your MLflow client to a version >= 2.0 in order to use the new request format. For more information about the updated MLflow Model scoring protocol in MLflow 2.0, see https://mlflow.org/docs/latest/models.html#deploy-mlflow-models."

In [70]:
client.transition_model_version_stage(
    name=model_name,
    version=2,
    stage="Production"
)

<ModelVersion: creation_timestamp=1670954437654, current_stage='Production', description='', last_updated_timestamp=1670956032256, name='RFC_model', run_id='cbbf98531b2f418e9256f77d1e0a1660', run_link='', source='mlruns/1/cbbf98531b2f418e9256f77d1e0a1660/artifacts/RFC_model', status='READY', status_message='', tags={}, user_id='', version='2'>