# MLflow Packaging
---

In [0]:
# Install dependencies

# capture - disables cell output
%%capture
! pip install transformers
! pip install mlflow

In [3]:
from google.colab import drive
drive.mount('/content/gdrive', force_remount=True)

Mounted at /content/gdrive


In [4]:
import torch
from transformers import *

In [5]:
# BERT tokenizer - this should be serialized with the model
# Because without the tokenizer, we cannot predict
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

# Pretrained BERT model
model = BertModel.from_pretrained('bert-base-uncased')

# example
input_ids = torch.tensor([tokenizer.encode("Here is some text to encode", add_special_tokens=True)])

HBox(children=(IntProgress(value=0, description='Downloading', max=231508, style=ProgressStyle(description_wid…




HBox(children=(IntProgress(value=0, description='Downloading', max=361, style=ProgressStyle(description_width=…




HBox(children=(IntProgress(value=0, description='Downloading', max=440473133, style=ProgressStyle(description_…




In [6]:
# Run the sample through the model
with torch.no_grad():
    last_hidden_states = model(input_ids)[0]

# Output from the language model
last_hidden_states

tensor([[[-0.0549,  0.1053, -0.1065,  ..., -0.3550,  0.0686,  0.6506],
         [-0.5759, -0.3650, -0.1383,  ..., -0.6782,  0.2092, -0.1639],
         [-0.1641, -0.5597,  0.0150,  ..., -0.1603, -0.1346,  0.6216],
         ...,
         [ 0.2448,  0.1254,  0.1587,  ..., -0.2749, -0.1163,  0.8809],
         [ 0.0481,  0.4950, -0.2827,  ..., -0.6097, -0.1212,  0.2527],
         [ 0.9046,  0.2137, -0.5897,  ...,  0.3040, -0.6172, -0.1950]]])

# MLflow

So we now have a model and we want to serialize and package this model for production.

In [0]:
import mlflow
import mlflow.pytorch

In [0]:
# CHANGE THESE TO THE LOCATION THAT YOU WOULD LIKE FOR YOUR DRIVE
mlflow_path = f"/content/gdrive/My Drive/MLOPS/hands_on/mlflow/models/ML1"

# mlflow_env.yml is a provided file
conda_env_path = f'/content/gdrive/My Drive/MLOPS/hands_on/mlflow/mlflow_env.yml'

mlflow.pytorch.save_model(model, mlflow_path, conda_env=conda_env_path)

In [11]:
! ls /content/gdrive/My\ Drive/MLOPS/hands_on/mlflow/models

ML1


In [0]:
# Load model
new_model = mlflow.pytorch.load_model(mlflow_path)

In [13]:
new_model(input_ids)[0]

tensor([[[-0.0549,  0.1053, -0.1065,  ..., -0.3550,  0.0686,  0.6506],
         [-0.5759, -0.3650, -0.1383,  ..., -0.6782,  0.2092, -0.1639],
         [-0.1641, -0.5597,  0.0150,  ..., -0.1603, -0.1346,  0.6216],
         ...,
         [ 0.2448,  0.1254,  0.1587,  ..., -0.2749, -0.1163,  0.8809],
         [ 0.0481,  0.4950, -0.2827,  ..., -0.6097, -0.1212,  0.2527],
         [ 0.9046,  0.2137, -0.5897,  ...,  0.3040, -0.6172, -0.1950]]],
       grad_fn=<NativeLayerNormBackward>)

# Pyfunc Method

In [0]:
# THIS FILE NEEDS TO BE ADDED TO THE FILE EXPLORER ON THE LEFT
# CLICK THE FOLDER ICON AND YOU CAN DRAG AND DROP THE FILE INTO THE WINDOW
# THE FILE IS AVAILABLE WITH THE REST OF THE MATERIAL
import wrapper_model

In [0]:
# Create a pyfunc_data directory in files panel on the left
! mkdir /content/pyfunc_data

In [0]:
# MLflow output path
pyfunc_data = '/content/pyfunc_data/'

# Serialize the model
torch.save(model, f'{pyfunc_data}/model.pt')

# Serialize the tokenizer
torch.save(tokenizer, f'{pyfunc_data}/tokenizer.pt')

In [18]:
mlflow_path = f"/content/gdrive/My Drive/MLOPS/hands_on/mlflow/models/ML2"

mlflow.pyfunc.save_model(
    path=mlflow_path,
    loader_module=wrapper_model.__name__,
    data_path=pyfunc_data,
    code_path=['/content/gdrive/My Drive/MLOPS/hands_on/mlflow/wrapper_model.py'],
    conda_env=f'/content/gdrive/My Drive/MLOPS/hands_on/mlflow/mlflow_env.yml'
)

<mlflow.models.Model at 0x7f6d66f92320>

In [0]:
# Load in MLflow model
mlflow_model = mlflow.pyfunc.load_model(mlflow_path)

In [20]:
import pandas as pd

# Sample inference text
inf_df = pd.DataFrame(["Here is some text to encode"], columns=['text'])
inf_df.head()

Unnamed: 0,text
0,Here is some text to encode


In [21]:
mlflow_model.predict(inf_df)

tensor([[[-0.0549,  0.1053, -0.1065,  ..., -0.3550,  0.0686,  0.6506],
         [-0.5759, -0.3650, -0.1383,  ..., -0.6782,  0.2092, -0.1639],
         [-0.1641, -0.5597,  0.0150,  ..., -0.1603, -0.1346,  0.6216],
         ...,
         [ 0.2448,  0.1254,  0.1587,  ..., -0.2749, -0.1163,  0.8809],
         [ 0.0481,  0.4950, -0.2827,  ..., -0.6097, -0.1212,  0.2527],
         [ 0.9046,  0.2137, -0.5897,  ...,  0.3040, -0.6172, -0.1950]]],
       grad_fn=<NativeLayerNormBackward>)