<a href="https://colab.research.google.com/github/Aggregate-Intellect/original-handson-packages/blob/main/MLOps/Module1-Model_Packaging/Common_Serialization_Methods/Serialize_example.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Model Serialization Example

This is a simple model used to illustrate the fragile nature of serializing objects with Python's pickle format.

In [None]:
import os
import warnings

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from torch.nn import functional as F

warnings.filterwarnings('ignore')

In [None]:
# Download the csv to the content directory in colab
! wget https://gist.githubusercontent.com/netj/8836201/raw/6f9306ad21398ea43cba4f7d537619d0e07d5ae3/iris.csv

--2020-03-08 21:48:01--  https://gist.githubusercontent.com/netj/8836201/raw/6f9306ad21398ea43cba4f7d537619d0e07d5ae3/iris.csv
Resolving gist.githubusercontent.com (gist.githubusercontent.com)... 151.101.0.133, 151.101.64.133, 151.101.128.133, ...
Connecting to gist.githubusercontent.com (gist.githubusercontent.com)|151.101.0.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 3975 (3.9K) [text/plain]
Saving to: ‘iris.csv’


2020-03-08 21:48:02 (108 MB/s) - ‘iris.csv’ saved [3975/3975]



# Data Prep

In [None]:
# Read the csv downloaded in the previous step
iris_df = pd.read_csv('/content/iris.csv')

# Encode each species name as an integer class id. A plain dict lookup keeps the
# demo short; an actual model would use a label encoder here.
species = dict(Setosa=0, Versicolor=1, Virginica=2)
iris_df['variety'] = iris_df['variety'].map(lambda name: species[name])

In [None]:
# Count how many rows carry each encoded class label
iris_df['variety'].value_counts()

2    50
1    50
0    50
Name: variety, dtype: int64

In [None]:
# Print the (rows, columns) shape, then display the first rows of the frame
print(iris_df.shape)
iris_df.head()

(150, 5)


Unnamed: 0,sepal.length,sepal.width,petal.length,petal.width,variety
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0


In [None]:
# Separate the label column from the feature columns, then hold out 30% of
# the rows for testing (fixed random_state so the split is repeatable)
iris_y = iris_df[['variety']]
iris_x = iris_df.drop(columns='variety')

X_train, x_test, Y_train, y_test = train_test_split(
    iris_x, iris_y, test_size=0.3, random_state=0
)

In [None]:
# Convert the train / test splits to tensors
def _as_tensor(data):
    """Return `data` as a tensor.

    Accepts either a pandas object (converted through its numpy `.values`) or
    a tensor produced by an earlier run of this cell, which is returned as is.
    X_train and y_test are rebound to tensors below, so without this guard a
    second run of the cell would try to convert a tensor again and fail.
    """
    if torch.is_tensor(data):
        return data
    return torch.from_numpy(data.values)

X_train = _as_tensor(X_train).float()
X_test = _as_tensor(x_test).float()
# Labels come from a single-column frame; flatten them to 1-D class indices,
# the target layout used with NLLLoss in the training loop
y_train = _as_tensor(Y_train).reshape(-1)
y_test = _as_tensor(y_test).reshape(-1)

# Model

In [None]:
# Simple MLP for demonstrating serialization

input_size = 4    # four measurement columns per sample
output_size = 3   # three encoded varieties
hidden_size = 30

class IrisNet(nn.Module):
    """Fully connected classifier with two sigmoid hidden layers.

    Args:
        input_size: number of input features.
        hidden_size: width of both hidden layers.
        output_size: number of classes.

    The defaults are the constants defined above, so `IrisNet()` builds the
    same 4 -> 30 -> 30 -> 3 network as before; passing explicit sizes lets the
    class be reused for other shapes without editing globals.
    """

    def __init__(self, input_size=input_size, hidden_size=hidden_size,
                 output_size=output_size):
        super().__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, hidden_size)
        self.fc3 = nn.Linear(hidden_size, output_size)

    def forward(self, X):
        """Return per-class log-probabilities for `X` (last dim = features)."""
        X = torch.sigmoid(self.fc1(X))
        X = torch.sigmoid(self.fc2(X))
        X = self.fc3(X)

        # log_softmax over the class dimension pairs with nn.NLLLoss
        return F.log_softmax(X, dim=-1)

In [None]:
# Seed PyTorch's RNG so the randomly initialised weights, and therefore the
# training run that follows, are the same on every Restart & Run All
# (0 matches the random_state used for the train / test split).
torch.manual_seed(0)

# Initialize the network and define the optimizer and loss function
model = IrisNet()
optimizer = optim.Adam(model.parameters(), lr = 0.03)
# NLLLoss consumes the log-probabilities that IrisNet.forward returns
loss_fn = nn.NLLLoss()

In [None]:
# Train the model on the full training set (one optimizer step per epoch)

epochs = 500

for epoch in range(epochs):
    # Forward pass and loss for the current weights
    y_pred = model(X_train)
    loss = loss_fn(y_pred, y_train)

    # Clear gradients from the previous step, backpropagate, then update
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # Report progress every 100 epochs
    if epoch % 100 == 0:
        print(f'Epoch: {epoch} loss: {loss.item()}')

Epoch: 0 loss: 1.10524320602417
Epoch: 100 loss: 0.022699834778904915
Epoch: 200 loss: 0.017987484112381935
Epoch: 300 loss: 0.014632522128522396
Epoch: 400 loss: 0.011214395053684711


In [None]:
def inference(model, input):
  """Return the predicted class index (or indices) for `input`.

  Args:
    model: callable mapping a feature tensor to per-class scores, with the
      classes on the last dimension.
    input: a single sample of shape (features,) or a batch of shape
      (batch, features).

  Returns:
    A 0-d tensor holding the class index for a single sample, or a 1-D tensor
    with one class index per row for a batch.
  """
  # Gradients are not needed for prediction
  with torch.no_grad():
    scores = model(input)

  # Reduce over the class dimension only. argmax without `dim` flattens the
  # tensor, so for a batch it would return one index into the flattened scores
  # instead of one prediction per sample.
  return torch.argmax(scores, dim=-1)

In [None]:
# Same measurements as the first row shown by iris_df.head() (class 0)
example = torch.tensor([5.1, 3.5, 1.4, 0.2])

# Predict with the freshly trained model and print the class index
pred = inference(model, example)
print(pred)

tensor(0)


# Serialize the model

In [None]:
# Mount Google Drive at /content/gdrive in order to save the model files there

from google.colab import drive
drive.mount('/content/gdrive', force_remount=True)

Mounted at /content/gdrive


In [None]:
# File name and Google Drive destination for the fully serialized model
model_name = 'iris_model.pt'
model_path = f"/content/gdrive/My Drive/MLOPS/hands_on/serialization/models/{model_name}"

In [None]:
# Save the model (the whole IrisNet object, not just its weights).
# The destination folder is created first: the save fails when the parent
# directory is missing, which is the case on a Drive that has never held it.
os.makedirs(os.path.dirname(model_path), exist_ok=True)
torch.save(model, model_path)

In [None]:
# Ensure the model was saved by listing the models folder on Drive
! ls /content/gdrive/My\ Drive/MLOPS/hands_on/serialization/models/

iris_model.pt


In [None]:
# Load the model
# torch.load restores the complete object written by torch.save(model, ...).
# NOTE(review): pickle stores a reference to the class rather than its code, so
# this presumably only works while an IrisNet definition is available in the
# session -- confirm against the PyTorch serialization docs.
new_model = torch.load(model_path)

In [None]:
# Display the restored module and its layers
new_model

IrisNet(
  (fc1): Linear(in_features=4, out_features=30, bias=True)
  (fc2): Linear(in_features=30, out_features=30, bias=True)
  (fc3): Linear(in_features=30, out_features=3, bias=True)
)

In [None]:
# Run the same sample through the model restored from disk and print its prediction
example = torch.tensor([5.1, 3.5, 1.4, 0.2])
pred = inference(new_model, example)
print(pred)

tensor(0)


# state_dict

In [None]:
# Display the state_dict: an ordered mapping from parameter names
# (e.g. 'fc1.weight') to their tensors
model.state_dict()

OrderedDict([('fc1.weight', tensor([[-0.1933, -1.1438,  0.9321,  0.7992],
                      [ 0.8488, -0.0461, -1.1099, -1.1657],
                      [ 0.7280, -0.9130, -0.5639, -0.6317],
                      [-0.2330, -0.8349,  1.1358,  1.8528],
                      [ 0.3182, -1.3219,  0.6499,  0.5758],
                      [ 0.0064, -1.1606,  1.0093,  0.0978],
                      [ 0.6477, -0.0620, -1.0373, -0.5128],
                      [-0.7987, -0.0450,  1.0813,  0.9355],
                      [-0.4320, -0.4130,  1.0069,  0.3191],
                      [-0.1041, -1.1976,  1.0155,  0.6686],
                      [ 0.5000,  0.1730, -0.7202, -1.2627],
                      [-0.0262,  1.1659, -0.7866, -0.6987],
                      [-0.4091, -0.3475,  0.9222,  0.4675],
                      [ 0.1547,  0.2475, -0.6214, -0.6809],
                      [ 0.9046, -0.0538, -1.0953, -1.0928],
                      [ 0.2168,  0.8815, -1.0541, -0.2723],
                      [ 0.

In [None]:
# Separate file name so the state_dict does not overwrite the full-model file
model_name = 'iris_model_state_dict.pt'
model_path = f"/content/gdrive/My Drive/MLOPS/hands_on/serialization/models/{model_name}"

# Save the model's state_dict (only the parameter tensors, not the IrisNet object)
torch.save(model.state_dict(), model_path)

In [None]:
# List the models folder again; both saved files should now be present
! ls /content/gdrive/My\ Drive/MLOPS/hands_on/serialization/models/

iris_model.pt  iris_model_state_dict.pt


In [None]:
model_name = 'iris_model_state_dict.pt'
model_path = f"/content/gdrive/My Drive/MLOPS/hands_on/serialization/models/{model_name}"

# A state_dict holds only parameter tensors, so the network has to be built
# from its class before the weights can be restored. A fresh, untrained
# IrisNet is used on purpose: loading back into the already-trained instance
# would leave its weights unchanged and show nothing about the round-trip.
model = IrisNet()
model.load_state_dict(torch.load(model_path))

<All keys matched successfully>

In [None]:
# Check the model whose weights were just restored with load_state_dict.
# (`new_model` is the separately pickled copy loaded earlier; predicting with
# it would not exercise the state_dict path at all.)
example = torch.tensor([5.1, 3.5, 1.4, 0.2])
pred = inference(model, example)
print(pred)

tensor(0)
