# **Serializing Example**

# Agenda:
The main agenda of this notebook is to demonstrate two different methods of serialization.
1. `torch.save(model)` — saving the entire model object (PyTorch's built-in method)
2. `model.state_dict()` — saving only the model's trained parameters.

We will also compare the two methods and clarify why saving the `state_dict` is recommended over pickling the entire model with `torch.save(model)`.

Please load the second notebook (*link) in another tab for hands-on practice. [Note: If Colab does not allow a second session, change the runtime of the second notebook from GPU to TPU.]



# Model Serialization Example

This is a simple model used to illustrate the fragile nature of serializing objects with Python's pickle format.

In [None]:
# import dependencies

import os
import warnings

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from torch.nn import functional as F
from torchsummary import summary

warnings.filterwarnings('ignore')

In [None]:
# Download the csv to the content directory in colab

! wget https://gist.githubusercontent.com/netj/8836201/raw/6f9306ad21398ea43cba4f7d537619d0e07d5ae3/iris.csv

--2021-06-06 15:51:18--  https://gist.githubusercontent.com/netj/8836201/raw/6f9306ad21398ea43cba4f7d537619d0e07d5ae3/iris.csv
Resolving gist.githubusercontent.com (gist.githubusercontent.com)... 185.199.111.133, 185.199.109.133, 185.199.110.133, ...
Connecting to gist.githubusercontent.com (gist.githubusercontent.com)|185.199.111.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 3975 (3.9K) [text/plain]
Saving to: ‘iris.csv.3’


2021-06-06 15:51:18 (24.5 MB/s) - ‘iris.csv.3’ saved [3975/3975]



# Data Preparation



In [None]:
# Load the downloaded CSV into a DataFrame and preview its first rows
iris_df = pd.read_csv('/content/iris.csv')
iris_df.head()

Unnamed: 0,sepal.length,sepal.width,petal.length,petal.width,variety
0,5.1,3.5,1.4,0.2,Setosa
1,4.9,3.0,1.4,0.2,Setosa
2,4.7,3.2,1.3,0.2,Setosa
3,4.6,3.1,1.5,0.2,Setosa
4,5.0,3.6,1.4,0.2,Setosa


In [None]:
# Convert the label column to integers for training the model.

species = {'Setosa': 0,'Versicolor': 1, 'Virginica': 2}

# Only encode while the column still holds the variety names. Without this guard a
# second run of the cell would look up integer labels in `species` and fail.
if not pd.api.types.is_integer_dtype(iris_df['variety']):
    # An unknown variety name maps to NaN, which makes the int64 cast raise
    # instead of silently producing a bad label.
    iris_df['variety'] = iris_df['variety'].map(species).astype('int64')
iris_df.head()

Unnamed: 0,sepal.length,sepal.width,petal.length,petal.width,variety
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0


In [None]:
# Count the samples in each encoded class
iris_df['variety'].value_counts()

2    50
1    50
0    50
Name: variety, dtype: int64

In [None]:
# Show the (rows, columns) shape, then preview the frame with encoded labels
print(iris_df.shape)
iris_df.head()

(150, 5)


Unnamed: 0,sepal.length,sepal.width,petal.length,petal.width,variety
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0


In [None]:
# Shape of the dataset as (rows, columns)
iris_df.shape

(150, 5)

In [None]:
# Create features / labels and train / test splits.
# `iris_y` keeps only the label column (the model target); `iris_x` keeps every
# other column (the model inputs).
iris_y = iris_df[['variety']]
iris_x = iris_df.drop(columns='variety')

# Hold out 30% of the rows for testing; the fixed random_state makes the shuffle repeatable.
X_train, x_test, Y_train, y_test = train_test_split(
    iris_x, iris_y, test_size=0.3, random_state=0
)

In [None]:
# Convert data from pandas/numpy format to pytorch tensors.
def _to_tensor(data):
    """Return `data` unchanged if it is already a tensor, else wrap its NumPy values."""
    return data if torch.is_tensor(data) else torch.from_numpy(data.values)

# X_train and y_test are overwritten with their tensor versions below, so on a
# second run of this cell they no longer have `.values`; the guard in _to_tensor
# keeps the cell safe to re-run.
X_train = _to_tensor(X_train).float()
X_test = _to_tensor(x_test).float()
# The label frames have a single column; flatten them to 1-D vectors of class indices.
y_train = _to_tensor(Y_train).reshape(-1)
y_test = _to_tensor(y_test).reshape(-1)

# Model Development

Here, we will use a simple multilayer perceptron (MLP) with three fully connected layers.

In [None]:
# Simple MLP for demonstrating serialization

input_size = 4      # one input per measurement column in the iris data
output_size = 3     # one output per iris variety
hidden_size = 30    # width of both hidden layers

class IrisNet(nn.Module):
    """Three-layer fully connected classifier that outputs log-probabilities.

    The layer sizes default to the constants defined above, so `IrisNet()` builds
    the same 4 -> 30 -> 30 -> 3 network as before. Pass explicit sizes to reuse the
    architecture on data with a different number of features or classes.

    Args:
        in_features: Number of input features per sample.
        hidden_features: Width of each of the two hidden layers.
        out_features: Number of target classes.
    """

    def __init__(self, in_features=input_size, hidden_features=hidden_size,
                 out_features=output_size):
        super().__init__()
        self.fc1 = nn.Linear(in_features, hidden_features)
        self.fc2 = nn.Linear(hidden_features, hidden_features)
        self.fc3 = nn.Linear(hidden_features, out_features)

    def forward(self, X):
        """Return per-class log-probabilities for `X`, normalized over the last dimension."""
        X = torch.sigmoid(self.fc1(X))
        X = torch.sigmoid(self.fc2(X))
        X = self.fc3(X)

        # log_softmax pairs with the NLLLoss criterion used for training
        return F.log_softmax(X, dim=-1)


# Let's visualize the model: the layer listing first, then torchsummary's
# per-layer table of output shapes and parameter counts.
model = IrisNet()
print(model)
summary(IrisNet(), (input_size,))

IrisNet(
  (fc1): Linear(in_features=4, out_features=30, bias=True)
  (fc2): Linear(in_features=30, out_features=30, bias=True)
  (fc3): Linear(in_features=30, out_features=3, bias=True)
)
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Linear-1                   [-1, 30]             150
            Linear-2                   [-1, 30]             930
            Linear-3                    [-1, 3]              93
Total params: 1,173
Trainable params: 1,173
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.00
Params size (MB): 0.00
Estimated Total Size (MB): 0.00
----------------------------------------------------------------


In [None]:
# initialize the network and define the optimizer and loss function

# Seed PyTorch's RNG so the initial weights, and therefore the loss values
# printed during training, are the same on every fresh run.
torch.manual_seed(0)

model = IrisNet()
optimizer = optim.Adam(model.parameters(), lr = 0.03)
# NLLLoss expects log-probabilities, which is what IrisNet.forward returns
loss_fn = nn.NLLLoss()

In [None]:
# Train the model: every epoch is one optimizer step over the whole training tensor

epochs = 500

for epoch in range(epochs):
    # Drop the gradients accumulated by the previous step
    optimizer.zero_grad()

    # Forward pass and loss on the training data
    y_pred = model(X_train)
    loss = loss_fn(y_pred, y_train)

    # Backward pass and parameter update
    loss.backward()
    optimizer.step()

    # Only report on every 100th epoch (0, 100, 200, ...)
    if epoch % 100:
        continue
    print(f'Epoch: {epoch} loss: {loss.item()}')

Epoch: 0 loss: 1.1344870328903198
Epoch: 100 loss: 0.022997340187430382
Epoch: 200 loss: 0.021345626562833786
Epoch: 300 loss: 0.020002977922558784
Epoch: 400 loss: 0.01113436371088028


In [None]:
def inference(model, input):
    """Return the predicted class index (or indices) for `input`.

    Args:
        model: Callable that maps a feature tensor to per-class scores, with the
            classes laid out along the last dimension.
        input: A single sample of shape (features,) or a batch of shape
            (batch, features).

    Returns:
        A 0-dim tensor with the class index for a single sample, or a tensor of
        shape (batch,) with one class index per row for a batch.
    """
    # Prediction does not need gradients; skipping them avoids building a graph.
    with torch.no_grad():
        scores = model(input)

    # Reduce over the class dimension only. A bare argmax flattens the tensor
    # first and would return one meaningless index for batched input.
    return torch.argmax(scores, dim=-1)

In [None]:
# One sample in column order: sepal length, sepal width, petal length, petal width
example = torch.tensor([5.1, 3.5, 1.4, 0.2])

# Predicted class index for that sample
pred = inference(model, example)
print(pred)

tensor(0)


# Serialize the model

### Why Serialization in Machine Learning:

Serialization is the practice of converting data into a serialized format. Training a machine learning model takes time and computing power, and some models are so complex that training takes hours. If we need to use the model again later, it is best to save the trained model and reuse it whenever required. The advantage of serialization is that **the serialized format is much faster to load compared to a JSON or SQL format file.**

In practice, a trained model is a Python object. Serialization simply converts that Python object into a serialized format, which uses a specific data layout so that the object can be restored exactly.

In [None]:
# Mount to google drive in order to save there
# (Colab-specific helper; the drive appears under /content/drive)
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
model_name = 'iris_model.pt'
model_dir = "/content/drive/MyDrive/OpenSource/trained_models"
model_path = f"{model_dir}/{model_name}"

# torch.save does not create missing parent folders, so make sure the target
# directory exists before anything is written to it.
os.makedirs(model_dir, exist_ok=True)

In [None]:
# Method 1: save the entire model object with PyTorch's built-in torch.save
torch.save(model, model_path)

In [None]:
# Ensure the model was saved by listing the target directory
! ls /content/drive/MyDrive/OpenSource/trained_models/

iris_model.pt


In [None]:
# Load the same model.
# A fully pickled model can only be restored with weights_only=False. Recent PyTorch
# releases default to weights_only=True, which refuses to unpickle the IrisNet class.
# (The weights_only argument requires PyTorch >= 1.13.)
# SECURITY: weights_only=False runs arbitrary pickle code, so only load files you
# created yourself or otherwise trust.
new_model = torch.load(model_path, weights_only=False)

In [None]:
# Display the architecture of the reloaded model
new_model

IrisNet(
  (fc1): Linear(in_features=4, out_features=30, bias=True)
  (fc2): Linear(in_features=30, out_features=30, bias=True)
  (fc3): Linear(in_features=30, out_features=3, bias=True)
)

In [None]:
# Repeat the earlier prediction with the model that was loaded from disk
example = torch.tensor([5.1, 3.5, 1.4, 0.2])
pred = inference(new_model, example)
print(pred)

tensor(0)


#### here, we get the same model as before.

# state_dictionary

This dictionary contains all of the model's trained parameters. It is easy to see that the weights and biases are stored separately in the serialized format.

In [None]:
# Inspect the learned parameters, keyed by layer name plus ".weight" / ".bias"
model.state_dict()

OrderedDict([('fc1.weight', tensor([[-0.7039, -0.7232,  1.2626,  0.9212],
                      [-0.8046,  0.2032,  0.9405,  1.1627],
                      [ 0.0496, -1.0466,  1.4352,  1.2532],
                      [ 0.0384,  0.7688, -0.6563, -0.9698],
                      [ 0.4449,  0.5505, -1.1367, -0.3181],
                      [ 0.2441,  0.3431, -0.6200, -1.0291],
                      [ 0.4258,  0.1790, -0.5916, -1.4542],
                      [ 0.3147,  0.7842, -1.0299, -0.7441],
                      [-0.6479, -0.2598,  0.9885,  0.8326],
                      [ 0.2466,  0.5842, -0.9097, -0.2893],
                      [-0.5972, -0.4321,  1.0531,  1.1189],
                      [ 0.5840, -0.0381, -0.8366, -0.9032],
                      [-0.3388, -0.6425,  0.9253,  0.6968],
                      [ 0.7705,  0.8602,  0.9352,  0.4550],
                      [-0.5446, -0.3042,  1.1199,  0.5714],
                      [ 0.6133,  0.3940, -1.0259, -0.8739],
                      [-0.

In [None]:
model_name = 'iris_model_state_dict.pt'
model_path = f"/content/drive/MyDrive/OpenSource/trained_models/{model_name}"

# Method 2: write only the learned parameters (the state_dict) to disk,
# rather than the pickled model object
trained_params = model.state_dict()
torch.save(trained_params, model_path)

In [None]:
# List the target directory to confirm the state_dict file was written
! ls /content/drive/MyDrive/OpenSource/trained_models/

iris_model.pt  iris_model_state_dict.pt


In [None]:
model_name = 'iris_model_state_dict.pt'
model_path = f"/content/drive/MyDrive/OpenSource/trained_models/{model_name}"

# A state_dict holds only parameter tensors, so the architecture has to be rebuilt
# in code first. Loading into a brand-new IrisNet (rather than back into the
# already-trained `model`) is what actually shows that the weights survive the
# round trip through the file.
new_model = IrisNet()
new_model.load_state_dict(torch.load(model_path))
new_model.eval()  # switch to inference mode before predicting
print(new_model)

IrisNet(
  (fc1): Linear(in_features=4, out_features=30, bias=True)
  (fc2): Linear(in_features=30, out_features=30, bias=True)
  (fc3): Linear(in_features=30, out_features=3, bias=True)
)


In [None]:
# Run the same sample through `new_model` and print the predicted class index
example = torch.tensor([5.1, 3.5, 1.4, 0.2])
pred = inference(new_model, example)
print(pred)

tensor(0)


### We got the same result as before. We will explore the key difference between the two methods in this (*link) notebook.

In [None]:
# prefinal [attempt 1]