In [1]:
# In this project we will discuss how to save our PyTorch models to files and load them back to make predictions.

In [2]:
# Overview
# This notebook is in three parts; they are:

# Build an Example Model
# What’s Inside a PyTorch Model
# Accessing state_dict of a Model

In [1]:
# Build an Example Model

# required libraries
import torch as th
import torch.nn as nn
import torch.optim as op

from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

In [2]:
# Fetch the Iris dataset; the returned object exposes its contents as NumPy arrays

dt = load_iris()

In [3]:
# Pull the feature matrix and the class labels out of the dataset object
x, y = dt.data, dt.target

In [4]:
# Convert the NumPy arrays into PyTorch tensors:
# features as 32-bit floats, labels as 64-bit integers (class indices).

X = th.tensor(x, dtype=th.float32)

Y = th.tensor(y, dtype=th.long)

In [7]:
# Display the shapes of the feature and label tensors as a sanity check
(X.shape, Y.shape)

(torch.Size([150, 4]), torch.Size([150]))

In [8]:
# Split features and labels into training and test subsets with
# scikit-learn's train_test_split function (default arguments).

Xtrain, Xtest, Ytrain, Ytest = train_test_split(X, Y)

In [9]:
# PyTorch model

class Multiclass(nn.Module):
    """Small feed-forward classifier: 4 inputs -> 8 hidden units -> 3 class log-probabilities."""

    def __init__(self):
        """Create the layers; attribute names become the prefixes of the state_dict keys."""
        super().__init__()

        # Layers are registered in the order they are applied in forward()
        self.hidden = nn.Linear(4, 8)
        self.act = nn.ReLU()
        self.output = nn.Linear(8, 3)
        self.logsoftmax = nn.LogSoftmax(dim=1)

    def forward(self, x):
        """Return per-class log-probabilities for a batch `x` with 4 features per row."""
        hidden_out = self.hidden(x)
        activated = self.act(hidden_out)
        logits = self.output(activated)
        return self.logsoftmax(logits)


# Instantiate the (untrained) model and show its layer structure
mdl = Multiclass()
print(mdl)

Multiclass(
  (hidden): Linear(in_features=4, out_features=8, bias=True)
  (act): ReLU()
  (output): Linear(in_features=8, out_features=3, bias=True)
  (logsoftmax): LogSoftmax(dim=1)
)


In [10]:
# Loss metric and optimizer.
# NLLLoss is applied to the log-probabilities produced by the model's LogSoftmax layer.

criterion = nn.NLLLoss()
optimizer = op.Adam(params=mdl.parameters(), lr=0.001)

In [11]:
# prepare model and training parameters

n_epoch=100
batch_size=5
# Batch offsets must cover the training split only. Using len(X) (the full
# dataset) would generate offsets past the end of Xtrain, and the training
# loop would then slice empty batches from Xtrain/Ytrain.
batch_start=th.arange(0,len(Xtrain),batch_size)

In [12]:
# training loop

for epoch in range(n_epoch):
    for start in batch_start:
        # slice the current mini-batch out of the training split
        stop = start + batch_size
        X_batch = Xtrain[start:stop]
        Y_batch = Ytrain[start:stop]
        # forward pass: log-probabilities, then the loss against the labels
        log_probs = mdl(X_batch)
        loss = criterion(log_probs, Y_batch)
        # backward pass: clear old gradients, backpropagate, update the weights
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

In [13]:
# With such a simple model and small dataset, it shouldn’t take a long time to finish training.
# Afterwards, we can confirm that this model works, by evaluating it with the test set:

In [15]:
# Score the trained model on the test split:
# the predicted class is the index of the largest log-probability in each row.
y_pred = mdl(Xtest)
predicted_class = th.argmax(y_pred, 1)
accuracy = (predicted_class == Ytest).float().mean()
print("Accuracy: %.2f" % accuracy)

Accuracy: 1.00


In [15]:
# What’s Inside a PyTorch Model

# A PyTorch model is an object in Python. It holds some deep learning building blocks,
# such as various kinds of layers and activation functions.

In [16]:
# Build an Adam optimizer over everything yielded by mdl.parameters()
optimizer = op.Adam(params=mdl.parameters(), lr=0.001)

# The function model.parameters() gives us a generator that references each layer's
# trainable parameters in turn, in the form of PyTorch tensors.

In [17]:
# create a new model
new_model=Multiclass()

# ask PyTorch to ignore autograd on update and overwrite parameters
with th.no_grad():
    # Both models are instances of the same class, so pairing their
    # parameters() generators lines up corresponding tensors.
    for new_tensor,old_tensor in zip(new_model.parameters(),mdl.parameters()):
        new_tensor.copy_(old_tensor)

# test with new model using copied tensor
# (evaluate new_model, not mdl, so the check actually exercises the copied weights)
y_pred=new_model(Xtest)
accuracy=(th.argmax(y_pred,1)==Ytest).float().mean()
print("Accuracy: %.2f" % accuracy)

Accuracy: 1.00


In [18]:
# The result should be exactly the same as before, since we essentially made the two models
# identical by copying the parameters.

In [19]:
# Accessing state_dict of a Model

# To access all parameters of a model, trainable or not, we can get it from state_dict() function
# From the model above, this is what we can get:

In [18]:
# pprint "pretty-prints" Python data structures in a readable, nested layout
import pprint

# Dump every entry of the model's state_dict, indenting nested levels by 4 spaces
pprint.pprint(mdl.state_dict(), indent=4)

OrderedDict([   (   'hidden.weight',
                    tensor([[ 0.0946, -0.4854, -0.4933, -0.3036],
        [-0.1476, -0.3080, -0.1405,  0.1318],
        [-0.1230,  0.0967, -0.4026, -0.3532],
        [ 0.3155,  0.9868, -0.8226, -0.5512],
        [ 0.4532,  0.8792, -0.6015, -1.0659],
        [-0.0846,  0.1975, -0.0116, -0.3712],
        [ 0.2793, -0.5005,  1.0453,  1.6698],
        [-0.0959, -0.4043,  0.3298,  0.0575]])),
                (   'hidden.bias',
                    tensor([ 0.3790,  0.1692, -0.4927,  0.9936,  0.1868, -0.3202, -0.3994, -0.5099])),
                (   'output.weight',
                    tensor([[ 0.1588, -0.2854, -0.2622,  1.3242,  0.5808, -0.0564, -0.7935, -0.1476],
        [ 0.3037,  0.2441,  0.0088,  0.3129,  0.2621,  0.0309,  0.2539,  0.0332],
        [-0.1600, -0.2113,  0.2586, -1.5670, -1.5884,  0.3113,  0.6893, -0.3124]])),
                ('output.bias', tensor([-0.2348, -0.0321, -0.3230]))])


In [21]:
# It is called state_dict because all state variables of a model are here.
# The OrderedDict object allows us to map the weights back to the parameters correctly by matching their names.

In [19]:
import pickle

In [23]:
# We know it works because the model we didn’t train produced the same result as the one we trained.

In [20]:
# Save the model: serialize the trained model's state_dict with pickle
trained_state = mdl.state_dict()
with open("iris-model.pickle","wb") as out_file:
    pickle.dump(trained_state, out_file)

# Create a new, untrained model and restore the saved states into it
newmodel = Multiclass()
with open("iris-model.pickle", "rb") as in_file:
    restored_state = pickle.load(in_file)
newmodel.load_state_dict(restored_state)

# Test the new model, which now carries the loaded weights
y_pred = newmodel(Xtest)
matches = th.argmax(y_pred, 1) == Ytest
acc = matches.float().mean()
print("Accuracy: %.2f" % acc)

Accuracy: 1.00


In [25]:
# The recommended way is to use the PyTorch API to save and load the states, instead of using pickle manually:

In [21]:
# Save model: write only the state_dict using PyTorch's own serializer
th.save(mdl.state_dict(), "iris-model.pth")

# Create a new model, then read the states back and load them into it
newmodel = Multiclass()
loaded_state = th.load("iris-model.pth")
newmodel.load_state_dict(loaded_state)

# Test the new model, which now carries the loaded weights
y_pred = newmodel(Xtest)
matches = th.argmax(y_pred, 1) == Ytest
acc = matches.float().mean()
print("Accuracy: %.2f" % acc)

Accuracy: 1.00


In [22]:
# Save model: this pickles the whole Multiclass object, not just its state_dict
th.save(mdl, "iris-model-full.pth")

# Load model
# Recent PyTorch releases default th.load to weights_only=True, which refuses to
# unpickle an arbitrary nn.Module. Loading a full model therefore needs
# weights_only=False. This runs the pickle machinery, so only use it on files
# you created yourself or otherwise trust.
# NOTE(review): very old PyTorch versions do not accept the weights_only keyword.
new_model = th.load("iris-model-full.pth", weights_only=False)

# test with the model object restored from disk
y_pred = new_model(Xtest)
acc = (th.argmax(y_pred, 1) == Ytest).float().mean()
print("Accuracy: %.2f" % acc)

Accuracy: 1.00


In [28]:
# The new_model object above is an instance of Multiclass class that we defined before.
# When we load the model from disk, Python needs to know in detail how this class is defined.

# That’s why it is recommended to save only the state dict rather than the entire model.

In [29]:
# Putting everything together, the following is the complete code to demonstrate how to create a model, train it
# and save to disk:

In [23]:
# Load data into NumPy arrays
data = load_iris()
X, y = data["data"], data["target"]

# convert NumPy array into PyTorch tensors
X = th.FloatTensor(X)
y = th.tensor(y, dtype=th.long)

# split: 70% of the samples for training, the rest held out for testing
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.7, shuffle=True)

# PyTorch model: 4 features -> 8 -> 5 -> 3 class log-probabilities
mdl=nn.Sequential(
    nn.Linear(4,8),
    nn.ReLU(),
    nn.Linear(8,5),
    nn.ReLU(),
    nn.Linear(5,3),
    nn.LogSoftmax(dim=1)
)

# loss metric and optimizer
loss_fn = nn.NLLLoss()
optimizer = op.Adam(mdl.parameters(), lr=0.001)

# prepare model and training parameters
n_epochs = 100
batch_size = 5
# Batch offsets must cover the training split only. Using len(X) (the full
# dataset) would generate offsets past the end of X_train, and the loop below
# would then slice empty batches from X_train/y_train.
batch_start = th.arange(0, len(X_train), batch_size)

# training loop
for epoch in range(n_epochs):
    for start in batch_start:
        # take a batch
        X_batch = X_train[start:start+batch_size]
        y_batch = y_train[start:start+batch_size]
        # forward pass
        y_pred = mdl(X_batch)
        loss = loss_fn(y_pred, y_batch)
        # backward pass
        optimizer.zero_grad()
        loss.backward()
        # update weights
        optimizer.step()

# Save model (state_dict only)
th.save(mdl.state_dict(), "iris-model.pth")

In [34]:
# And the following is how to load the model from disk and run it for inference:

In [24]:
# Load data into NumPy arrays
data = load_iris()
x = data["data"]
y = data["target"]

# Convert the NumPy arrays into PyTorch tensors
# NOTE(review): X and Y are not used by the inference code in this cell,
# which evaluates on X_test / y_test instead.
X = th.tensor(x, dtype=th.float32)
Y = th.tensor(y, dtype=th.long)
 
# PyTorch model
class Multiclass(nn.Module):
    """Small feed-forward classifier: 4 inputs -> 8 hidden units -> 3 class log-probabilities."""

    def __init__(self):
        """Create the layers; attribute names become the prefixes of the state_dict keys."""
        super().__init__()
        # Layers are registered in the order they are applied in forward()
        self.hidden = nn.Linear(4, 8)
        self.act = nn.ReLU()
        self.output = nn.Linear(8, 3)
        self.logsoftmax = nn.LogSoftmax(dim=1)

    def forward(self, x):
        """Return per-class log-probabilities for a batch `x` with 4 features per row."""
        hidden_out = self.hidden(x)
        activated = self.act(hidden_out)
        logits = self.output(activated)
        return self.logsoftmax(logits)

# Create a new model and restore its states from the pickle file.
# NOTE(review): this reads "iris-model.pickle", whereas the training code just
# above saves to "iris-model.pth" — confirm which file is intended here.
model = Multiclass()
with open("iris-model.pickle", "rb") as fp:
    saved_state = pickle.load(fp)
model.load_state_dict(saved_state)

# Run model for inference
# NOTE(review): X_test / y_test are not defined in this cell; they must already
# exist in the kernel from an earlier cell.
y_pred = model(X_test)
hits = th.argmax(y_pred, 1) == y_test
acc = hits.float().mean()
print("Accuracy: %.2f" % acc)

Accuracy: 0.98


In [55]:
# In this topic, we learned how to keep a copy of our trained PyTorch model on disk and how to reuse it:

# What are parameters and states in a PyTorch model
# How to save all necessary states from a model to disk
# How to rebuild a working model from the saved states