## Saving & Loading Model

https://www.youtube.com/watch?v=9L9jEOwRrCg&list=PLqnslRFeH2UrcDBWF5mfPGpqQDSta6VK4&index=17

In [2]:
import torch
import torch.nn as nn
import copy

from sklearn import datasets
import numpy as np


In [3]:
class Model(nn.Module):
    def __init__(self,n_input_features):
        super(Model,self).__init__()
        self.linear= nn.Linear(n_input_features,1)

    def forward(self,x):
        y_pred=torch.sigmoid(self.linear(x))
        return y_pred

In [4]:
n_input_features=6
model=Model(n_input_features=n_input_features)

print("Old Parameters of Model")
old_state = copy.deepcopy(model.state_dict())
print(old_state)
for param in model.parameters():
    print(param)

Old Parameters of Model
OrderedDict([('linear.weight', tensor([[-0.0284, -0.0576, -0.4028,  0.2033, -0.2339, -0.1442]])), ('linear.bias', tensor([0.1883]))])
Parameter containing:
tensor([[-0.0284, -0.0576, -0.4028,  0.2033, -0.2339, -0.1442]],
       requires_grad=True)
Parameter containing:
tensor([0.1883], requires_grad=True)


In [5]:
from sklearn import datasets
import numpy as np

# 0)Prepare Data
X_numpy,y_numpy=datasets.make_regression(n_samples=10,n_features=n_input_features,noise=20,random_state=1)

x=torch.from_numpy(X_numpy.astype(np.float32))
y=torch.from_numpy(y_numpy.astype(np.float32))
y=y.view(y.shape[0],1) # col vector

n_samples,n_features=x.shape
print("n_samples,n_features",n_samples,n_features)


print("Training ......")
learning_rate=0.01
criterion=nn.MSELoss()
optimizer=torch.optim.SGD(model.parameters(),lr=learning_rate)
    
# Training 
num_epochs=10
for epoch in range(num_epochs):
    print(f'Epoch:{epoch}')
    # Forward Pass and loss
    y_predicted=model(x)
    loss=criterion(y_predicted,y)

    # backward pass
    loss.backward()

    # update
    optimizer.step()

    optimizer.zero_grad()

    if (epoch+1)%2==0:
        print(f'epoch: {epoch+1}, loss = {loss.item():.4f}')


print("Old Parameters of Model",old_state)
model_trained_state=copy.deepcopy(model.state_dict())
print("New Parameters of Model",model_trained_state)


n_samples,n_features 10 6
Training ......
Epoch:0
Epoch:1
epoch: 2, loss = 18550.6465
Epoch:2
Epoch:3
epoch: 4, loss = 18506.3066
Epoch:4
Epoch:5
epoch: 6, loss = 18492.7148
Epoch:6
Epoch:7
epoch: 8, loss = 18486.6367
Epoch:8
Epoch:9
epoch: 10, loss = 18483.2656
Old Parameters of Model OrderedDict([('linear.weight', tensor([[-0.0284, -0.0576, -0.4028,  0.2033, -0.2339, -0.1442]])), ('linear.bias', tensor([0.1883]))])
New Parameters of Model OrderedDict([('linear.weight', tensor([[ 1.0419, -0.2924,  1.2426,  1.2473, -0.0870,  1.6466]])), ('linear.bias', tensor([-0.6256]))])


### Method(1) Lazy Method: Save Whole Model

In [6]:
MODEL_FILE_PATH='./Saved_models/model.pth'

# Save
print("Saving ...")
torch.save(model,MODEL_FILE_PATH)
print("Model Saved :D")

# Load
print("Loading ...")
model_loaded_1=torch.load(MODEL_FILE_PATH)
print("Model Loaded :D")

print("Old State:",old_state)
print("Saved Sate:",model_trained_state)
print("Loaded State:",model_loaded_1.state_dict())

Saving ...
Model Saved :D
Loading ...
Model Loaded :D
Old State: OrderedDict([('linear.weight', tensor([[-0.0284, -0.0576, -0.4028,  0.2033, -0.2339, -0.1442]])), ('linear.bias', tensor([0.1883]))])
Saved Sate: OrderedDict([('linear.weight', tensor([[ 1.0419, -0.2924,  1.2426,  1.2473, -0.0870,  1.6466]])), ('linear.bias', tensor([-0.6256]))])
Loaded State: OrderedDict([('linear.weight', tensor([[ 1.0419, -0.2924,  1.2426,  1.2473, -0.0870,  1.6466]])), ('linear.bias', tensor([-0.6256]))])


### Method(2): Save State

In [7]:
MODEL_STATE_FILE_PATH="./Saved_models/state.pth"

# Save
print("Saving ...")
torch.save(model.state_dict(),MODEL_STATE_FILE_PATH)
print("Model Saved :D")

# Load
print("Loading ...")
loaded_model_2=Model(n_input_features=n_input_features)
loaded_model_2.load_state_dict(torch.load(MODEL_STATE_FILE_PATH))
print("Model Loaded :D")

print("Old State:",old_state)
print("Saved Sate:",model_trained_state)
print("Loaded State:",loaded_model_2.state_dict())



Saving ...
Model Saved :D
Loading ...
Model Loaded :D
Old State: OrderedDict([('linear.weight', tensor([[-0.0284, -0.0576, -0.4028,  0.2033, -0.2339, -0.1442]])), ('linear.bias', tensor([0.1883]))])
Saved Sate: OrderedDict([('linear.weight', tensor([[ 1.0419, -0.2924,  1.2426,  1.2473, -0.0870,  1.6466]])), ('linear.bias', tensor([-0.6256]))])
Loaded State: OrderedDict([('linear.weight', tensor([[ 1.0419, -0.2924,  1.2426,  1.2473, -0.0870,  1.6466]])), ('linear.bias', tensor([-0.6256]))])


### Saving CheckPoint

In [8]:
from sklearn import datasets
import numpy as np

# 0)Prepare Data
X_numpy,y_numpy=datasets.make_regression(n_samples=10,n_features=n_input_features,noise=20,random_state=1)

x=torch.from_numpy(X_numpy.astype(np.float32))
y=torch.from_numpy(y_numpy.astype(np.float32))
y=y.view(y.shape[0],1) # col vector

n_samples,n_features=x.shape
print("n_samples,n_features",n_samples,n_features)


print("Training ......")
learning_rate=0.01
criterion=nn.MSELoss()
optimizer=torch.optim.SGD(model.parameters(),lr=learning_rate)


# Training 
num_epochs=100
for epoch in range(num_epochs):
    # Forward Pass and loss
    y_predicted=model(x)
    loss=criterion(y_predicted,y)

    # backward pass
    loss.backward()

    # update
    optimizer.step()

    optimizer.zero_grad()

    if (epoch+1)%10==0:
        print(f'epoch: {epoch+1}, loss = {loss.item():.4f}')

checkpoint={
    "epoch":100,
    "model_state":model.state_dict(),
    "optim_state":optimizer.state_dict()
}
print("checkpoint",checkpoint)

CHECKPOINT_PATH="./Saved_models/checkpoint.pth"

# Save
print("Saving ...")
torch.save(checkpoint,CHECKPOINT_PATH)
print("CheckPoint Saved :D")


n_samples,n_features 10 6
Training ......
epoch: 10, loss = 18477.1152
epoch: 20, loss = 18475.2148
epoch: 30, loss = 18474.2852
epoch: 40, loss = 18473.7305
epoch: 50, loss = 18473.3633
epoch: 60, loss = 18473.0996
epoch: 70, loss = 18472.9023
epoch: 80, loss = 18472.7461
epoch: 90, loss = 18472.6230
epoch: 100, loss = 18472.5195
checkpoint {'epoch': 100, 'model_state': OrderedDict([('linear.weight', tensor([[ 2.2189, -0.6021,  2.3864,  1.9830,  0.3417,  3.0607]])), ('linear.bias', tensor([-1.4575]))]), 'optim_state': {'state': {0: {'momentum_buffer': None}, 1: {'momentum_buffer': None}}, 'param_groups': [{'lr': 0.01, 'momentum': 0, 'dampening': 0, 'weight_decay': 0, 'nesterov': False, 'maximize': False, 'foreach': None, 'differentiable': False, 'params': [0, 1]}]}}
Saving ...
CheckPoint Saved :D


In [9]:
CHECKPOINT_PATH="./Saved_models/checkpoint.pth"

# Load
print("Loading ...")
loaded_checkpoint=torch.load(CHECKPOINT_PATH)
print("CheckPoint Loaded :D")


epoch=loaded_checkpoint['epoch']

model=Model(n_input_features=n_input_features)
criterion=nn.MSELoss()
optimizer=torch.optim.SGD(model.parameters(),lr=0) 

model.load_state_dict(checkpoint['model_state'])
optimizer.load_state_dict(checkpoint['optim_state'])

# Continue Training
num_epochs=50
for epoch in range(num_epochs):
    # Forward Pass and loss
    y_predicted=model(x)
    loss=criterion(y_predicted,y)

    # backward pass
    loss.backward()

    # update
    optimizer.step()

    optimizer.zero_grad()

    if (epoch+1)%10==0:
        print(f'epoch: {epoch+1}, loss = {loss.item():.4f}')

Loading ...
CheckPoint Loaded :D
epoch: 10, loss = 18472.4336
epoch: 20, loss = 18472.3633
epoch: 30, loss = 18472.3008
epoch: 40, loss = 18472.2461
epoch: 50, loss = 18472.1992


### Using GPU

In [10]:
import torch
import torch.nn as nn
import copy

from sklearn import datasets
import numpy as np

n_input_features=6
model_gpu=Model(n_input_features=n_input_features)

print("Old Parameters of Model")
old_state_model_gpu = copy.deepcopy(model_gpu.state_dict())
print(old_state_model_gpu)

test_x=torch.tensor([0.0,0.0,3.0,0.0,5.0,0.5])
model_gpu.eval()
test_y=model_gpu(test_x)

# 0)Prepare Data
X_numpy,y_numpy=datasets.make_regression(n_samples=10,n_features=n_input_features,noise=20,random_state=1)

x=torch.from_numpy(X_numpy.astype(np.float32))
y=torch.from_numpy(y_numpy.astype(np.float32))
y=y.view(y.shape[0],1) # col vector

n_samples,n_features=x.shape
print("n_samples,n_features",n_samples,n_features)


print("Training ......")
learning_rate=0.01
criterion=nn.MSELoss()
optimizer=torch.optim.SGD(model_gpu.parameters(),lr=learning_rate)


# Training 
num_epochs=10
model_gpu.train()
for epoch in range(num_epochs):
    # Forward Pass and loss
    y_predicted=model_gpu(x)
    loss=criterion(y_predicted,y)

    # backward pass
    loss.backward()

    # update
    optimizer.step()

    optimizer.zero_grad()

    if epoch==0 or (epoch+1)%100==0:
        print(f'epoch: {epoch+1}, loss = {loss.item():.4f}')

trained_model_gpu_state=model_gpu.state_dict()
print("old_state_model_gpu",old_state_model_gpu)
print("trained_model_gpu_state:",trained_model_gpu_state)

model_gpu.eval()
print("\n\nBefore Train Y",test_y)
print('After Train Y:',model_gpu(test_x))

Old Parameters of Model
OrderedDict([('linear.weight', tensor([[-0.2760,  0.0468, -0.2364, -0.3454, -0.1454, -0.1949]])), ('linear.bias', tensor([0.0373]))])
n_samples,n_features 10 6
Training ......
epoch: 1, loss = 18625.9336
old_state_model_gpu OrderedDict([('linear.weight', tensor([[-0.2760,  0.0468, -0.2364, -0.3454, -0.1454, -0.1949]])), ('linear.bias', tensor([0.0373]))])
trained_model_gpu_state: OrderedDict([('linear.weight', tensor([[ 0.8653, -0.2219,  1.4056,  0.9043, -0.0538,  1.7966]])), ('linear.bias', tensor([-0.6246]))])


Before Train Y tensor([0.1830], grad_fn=<SigmoidBackward0>)
After Train Y: tensor([0.9855], grad_fn=<SigmoidBackward0>)


#### Save on GPU

In [11]:
SAVE_FROM_GPU_PATH="./Saved_models/model_gpu.pth"
test_x=torch.tensor([0.0,0.0,3.0,0.0,5.0,0.5])


# Save on GPU
device=torch.device("cuda")
model_gpu.to(device)
print("trained_model_gpu_state",model_gpu.state_dict())
torch.save(model_gpu.state_dict(),SAVE_FROM_GPU_PATH)
print("Saved on GPU :D")


# Load on CPU
print("Loading CPU ....")
device=torch.device('cpu')
n_input_features=6
model_gpu_loaded_cpu=Model(n_input_features=n_input_features)
model_gpu_loaded_cpu.load_state_dict(torch.load(SAVE_FROM_GPU_PATH,map_location=device))
print(model_gpu_loaded_cpu.state_dict())

model_gpu_loaded_cpu.eval()
print('After Load(CPU) Y:',model_gpu_loaded_cpu(test_x))

# Load on GPU
print("Loading GPU ....")
device=torch.device('cuda')
n_input_features=6
model_gpu_loaded_gpu=Model(n_input_features=n_input_features)
model_gpu_loaded_gpu.load_state_dict(torch.load(SAVE_FROM_GPU_PATH))
# model_gpu_loaded_gpu.load_state_dict(torch.load(SAVE_FROM_GPU_PATH,map_location=device)) # or just specify map_location which is the same :D
print(model_gpu_loaded_gpu.state_dict())

model_gpu_loaded_gpu.eval()
print('After Load(GPU) Y:',model_gpu_loaded_gpu(test_x))

trained_model_gpu_state OrderedDict([('linear.weight', tensor([[ 0.8653, -0.2219,  1.4056,  0.9043, -0.0538,  1.7966]],
       device='cuda:0')), ('linear.bias', tensor([-0.6246], device='cuda:0'))])
Saved on GPU :D
Loading CPU ....
OrderedDict([('linear.weight', tensor([[ 0.8653, -0.2219,  1.4056,  0.9043, -0.0538,  1.7966]])), ('linear.bias', tensor([-0.6246]))])
After Load(CPU) Y: tensor([0.9855], grad_fn=<SigmoidBackward0>)
Loading GPU ....
OrderedDict([('linear.weight', tensor([[ 0.8653, -0.2219,  1.4056,  0.9043, -0.0538,  1.7966]])), ('linear.bias', tensor([-0.6246]))])
After Load(GPU) Y: tensor([0.9855], grad_fn=<SigmoidBackward0>)


Note: If you don't specify the map_location parameter when using torch.load, PyTorch will attempt to load the model on the device where it was originally trained and saved. This can lead to issues if the model was trained and saved on a GPU, and you're trying to load it on a CPU or a different GPU.

#### Save on CPU