In [1]:
import torch
import torch.nn as nn

### Saving and Loading Models

#### 3 important steps to remember
- torch.save(arg, PATH)
- torch.load(PATH)
- model.load_state_dict(arg)

#### 2 different ways to save the model

**1) Lazy Way**
torch.save(model, PATH)

The model class must be defined (or importable) in the code that loads the file
model = torch.load(PATH)
model.eval()

**2) Recommended Way: save only the state_dict**
torch.save(model.state_dict(), PATH)

Here, the model must be instantiated first; its parameters are then loaded from the state dict
model = Model(*args, **kwargs)
model.load_state_dict(torch.load(PATH))
model.eval()

In [3]:
# create a simple model

class Model(nn.Module):
    """Single linear layer followed by a sigmoid.

    Args:
        n_input_features: size of the last dimension of the input tensor.
    """

    def __init__(self, n_input_features):
        super().__init__()
        # One output unit; the attribute name fixes the state_dict keys
        # ("linear.weight", "linear.bias").
        self.linear = nn.Linear(n_input_features, 1)

    def forward(self, x):
        """Apply the linear layer to ``x`` and squash the result with a sigmoid."""
        logits = self.linear(x)
        return torch.sigmoid(logits)
    
# Instantiate with 6 input features; the save/load cells below reuse this `model`.
model = Model(n_input_features=6)
# train your model... (placeholder: no training loop appears in this cell)

In [5]:
# Print the parameters before saving so they can be compared with the
# parameters of the reloaded model printed at the end of the cell.
for param in model.parameters():
    print(param)

# save and load entire model
# torch.save(model, ...) pickles the whole module object, so the file is tied
# to the exact class definition and import path used when it was saved.
PATH = "model.pth"
torch.save(model, PATH)

# A fully pickled module can only be restored by the unrestricted unpickler.
# PyTorch >= 2.6 defaults to weights_only=True, which rejects it, so the flag
# must be passed explicitly.
# SECURITY: weights_only=False can execute arbitrary code while unpickling;
# only use it on files you created yourself or otherwise trust.
loaded_model = torch.load(PATH, weights_only=False)
loaded_model.eval()

for param in loaded_model.parameters():
    print(param)


Parameter containing:
tensor([[-0.0555,  0.1670,  0.1065, -0.3711, -0.2583, -0.0595]],
       requires_grad=True)
Parameter containing:
tensor([-0.1779], requires_grad=True)
Parameter containing:
tensor([[-0.0555,  0.1670,  0.1065, -0.3711, -0.2583, -0.0595]],
       requires_grad=True)
Parameter containing:
tensor([-0.1779], requires_grad=True)


In [6]:
# save and load only the model parameters (recommended)
FILE = "model.pth"

torch.save(model.state_dict(), FILE)

print(model.state_dict())

# The architecture is not stored in the file, so build a fresh instance with
# the same constructor arguments and then copy the saved tensors into it.
loaded_model = Model(n_input_features=6)
# A state_dict holds only tensors, so the restricted unpickler is sufficient:
# weights_only=True avoids executing pickled code and silences the
# FutureWarning that some PyTorch versions emit when the flag is omitted.
loaded_model.load_state_dict(torch.load(FILE, weights_only=True))
loaded_model.eval()

print(loaded_model.state_dict())

OrderedDict([('linear.weight', tensor([[-0.0555,  0.1670,  0.1065, -0.3711, -0.2583, -0.0595]])), ('linear.bias', tensor([-0.1779]))])
OrderedDict([('linear.weight', tensor([[-0.0555,  0.1670,  0.1065, -0.3711, -0.2583, -0.0595]])), ('linear.bias', tensor([-0.1779]))])


In [9]:
# save and load a training checkpoint (model + optimizer + epoch)
learning_rate = 0.01
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

# Everything needed to resume training goes into one dictionary.
checkpoint = {
    "epoch": 90,
    "model_state": model.state_dict(),
    "optim_state": optimizer.state_dict()
}

print(optimizer.state_dict())
FILE = "checkpoint.pth"

torch.save(checkpoint, FILE)

# Rebuild the model and an optimizer bound to the *new* model's parameters.
# lr=0 is only a placeholder: load_state_dict below restores the saved
# param_groups, including the original learning rate.
loaded_model = Model(n_input_features=6)
optimizer = torch.optim.SGD(loaded_model.parameters(), lr=0)
# The checkpoint contains only tensors and plain Python containers/scalars,
# so the restricted unpickler is enough.
checkpoint = torch.load(FILE, weights_only=True)

loaded_model.load_state_dict(checkpoint["model_state"])
# The optimizer state is restored on the optimizer, not on the model.
optimizer.load_state_dict(checkpoint["optim_state"])
epoch = checkpoint["epoch"]

# Put the restored model (not the original one) into evaluation mode.
loaded_model.eval()

{'state': {}, 'param_groups': [{'lr': 0.01, 'momentum': 0, 'dampening': 0, 'weight_decay': 0, 'nesterov': False, 'maximize': False, 'foreach': None, 'differentiable': False, 'params': [0, 1]}]}


Model(
  (linear): Linear(in_features=6, out_features=1, bias=True)
)

In [10]:
""" SAVING ON GPU/CPU 

# 1) Save on GPU, Load on CPU
device = torch.device("cuda")
model.to(device)
torch.save(model.state_dict(), PATH)

device = torch.device('cpu')
model = Model(*args, **kwargs)
model.load_state_dict(torch.load(PATH, map_location=device))

# 2) Save on GPU, Load on GPU
device = torch.device("cuda")
model.to(device)
torch.save(model.state_dict(), PATH)

model = Model(*args, **kwargs)
model.load_state_dict(torch.load(PATH))
model.to(device)

# Note: Be sure to use the .to(torch.device('cuda')) function 
# on all model inputs, too!

# 3) Save on CPU, Load on GPU
torch.save(model.state_dict(), PATH)

device = torch.device("cuda")
model = Model(*args, **kwargs)
model.load_state_dict(torch.load(PATH, map_location="cuda:0"))  # Choose whatever GPU device number you want
model.to(device)

# This loads the model to a given GPU device. 
# Next, be sure to call model.to(torch.device('cuda')) to convert the model’s parameter tensors to CUDA tensors
"""

' SAVING ON GPU/CPU \n\n# 1) Save on GPU, Load on CPU\ndevice = torch.device("cuda")\nmodel.to(device)\ntorch.save(model.state_dict(), PATH)\n\ndevice = torch.device(\'cpu\')\nmodel = Model(*args, **kwargs)\nmodel.load_state_dict(torch.load(PATH, map_location=device))\n\n# 2) Save on GPU, Load on GPU\ndevice = torch.device("cuda")\nmodel.to(device)\ntorch.save(model.state_dict(), PATH)\n\nmodel = Model(*args, **kwargs)\nmodel.load_state_dict(torch.load(PATH))\nmodel.to(device)\n\n# Note: Be sure to use the .to(torch.device(\'cuda\')) function \n# on all model inputs, too!\n\n# 3) Save on CPU, Load on GPU\ntorch.save(model.state_dict(), PATH)\n\ndevice = torch.device("cuda")\nmodel = Model(*args, **kwargs)\nmodel.load_state_dict(torch.load(PATH, map_location="cuda:0"))  # Choose whatever GPU device number you want\nmodel.to(device)\n\n# This loads the model to a given GPU device. \n# Next, be sure to call model.to(torch.device(\'cuda\')) to convert the model’s parameter tensors to CUDA 