In [1]:
# torch.save(arg, PATH)
# torch.load(PATH)
# model.load_state_dict(arg)

##### Method 1 Complete Model #####
# torch.save(model, PATH)

## model class must be defined somewhere
# model = torch.load(PATH)
# model.eval()

##### Method 2 State Dict (recommended) #####
# torch.save(model.state_dict(), PATH)

## model must be created again with parameters
# model = Model(*args, **kwargs)
# model.load_state_dict(torch.load(PATH))
# model.eval()

In [None]:
## save on GPU, load on CPU
# device = torch.device("cuda")
# model.to(device)
# torch.save(model.state_dict(), PATH)

# device = torch.device("cpu")
# model = Model(*args, **kwargs)
# model.load_state_dict(torch.load(PATH, map_location=device))

In [None]:
## save on GPU, load on GPU
# device = torch.device("cuda")
# model.to(device)
# torch.save(model.state_dict(), PATH)

# model = Model(*args, **kwargs)
# model.load_state_dict(torch.load(PATH))
# model.to(device)

In [None]:
## save on CPU, load on GPU
# torch.save(model.state_dict(), PATH)

# device = torch.device("cuda")
# model = Model(*args, **kwargs)
# model.load_state_dict(torch.load(PATH, map_location="cuda:0")) # choose whatever GPU device number
# model.to(device)

In [2]:
import torch
import torch.nn as nn
import os

In [3]:
class Model(nn.Module):
    """Single-layer logistic-regression style model: linear map followed by a sigmoid.

    Args:
        n_input_features: Number of input features fed to the linear layer.
    """

    def __init__(self, n_input_features):
        super().__init__()
        # One output unit; stored as ``linear`` so the state_dict keys are
        # ``linear.weight`` and ``linear.bias``.
        self.linear = nn.Linear(n_input_features, 1)

    def forward(self, x):
        """Return ``sigmoid(linear(x))`` for the input tensor ``x``."""
        # The layer attribute is lowercase ``linear``; ``self.Linear`` is not
        # defined on the module and raises AttributeError on the first call.
        return torch.sigmoid(self.linear(x))

In [4]:
# Build the reference model and show its freshly initialised parameters.
original_model = Model(n_input_features=6)
# train the model...
initial_state = original_model.state_dict()
print(initial_state)

OrderedDict([('linear.weight', tensor([[ 0.0284,  0.3091, -0.3039, -0.1332, -0.0445, -0.1158]])), ('linear.bias', tensor([0.1065]))])


### Method 1

In [5]:
# Method 1: serialize the whole model object, not just its weights.
FILE1 = "model.pth"
torch.save(obj=original_model, f=FILE1)

In [6]:
# Restore the complete model object and switch it to evaluation mode
# (``eval()`` returns the module itself, so the calls can be chained).
# NOTE(review): recent PyTorch releases default ``torch.load`` to
# ``weights_only=True``, which rejects a fully pickled model; pass
# ``weights_only=False`` there -- confirm against the installed version.
model_complete = torch.load(FILE1).eval()

# One parameter per line, same text as printing them in a loop.
print(*model_complete.parameters(), sep="\n")

Parameter containing:
tensor([[ 0.0284,  0.3091, -0.3039, -0.1332, -0.0445, -0.1158]],
       requires_grad=True)
Parameter containing:
tensor([0.1065], requires_grad=True)


### Method 2

In [7]:
# Method 2: persist only the parameter dictionary.
FILE2 = "model_dict.pth"
torch.save(obj=original_model.state_dict(), f=FILE2)

In [8]:
# The architecture has to be rebuilt first; the file only holds the weights.
model_dict = Model(n_input_features=6)
saved_state = torch.load(FILE2)
model_dict.load_state_dict(saved_state)
model_dict.eval()

# One parameter per line, same text as printing them in a loop.
print(*model_dict.parameters(), sep="\n")

Parameter containing:
tensor([[ 0.0284,  0.3091, -0.3039, -0.1332, -0.0445, -0.1158]],
       requires_grad=True)
Parameter containing:
tensor([0.1065], requires_grad=True)


### Checkpoint

In [9]:
# Plain SGD over the original model's parameters; its state dict is what a
# checkpoint stores alongside the model weights.
learning_rate = 0.01
optimizer = torch.optim.SGD(params=original_model.parameters(), lr=learning_rate)
optimizer_state = optimizer.state_dict()
print(optimizer_state)

{'state': {}, 'param_groups': [{'lr': 0.01, 'momentum': 0, 'dampening': 0, 'weight_decay': 0, 'nesterov': False, 'maximize': False, 'foreach': None, 'params': [0, 1]}]}


In [10]:
FILE3 = "checkpoint.pth"

# Bundle the epoch counter with the model and optimizer state dicts so that
# all three can be written to a single file.
checkpoint = dict(
    epoch=90,
    model_state=original_model.state_dict(),
    optim_state=optimizer.state_dict(),
)

torch.save(obj=checkpoint, f=FILE3)

In [11]:
# Rebuild a fresh model and an optimizer bound to THAT model's parameters, so a
# resumed training run updates the restored model rather than original_model.
model_checkpoint = Model(n_input_features=6)
optimizer_checkpoint = torch.optim.SGD(model_checkpoint.parameters(), lr=learning_rate)

# Read every piece back from the file on disk; using the in-memory
# `checkpoint` dict here would bypass the saved file entirely.
loaded_checkpoint = torch.load(FILE3)
epoch = loaded_checkpoint["epoch"]
model_checkpoint.load_state_dict(loaded_checkpoint["model_state"])
optimizer_checkpoint.load_state_dict(loaded_checkpoint["optim_state"])

print(optimizer_checkpoint.state_dict())

{'state': {}, 'param_groups': [{'lr': 0.01, 'momentum': 0, 'dampening': 0, 'weight_decay': 0, 'nesterov': False, 'maximize': False, 'foreach': None, 'params': [0, 1]}]}


In [12]:
# Delete the three files this notebook wrote, in the order they were created.
for saved_path in (FILE1, FILE2, FILE3):
    os.remove(saved_path)