In [23]:
''' 3 DIFFERENT METHODS TO REMEMBER:
 - torch.save(arg, PATH) # can be model, tensor, or dictionary
 - torch.load(PATH)
 - model.load_state_dict(arg) # a method of the model (nn.Module), not a torch-level function
'''

''' 2 DIFFERENT WAYS OF SAVING
# 1) lazy way: save whole model
    torch.save(model, PATH)
    
# model class must be defined somewhere
    model = torch.load(PATH)
    model.eval()
    
# 2) recommended way: save only the state_dict
    torch.save(model.state_dict(), PATH)
    
# model must be created again with parameters
    model = Model(*args, **kwargs)
    model.load_state_dict(torch.load(PATH))
    model.eval()
'''

' 2 DIFFERENT WAYS OF SAVING\n# 1) lazy way: save whole model\n    torch.save(model, PATH)\n    \n# model class must be defined somewhere\n    model = torch.load(PATH)\n    model.eval()\n    \n# 2) recommended way: save only the state_dict\n    torch.save(model.state_dict(), PATH)\n    \n# model must be created again with parameters\n    model = Model(*args, **kwargs)\n    model.load_state_dict(torch.load(PATH))\n    model.eval()\n'

In [2]:
import torch
import torch.nn as nn

In [3]:
class Model(nn.Module):
    """One linear unit followed by a sigmoid activation."""

    def __init__(self, n_input_features):
        """Create the linear layer mapping ``n_input_features`` inputs to 1 output."""
        super().__init__()
        # The attribute name ``linear`` fixes the state_dict keys
        # ("linear.weight", "linear.bias"), so it must stay as is.
        self.linear = nn.Linear(in_features=n_input_features, out_features=1)

    def forward(self, x):
        """Return ``sigmoid(linear(x))`` for the input tensor ``x``."""
        logits = self.linear(x)
        return torch.sigmoid(logits)

In [4]:
# Build the model that the save/load examples in the following cells operate on:
# 6 input features feeding a single sigmoid output.
model = Model(n_input_features=6)

In [5]:
# Show the initial weight and bias so they can be compared with the
# parameters of the reloaded model later on.
for tensor in model.parameters():
    print(tensor)

Parameter containing:
tensor([[ 0.2233,  0.3439, -0.2179,  0.2347,  0.3749,  0.3446]],
       requires_grad=True)
Parameter containing:
tensor([-0.3124], requires_grad=True)


### Lazy Method

In [6]:
# Save the entire model object (the "lazy" way): the whole module is
# serialised, not just its tensors, so the Model class definition must be
# available again when the file is loaded.

FILE = "model.pth"
torch.save(model, FILE)

In [7]:
# Load the complete model object back from FILE and switch it to evaluation mode.
# NOTE(review): torch.load unpickles the file, so only load files you trust.
# Newer PyTorch releases reportedly default to weights_only=True, which would
# reject a fully pickled module -- confirm against the installed version and
# pass weights_only=False there if needed.
loaded_model = torch.load(FILE)
loaded_model.eval()

Model(
  (linear): Linear(in_features=6, out_features=1, bias=True)
)

In [8]:
# The reloaded parameters should match the values printed for the original model.
for restored in loaded_model.parameters():
    print(restored)

Parameter containing:
tensor([[ 0.2233,  0.3439, -0.2179,  0.2347,  0.3749,  0.3446]],
       requires_grad=True)
Parameter containing:
tensor([-0.3124], requires_grad=True)


### Recommended Method

In [9]:
# Save only the state dict (the recommended way): just the named parameter
# tensors, not the module object itself.
# Note: FILE is the same "model.pth" path used for the whole-model save, so
# that earlier file is overwritten here.
FILE = "model.pth"
torch.save(model.state_dict(), FILE)

In [10]:
# Inspect what was written: an ordered mapping from parameter name
# ("linear.weight", "linear.bias") to its tensor.
print(model.state_dict())

OrderedDict([('linear.weight', tensor([[ 0.2233,  0.3439, -0.2179,  0.2347,  0.3749,  0.3446]])), ('linear.bias', tensor([-0.3124]))])


In [11]:
# Re-create the architecture with the same constructor arguments, then copy
# the saved tensors into it and switch to evaluation mode.
loaded_model = Model(n_input_features=6)
loaded_model.load_state_dict(torch.load(FILE))  # expects the loaded dictionary, not the file path
loaded_model.eval()

Model(
  (linear): Linear(in_features=6, out_features=1, bias=True)
)

In [12]:
# Sanity check: these values should equal the state dict printed for the
# original model before saving.
print(loaded_model.state_dict())

OrderedDict([('linear.weight', tensor([[ 0.2233,  0.3439, -0.2179,  0.2347,  0.3749,  0.3446]])), ('linear.bias', tensor([-0.3124]))])


### Save and Load Checkpoint

In [13]:
# Plain SGD over the model's parameters; its state dict is stored in the
# checkpoint alongside the model's.
learning_rate = 0.01
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

In [14]:
# Bundle everything needed to resume training into one dictionary.
# The epoch value is hard-coded for the example (no training loop is shown here).
checkpoint = dict(
    epoch=90,
    model_state=model.state_dict(),
    optim_state=optimizer.state_dict(),
)

In [15]:
# Inspect what the optimizer serialises: a 'state' entry (empty in the recorded
# output) and 'param_groups' holding hyperparameters such as lr.
print(optimizer.state_dict())

{'state': {}, 'param_groups': [{'lr': 0.01, 'momentum': 0, 'dampening': 0, 'weight_decay': 0, 'nesterov': False, 'params': [0, 1]}]}


In [16]:
# torch.save accepts an arbitrary dictionary, so the whole checkpoint
# (epoch, model state, optimizer state) goes into a single file.
FILE = "checkpoint.pth"
torch.save(checkpoint, FILE)

In [17]:
# Start over with a freshly initialised model and a new optimizer.
# lr=0 differs from the saved lr=0.01, so a successful restore from the
# checkpoint is visible when the optimizer state is printed afterwards.
model = Model(n_input_features=6)
optimizer = torch.optim.SGD(model.parameters(), lr=0)

In [18]:
# Restore the model weights, the optimizer state and the epoch counter from
# the checkpoint dictionary stored in FILE.
checkpoint = torch.load(FILE)
model.load_state_dict(checkpoint['model_state'])
optimizer.load_state_dict(checkpoint['optim_state'])
epoch = checkpoint['epoch']

# Pick the mode for what comes next: eval() for inference ...
model.eval()
# - or -
# ... train() to continue training:
# model.train()

Model(
  (linear): Linear(in_features=6, out_features=1, bias=True)
)

In [19]:
# The param_groups now report lr=0.01 from the checkpoint instead of the lr=0
# the optimizer was constructed with, confirming the state was restored.
print(optimizer.state_dict())

{'state': {}, 'param_groups': [{'lr': 0.01, 'momentum': 0, 'dampening': 0, 'weight_decay': 0, 'nesterov': False, 'params': [0, 1]}]}


In [20]:
# Remember that you must call model.eval() to set dropout and batch normalization layers 
# to evaluation mode before running inference. Failing to do this will yield 
# inconsistent inference results. If you wish to resume training, 
# call model.train() to ensure these layers are in training mode.

In [22]:
""" SAVING ON GPU/CPU 

# 1) Save on GPU, Load on CPU
    device = torch.device("cuda")
    model.to(device)
    torch.save(model.state_dict(), PATH)
    device = torch.device('cpu')
    model = Model(*args, **kwargs)
    model.load_state_dict(torch.load(PATH, map_location=device))

# 2) Save on GPU, Load on GPU
    device = torch.device("cuda")
    model.to(device)
    torch.save(model.state_dict(), PATH)
    model = Model(*args, **kwargs)
    model.load_state_dict(torch.load(PATH))
    model.to(device)
    
# Note: Be sure to use the .to(torch.device('cuda')) function 
# on all model inputs, too!

# 3) Save on CPU, Load on GPU
    torch.save(model.state_dict(), PATH)
    device = torch.device("cuda")
    model = Model(*args, **kwargs)
    model.load_state_dict(torch.load(PATH, map_location="cuda:0"))  # Choose whatever GPU device number you want
    model.to(device)
    # This loads the model to a given GPU device. 
    # Next, be sure to call model.to(torch.device('cuda')) to convert the model’s parameter tensors to CUDA tensors
"""

' SAVING ON GPU/CPU \n\n# 1) Save on GPU, Load on CPU\n    device = torch.device("cuda")\n    model.to(device)\n    torch.save(model.state_dict(), PATH)\n    device = torch.device(\'cpu\')\n    model = Model(*args, **kwargs)\n    model.load_state_dict(torch.load(PATH, map_location=device))\n\n# 2) Save on GPU, Load on GPU\n    device = torch.device("cuda")\n    model.to(device)\n    torch.save(model.state_dict(), PATH)\n    model = Model(*args, **kwargs)\n    model.load_state_dict(torch.load(PATH))\n    model.to(device)\n# Note: Be sure to use the .to(torch.device(\'cuda\')) function \n# on all model inputs, too!\n\n# 3) Save on CPU, Load on GPU\n    torch.save(model.state_dict(), PATH)\n    device = torch.device("cuda")\n    model = Model(*args, **kwargs)\n    model.load_state_dict(torch.load(PATH, map_location="cuda:0"))  # Choose whatever GPU device number you want\n    model.to(device)\n    # This loads the model to a given GPU device. \n    # Next, be sure to call model.to(torch.d