In [4]:
import torch
import torch.nn as nn
"""
Lazy way:

torch.save(arg, PATH)

torch.load(PATH)
model.eval() - Set model to evaluation mode
"""

"""
Better way:
(Model must be created again with parameters)

torch.save(model.state_dict(), PATH) - saves parameters
NOTE: model.state_dict() holds parameters

model = Model(*args, **kwargs)
model.load_state_dict(torch.load(PATH))
model.eval()
"""

# NOTE: kwargs = keyword arguments, i.e. the dictionary of named argument values passed to the constructor

'\nBetter way:\n(Model must be created again with parameters)\n\ntorch.save(model.state_dict(), PATH) - saves parameters\nNOTE: model.state_dict() holds parameters\n\nmodel = Model(*args, **kwargs)\nmodel.load_state_dict(torch.load(PATH))\nmodel.eval()\n'

In [8]:
class Model(nn.Module):
    """Minimal binary classifier: one linear layer followed by a sigmoid.

    Args:
        n_input_features: Size of the last dimension of the input tensor.
    """

    def __init__(self, n_input_features):
        super().__init__()
        # Single output unit: maps n_input_features -> 1.
        self.linear = nn.Linear(n_input_features, 1)

    # forward must be a method of the class (not nested inside __init__),
    # otherwise nn.Module.__call__ cannot find it.
    def forward(self, x):
        """Return sigmoid(linear(x)); the last dimension of the result has size 1."""
        y_pred = torch.sigmoid(self.linear(x))
        return y_pred
    
model = Model(n_input_features = 6)

# Train your model here...


FILE = "model.pth" # Unreadable - serialized data

# "Lazy way": pickle the whole module object.
# Saving right before loading keeps this cell reproducible on a fresh kernel
# instead of depending on whatever an earlier session left in FILE.
torch.save(model, FILE)

# Load model
# weights_only=False is required to unpickle a whole nn.Module on recent
# PyTorch releases, where the default is weights_only=True.
# Unpickling can execute arbitrary code, so only load files you trust.
model = torch.load(FILE, weights_only=False)
model.eval() # Set to evaluation mode

# Print the weight and bias tensors of the restored model
for param in model.parameters():
    print(param)

Parameter containing:
tensor([[-0.0917,  0.2148, -0.3338, -0.0089,  0.2943, -0.3799]],
       requires_grad=True)
Parameter containing:
tensor([-0.1133], requires_grad=True)


In [11]:
# Better way
# Save state dict only
# state_dict holds parameters

FILE = "model.pth" # Unreadable - serialized data

# Write the parameters first so the load below always reads a state dict,
# regardless of what was stored in FILE before this cell ran.
torch.save(model.state_dict(), FILE)

# We have to re-define model

loaded_model = Model(n_input_features = 6)
# A state dict contains only tensors and plain containers, so the safer
# weights_only=True loader is sufficient here.
loaded_model.load_state_dict(torch.load(FILE, weights_only=True))
loaded_model.eval() # Set to evaluation mode, as in the recipe in the notes

for param in loaded_model.parameters():
    print(param) # Weight, bias

Parameter containing:
tensor([[-0.0917,  0.2148, -0.3338, -0.0089,  0.2943, -0.3799]],
       requires_grad=True)
Parameter containing:
tensor([-0.1133], requires_grad=True)


In [12]:
# state_dict maps parameter names to tensors: linear.weight and linear.bias
model_params = model.state_dict()
print(model_params)

OrderedDict([('linear.weight', tensor([[-0.0917,  0.2148, -0.3338, -0.0089,  0.2943, -0.3799]])), ('linear.bias', tensor([-0.1133]))])


In [28]:
# Common way of saving a checkpoint during training

learning_rate = 0.01
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

print(optimizer.state_dict())

# Say we want to save a checkpoint

checkpoint = {
    "epoch" : 90, # Would be the real epoch number in a training loop
    "model_state" : model.state_dict(),
    "optim_state" : optimizer.state_dict() # Optimizer state_dict
}

torch.save(checkpoint, "checkpoint.pth") # Object to save, filename

# Load checkpoint

loaded_checkpoint = torch.load("checkpoint.pth")

# Now we have to set up a fresh model and optimizer again

epoch = loaded_checkpoint["epoch"]

model = Model(n_input_features = 6)
# lr=0 is a placeholder; the saved learning rate is restored by load_state_dict below
optimizer = torch.optim.SGD(model.parameters(), lr=0)

# Restore from the checkpoint that was read back from disk (not the in-memory
# dict built above), so the save/load round trip is actually exercised.
model.load_state_dict(loaded_checkpoint["model_state"]) # Give model its state dict
optimizer.load_state_dict(loaded_checkpoint["optim_state"]) # Same with optimizer

print()
print(optimizer.state_dict())

{'state': {}, 'param_groups': [{'lr': 0.01, 'momentum': 0, 'dampening': 0, 'weight_decay': 0, 'nesterov': False, 'maximize': False, 'foreach': None, 'differentiable': False, 'params': [0, 1]}]}

{'state': {}, 'param_groups': [{'lr': 0.01, 'momentum': 0, 'dampening': 0, 'weight_decay': 0, 'nesterov': False, 'maximize': False, 'foreach': None, 'differentiable': False, 'params': [0, 1]}]}


In [None]:
# Training on a GPU, then loading the result on a CPU
# (template: PATH, args and kwargs must be supplied by the reader)

# --- Save while the model is on the GPU ---
device = torch.device("cuda")
model.to(device)
gpu_state = model.state_dict()
torch.save(gpu_state, PATH)

# --- Load on the CPU: map_location is given the CPU device ---
device = torch.device('cpu')
model = Model(*args, **kwargs)
cpu_state = torch.load(PATH, map_location = device)
model.load_state_dict(cpu_state)

In [None]:
# If you want to save and load on GPU
# (template: PATH, args and kwargs must be supplied by the reader)

# Save on GPU
device = torch.device("cuda")
model.to(device)
torch.save(model.state_dict(), PATH)

# Load on GPU
model = Model(*args, **kwargs)
# nn.Module has no model_state_dict(); load_state_dict() copies the saved
# parameters into the freshly constructed model.
model.load_state_dict(torch.load(PATH))
model.to(device)

In [None]:
# Save on CPU, load on GPU
# (template: PATH, args and kwargs must be supplied by the reader)

# Save on CPU
torch.save(model.state_dict(), PATH)

# Load on GPU

device = torch.device("cuda")
# **kwargs unpacks the dict into keyword arguments; passing bare `kwargs`
# would hand the whole dict over as one extra positional argument.
model = Model(*args, **kwargs)
model.load_state_dict(torch.load(PATH, map_location = "cuda:0")) # Choose whatever GPU device you want
model.to(device)