# PyTorch Tutorial 17 - Saving and Loading Models

In [1]:
import torch 
import torch.nn as nn

### 1st Option: save the complete model

In [None]:
#### COMPLETE MODEL ####
# Serializes the whole model object, not just its tensors.
torch.save(model, PATH)

# A full-model file is an arbitrary pickle. PyTorch >= 2.6 defaults torch.load
# to weights_only=True, which rejects such files, so opt out explicitly.
# Only do this for files you created or otherwise trust.
model = torch.load(PATH, weights_only=False)
model.eval()


### 2nd Option: save only the state dict (recommended)

In [None]:
#### STATE DICT ####
# Only the state dict is written; the model class itself is not stored.
torch.save(model.state_dict(), PATH)

#### model must be created again with params
model = Model(*args, **kwargs)
# weights_only=True restricts unpickling to tensors and plain containers,
# which is all a state dict contains.
model.load_state_dict(torch.load(PATH, weights_only=True))
model.eval()

### Example

##### Lazy option: save the complete model

In [2]:
class Model(nn.Module):
    """One linear layer mapping `n_input_features` inputs to a single output, followed by a sigmoid."""

    def __init__(self, n_input_features):
        super().__init__()
        self.linear = nn.Linear(n_input_features, 1)

    def forward(self, x):
        """Return sigmoid(linear(x))."""
        logits = self.linear(x)
        return torch.sigmoid(logits)


model = Model(n_input_features=6)
# (a training loop would normally go here)

In [3]:
# Destination path for the saved model; assumes ./data already exists -- TODO confirm
FILE = "./data/model.pth"

In [4]:
# Save the entire model object rather than only its state dict.
torch.save(model, FILE)

In [5]:
# FILE holds a pickled nn.Module, not just tensors. PyTorch >= 2.6 defaults
# torch.load to weights_only=True, which rejects such files, so opt out
# explicitly. Only do this for checkpoints you created or otherwise trust.
model1 = torch.load(FILE, weights_only=False)

In [6]:
# Show the loaded module's repr to check that the architecture came back.
model1

Model(
  (linear): Linear(in_features=6, out_features=1, bias=True)
)

In [7]:
# Put the loaded model in evaluation mode; eval() returns the module, hence the repr output.
model1.eval()

Model(
  (linear): Linear(in_features=6, out_features=1, bias=True)
)

In [8]:
# Inspect the parameters of the model restored from disk (model1), not the
# original in-memory `model`, to confirm the weights survived the round trip.
for param in model1.parameters():
    print(param)

Parameter containing:
tensor([[-0.0860,  0.0468, -0.3693,  0.0984,  0.1204, -0.0503]],
       requires_grad=True)
Parameter containing:
tensor([0.0690], requires_grad=True)


##### Preferred option: save only the state dict

In [9]:
# Save only the state dict this time, to a separate file.
FILE = "./data/model1.pth"
torch.save(model.state_dict(), FILE)

In [10]:
# A state dict does not carry the architecture, so rebuild the model with the
# same constructor arguments first, then copy the saved tensors into it.
loaded_model = Model(n_input_features=6)
# weights_only=True restricts unpickling to tensors and plain containers.
loaded_model.load_state_dict(torch.load(FILE, weights_only=True))

<All keys matched successfully>

In [11]:
# Put the restored model in evaluation mode; eval() returns the module, hence the repr output.
loaded_model.eval()

Model(
  (linear): Linear(in_features=6, out_features=1, bias=True)
)

In [12]:
# Print the restored parameters to compare them with the original model's values.
for param in loaded_model.parameters():
    print(param)

Parameter containing:
tensor([[-0.0860,  0.0468, -0.3693,  0.0984,  0.1204, -0.0503]],
       requires_grad=True)
Parameter containing:
tensor([0.0690], requires_grad=True)


In [13]:
# The state dict maps parameter names ('linear.weight', 'linear.bias') to their tensors.
model.state_dict()

OrderedDict([('linear.weight',
              tensor([[-0.0860,  0.0468, -0.3693,  0.0984,  0.1204, -0.0503]])),
             ('linear.bias', tensor([0.0690]))])

## Saving a training checkpoint (epoch, model state, and optimizer state)

In [14]:
# Optimizers expose a state_dict as well: a 'state' entry plus 'param_groups'
# holding the hyperparameters (lr, momentum, ...).
learning_rate = 0.01
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
optimizer.state_dict()

{'state': {},
 'param_groups': [{'lr': 0.01,
   'momentum': 0,
   'dampening': 0,
   'weight_decay': 0,
   'nesterov': False,
   'maximize': False,
   'foreach': None,
   'differentiable': False,
   'params': [0, 1]}]}

In [15]:
FILE = "./data/checkpoint.pth"

# Bundle the epoch counter (90 is an example value) with both state dicts
# so that everything lands in a single file.
checkpoint = {
    "epoch": 90,
    "model_state": model.state_dict(),
    "optim_state": optimizer.state_dict(),
}
torch.save(checkpoint, FILE)

In [16]:
# The checkpoint contains only tensors and plain Python containers/scalars,
# so it can be read with the restricted (weights_only=True) unpickler.
loaded_checkpoint = torch.load(FILE, weights_only=True)
loaded_checkpoint

{'epoch': 90,
 'model_state': OrderedDict([('linear.weight',
               tensor([[-0.0860,  0.0468, -0.3693,  0.0984,  0.1204, -0.0503]])),
              ('linear.bias', tensor([0.0690]))]),
 'optim_state': {'state': {},
  'param_groups': [{'lr': 0.01,
    'momentum': 0,
    'dampening': 0,
    'weight_decay': 0,
    'nesterov': False,
    'maximize': False,
    'foreach': None,
    'differentiable': False,
    'params': [0, 1]}]}}

In [17]:
# Rebuild the model and optimizer from scratch, then restore both from the
# checkpoint that was read back from disk (loaded_checkpoint) -- not from the
# in-memory `checkpoint` dict, which would bypass the save/load round trip.
epoch = loaded_checkpoint["epoch"]
model = Model(n_input_features=6)
# lr=0 is a throwaway placeholder: load_state_dict below replaces the
# optimizer's hyperparameters with the saved ones.
optimizer = torch.optim.SGD(model.parameters(), lr=0)

model.load_state_dict(loaded_checkpoint["model_state"])
optimizer.load_state_dict(loaded_checkpoint["optim_state"])

In [18]:
# Confirm the restored hyperparameters: lr is 0.01 again rather than the placeholder 0.
optimizer.state_dict()

{'state': {},
 'param_groups': [{'lr': 0.01,
   'momentum': 0,
   'dampening': 0,
   'weight_decay': 0,
   'nesterov': False,
   'maximize': False,
   'foreach': None,
   'differentiable': False,
   'params': [0, 1]}]}

## Saving models on CPU / GPU and loading them onto CPU / GPU

In [None]:
#### Save on GPU & Load on CPU

# --- save: move the model to the GPU, then write its state dict
device = torch.device("cuda")
model.to(device)
torch.save(model.state_dict(), PATH)


# --- load: map_location remaps the stored tensors onto the CPU while reading
device = torch.device("cpu")
model = Model(*args, **kwargs)
cpu_state = torch.load(PATH, map_location=device)
model.load_state_dict(cpu_state)

In [None]:
#### Save on GPU & Load on GPU

# --- save: move the model to the GPU, then write its state dict
device = torch.device("cuda")
model.to(device)
torch.save(model.state_dict(), PATH)


# --- load: rebuild the model, copy the saved tensors in, then move it to the GPU
model = Model(*args, **kwargs)
gpu_state = torch.load(PATH)
model.load_state_dict(gpu_state)
model.to(device)

In [None]:
#### Save on CPU & Load on GPU

torch.save(model.state_dict(), PATH)

device = torch.device("cuda")
model = Model(*args, **kwargs)
# Reuse `device` for map_location instead of a hard-coded "cuda:0" string, so
# the load target and the final model.to(device) cannot drift apart.
model.load_state_dict(torch.load(PATH, map_location=device))
model.to(device)