Whether you are loading from a partial `state_dict` that is missing some keys, or loading a `state_dict` that has more keys than the model you are loading into, you can set the `strict` argument to `False` in the `load_state_dict()` function to ignore non-matching keys. In this recipe, we will experiment with warmstarting a model using the parameters of a different model.

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim


In [None]:
class NetA(nn.Module):
    """Small convolutional classifier used as the source of saved parameters.

    Two conv + max-pool stages feed three fully connected layers that
    produce 10 output values per sample. ``fc1`` expects ``16 * 5 * 5``
    flattened features, which is what a 3-channel 32x32 input yields after
    the two conv/pool stages.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Run the network on ``x`` and return the un-normalized outputs."""
        # ``torch`` has no ``F`` attribute; the functional ReLU lives in
        # ``torch.nn.functional``, reachable here through the ``nn`` alias.
        relu = nn.functional.relu
        x = self.pool(relu(self.conv1(x)))
        x = self.pool(relu(self.conv2(x)))
        # Flatten each sample's feature maps into one vector for the
        # fully connected layers.
        x = x.view(-1, 16 * 5 * 5)
        x = relu(self.fc1(x))
        x = relu(self.fc2(x))
        x = self.fc3(x)
        return x

# Source model: its state_dict is what gets written to disk further down.
netA = NetA()

class NetB(nn.Module):
    """Small convolutional classifier that receives the saved parameters.

    The layer attribute names and shapes are the same as in ``NetA``, so
    the keys of a ``NetA`` state_dict line up with this model's parameters.
    ``fc1`` expects ``16 * 5 * 5`` flattened features, which is what a
    3-channel 32x32 input yields after the two conv/pool stages.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Run the network on ``x`` and return the un-normalized outputs."""
        # ``torch`` has no ``F`` attribute; the functional ReLU lives in
        # ``torch.nn.functional``, reachable here through the ``nn`` alias.
        relu = nn.functional.relu
        x = self.pool(relu(self.conv1(x)))
        x = self.pool(relu(self.conv2(x)))
        # Flatten each sample's feature maps into one vector for the
        # fully connected layers.
        x = x.view(-1, 16 * 5 * 5)
        x = relu(self.fc1(x))
        x = relu(self.fc2(x))
        x = self.fc3(x)
        return x

# Target model: it is warmstarted from the saved parameters further down.
netB = NetB()

In [None]:
# Persist only netA's parameters (its state_dict), not the whole module.
PATH = 'model.pt'
torch.save(obj=netA.state_dict(), f=PATH)

In [None]:
# Warmstart netB from the parameters saved at PATH.
# weights_only=True restricts unpickling to tensors and plain containers,
# so a tampered checkpoint file cannot execute arbitrary code on load.
# strict=False ignores keys that do not match between the file and netB.
netB.load_state_dict(torch.load(PATH, weights_only=True), strict=False)