In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch import Tensor

In [64]:
%reload_ext autoreload
%autoreload 1
%aimport stochastic_depth, resnet_with_stochastic_depth
from stochastic_depth import StochasticDepth
from resnet_with_stochastic_depth import conv3x3, conv1x1, BasicBlock, ResNet, resnet18

In [73]:
from torchvision.models.resnet import resnet18, ResNet18_Weights

In [59]:
maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
input = torch.empty([10, 3, 224, 224])
maxpool(input).shape

torch.Size([10, 3, 112, 112])

In [46]:
conv1 = conv3x3(3, 20, dilation=3)
input = torch.ones(10, 3, 224, 224)
print(conv1(input).shape)

torch.Size([10, 20, 112, 112])


In [49]:
conv2 = conv1x1(3, 20)
print(conv2(input).shape)

torch.Size([10, 20, 224, 224])


In [56]:
input = torch.ones(10, 60, 224, 224)
block = BasicBlock(60, 60)
print(block(input).shape)

torch.Size([10, 60, 224, 224])


In [70]:
model18 = resnet18()

In [69]:
model18

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (stochastic_depth): p=0.2
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
   

In [72]:
model18.state_dict().keys()

odict_keys(['conv1.weight', 'bn1.weight', 'bn1.bias', 'bn1.running_mean', 'bn1.running_var', 'bn1.num_batches_tracked', 'layer1.0.conv1.weight', 'layer1.0.bn1.weight', 'layer1.0.bn1.bias', 'layer1.0.bn1.running_mean', 'layer1.0.bn1.running_var', 'layer1.0.bn1.num_batches_tracked', 'layer1.0.conv2.weight', 'layer1.0.bn2.weight', 'layer1.0.bn2.bias', 'layer1.0.bn2.running_mean', 'layer1.0.bn2.running_var', 'layer1.0.bn2.num_batches_tracked', 'layer1.1.conv1.weight', 'layer1.1.bn1.weight', 'layer1.1.bn1.bias', 'layer1.1.bn1.running_mean', 'layer1.1.bn1.running_var', 'layer1.1.bn1.num_batches_tracked', 'layer1.1.conv2.weight', 'layer1.1.bn2.weight', 'layer1.1.bn2.bias', 'layer1.1.bn2.running_mean', 'layer1.1.bn2.running_var', 'layer1.1.bn2.num_batches_tracked', 'layer2.0.conv1.weight', 'layer2.0.bn1.weight', 'layer2.0.bn1.bias', 'layer2.0.bn1.running_mean', 'layer2.0.bn1.running_var', 'layer2.0.bn1.num_batches_tracked', 'layer2.0.conv2.weight', 'layer2.0.bn2.weight', 'layer2.0.bn2.bias', '

In [77]:
model_state_dict = ResNet18_Weights.IMAGENET1K_V1.get_state_dict(True)

Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /Users/chemstriker/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████████████████████████████████| 44.7M/44.7M [00:09<00:00, 4.82MB/s]


In [79]:
model18.load_state_dict(model_state_dict, strict=False)

RuntimeError: Error(s) in loading state_dict for ResNet:
	size mismatch for fc.weight: copying a param with shape torch.Size([1000, 512]) from checkpoint, the shape in current model is torch.Size([101, 512]).
	size mismatch for fc.bias: copying a param with shape torch.Size([1000]) from checkpoint, the shape in current model is torch.Size([101]).

In [15]:
class Net(nn.Module):
    def __init__(self, input_size, hidden_size, p):
        super().__init__()
        self.ln = nn.Linear(input_size, hidden_size)
        self.stochastic_depth = StochasticDepth(p)

    def forward(self, inputs):
        return self.stochastic_depth(inputs)
        

In [27]:
net = Net(100, 50, 0.2)
net

Net(
  (ln): Linear(in_features=100, out_features=50, bias=True)
  (stochastic_depth): p=0.2
)

In [22]:
model_state_dict = net.state_dict()
model_state_dict

OrderedDict([('ln.weight',
              tensor([[-6.7965e-02, -4.7682e-02,  3.5075e-02,  ..., -6.0356e-02,
                        5.8965e-02, -9.1831e-02],
                      [ 7.5307e-02,  8.5223e-02,  2.2490e-02,  ...,  5.5933e-05,
                        2.0237e-02, -1.6986e-02],
                      [-5.3391e-02, -1.3324e-02,  2.9568e-02,  ..., -1.8244e-02,
                       -8.6001e-02, -1.0041e-03],
                      ...,
                      [ 6.2788e-02, -3.4302e-02,  9.6812e-02,  ..., -8.2388e-02,
                       -7.0445e-02,  2.1822e-02],
                      [-6.2316e-02, -3.0105e-02, -3.1371e-02,  ...,  8.7776e-02,
                       -8.4247e-02, -7.2727e-02],
                      [ 7.2396e-03,  2.2313e-02, -2.8747e-02,  ...,  7.0113e-02,
                       -3.5531e-02, -7.2335e-02]])),
             ('ln.bias',
              tensor([-0.0169,  0.0253, -0.0668, -0.0146, -0.0351, -0.0747,  0.0371,  0.0873,
                       0.0614,  0.0383

In [23]:
class NetExplicit(nn.Module):
    def __init__(self, input_size, hidden_size, dropout):
        super().__init__()
        self.ln = nn.Linear(input_size, hidden_size)
        self.dropout = nn.Dropout(dropout)

    def forward(self, inputs):
        return self.dropout(self.ln(inputs))
        

In [24]:
net2 = NetExplicit(100, 50, 0.2)
net2

NetExplicit(
  (ln): Linear(in_features=100, out_features=50, bias=True)
  (dropout): Dropout(p=0.2, inplace=False)
)

In [25]:
net2.load_state_dict(model_state_dict)

<All keys matched successfully>