In [1]:
import torch
from torch import nn
import torchvision
from torchvision.models import alexnet, vgg11 

In [2]:
# Build an AlexNet using the constructor's default arguments. As the last
# expression of the cell, its layer-by-layer repr is displayed below.
alexnet()

AlexNet(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(6, 6))
  (classifier): Sequential(
    (0): Dropout(p=0.5, inplace=False)
    (1): Linear(in_features=9216, out_features=4096, bias=True)
 

In [5]:
# Take the `classifier` submodule of a freshly constructed AlexNet and display it.
# The parent network is not bound to a name here; only the submodule is kept.
classifier = alexnet().classifier
classifier

Sequential(
  (0): Dropout(p=0.5, inplace=False)
  (1): Linear(in_features=9216, out_features=4096, bias=True)
  (2): ReLU(inplace=True)
  (3): Dropout(p=0.5, inplace=False)
  (4): Linear(in_features=4096, out_features=4096, bias=True)
  (5): ReLU(inplace=True)
  (6): Linear(in_features=4096, out_features=1000, bias=True)
)

In [7]:
# Keep the full AlexNet in `model`. `classifier` is bound to the same submodule
# object that `model` holds (plain attribute access, no copy), so editing
# `classifier` in place also edits `model`.
model = alexnet()
classifier = model.classifier
classifier

Sequential(
  (0): Dropout(p=0.5, inplace=False)
  (1): Linear(in_features=9216, out_features=4096, bias=True)
  (2): ReLU(inplace=True)
  (3): Dropout(p=0.5, inplace=False)
  (4): Linear(in_features=4096, out_features=4096, bias=True)
  (5): ReLU(inplace=True)
  (6): Linear(in_features=4096, out_features=1000, bias=True)
)

In [9]:
# Replace the final 1000-way layer with a 5-way output layer. The input width is
# read from the layer being replaced (4096 for AlexNet) instead of being repeated
# as a literal, so it cannot drift out of sync with the preceding layer.
# `classifier` is the same object as `model.classifier`, so `model` changes too.
classifier[-1] = nn.Linear(classifier[-1].in_features, 5)

In [10]:
# Display the whole network again after the in-place edit made through `classifier`.
model

AlexNet(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(6, 6))
  (classifier): Sequential(
    (0): Dropout(p=0.5, inplace=False)
    (1): Linear(in_features=9216, out_features=4096, bias=True)
 

In [11]:
# Stack a small fully connected head behind a complete VGG-11.
# The first Linear takes 1000 input features (VGG-11's default output width),
# and the head narrows 1000 -> 512 -> 256 -> 5 with a ReLU after each hidden layer.
model = nn.Sequential(
    vgg11(),
    nn.Linear(in_features=1000, out_features=512),
    nn.ReLU(),
    nn.Linear(in_features=512, out_features=256),
    nn.ReLU(),
    nn.Linear(in_features=256, out_features=5),
)
model

Sequential(
  (0): VGG(
    (features): Sequential(
      (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): ReLU(inplace=True)
      (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
      (3): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (4): ReLU(inplace=True)
      (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
      (6): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (7): ReLU(inplace=True)
      (8): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (9): ReLU(inplace=True)
      (10): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
      (11): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (12): ReLU(inplace=True)
      (13): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (14): ReLU(inplace=True)
      (15): MaxPool2d(kernel_size=2

In [13]:
# children() yields only the direct submodules of `model`: the list starts with
# the VGG backbone as a single entry rather than with the layers inside it.
list(model.children())

[VGG(
   (features): Sequential(
     (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
     (1): ReLU(inplace=True)
     (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
     (3): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
     (4): ReLU(inplace=True)
     (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
     (6): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
     (7): ReLU(inplace=True)
     (8): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
     (9): ReLU(inplace=True)
     (10): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
     (11): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
     (12): ReLU(inplace=True)
     (13): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
     (14): ReLU(inplace=True)
     (15): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, 

In [16]:
# modules() walks the whole module tree: the first entry is `model` itself (the
# outer Sequential), followed by the modules nested inside it.
list(model.modules())

[Sequential(
   (0): VGG(
     (features): Sequential(
       (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
       (1): ReLU(inplace=True)
       (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
       (3): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
       (4): ReLU(inplace=True)
       (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
       (6): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
       (7): ReLU(inplace=True)
       (8): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
       (9): ReLU(inplace=True)
       (10): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
       (11): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
       (12): ReLU(inplace=True)
       (13): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
       (14): ReLU(inplace=True)
       (15): MaxP

In [18]:
# parameters() yields the learnable tensors themselves.
# NOTE(review): displaying every parameter prints full weight tensors; consider
# showing only names and shapes instead, e.g. via model.named_parameters().
list(model.parameters())

[Parameter containing:
 tensor([[[[-2.9169e-02,  4.1014e-02,  4.4159e-02],
           [-4.8434e-02, -3.5437e-02, -3.1144e-02],
           [ 3.1146e-02, -3.0614e-02,  1.8627e-03]],
 
          [[ 8.5245e-03,  9.4478e-02, -4.3957e-02],
           [ 7.1241e-02,  4.2402e-02,  1.1551e-01],
           [-7.5308e-03,  9.1775e-02,  2.5929e-02]],
 
          [[-9.7027e-02, -4.8624e-02, -1.3425e-02],
           [ 1.2409e-02,  8.1328e-02, -2.4868e-02],
           [ 2.9008e-02, -2.8839e-02, -1.6140e-01]]],
 
 
         [[[ 5.5942e-03, -2.7094e-02,  9.5102e-02],
           [ 4.6550e-02, -3.6596e-02, -1.2261e-01],
           [ 2.8317e-02, -5.2859e-02, -2.5432e-02]],
 
          [[-3.5733e-02, -4.5143e-02, -4.3143e-02],
           [-3.9233e-02, -2.5334e-02, -6.5670e-02],
           [-9.6976e-02,  1.6893e-02, -8.7993e-02]],
 
          [[-7.7765e-02, -6.3024e-03, -6.3182e-02],
           [-3.3394e-02,  2.8179e-02, -1.5852e-02],
           [-5.2879e-02,  3.0815e-02,  1.0088e-02]]],
 
 
         [[[-4.25

In [21]:
# Rebind `model` to a plain VGG-11 (it previously held the Sequential wrapper)
# and pick the first layer of its classifier for inspection.
model = vgg11()
linear_layer = model.classifier[0]
linear_layer

Linear(in_features=25088, out_features=4096, bias=True)

In [22]:
# Sizes of the layer's weight and bias tensors, displayed together as a 2-tuple.
(linear_layer.weight.size(), linear_layer.bias.size())

(torch.Size([4096, 25088]), torch.Size([4096]))

In [23]:
# Show the documentation for the tensor's `clone` method using plain Python
# instead of the IPython-only `??` syntax, so the cell also runs outside IPython
# and its output is stored with the notebook.
help(linear_layer.weight.clone)

In [24]:
# Show the documentation for the tensor's `detach` method using plain Python
# instead of the IPython-only `??` syntax, so the cell also runs outside IPython
# and its output is stored with the notebook.
help(linear_layer.weight.detach)

In [34]:
# Serialise the entire module object (not only its weights) to disk.
torch.save(obj=model, f='/content/sample_data/model.bin')

In [35]:
# The file holds a whole pickled nn.Module, not just tensors. Since PyTorch 2.6
# torch.load defaults to weights_only=True, which refuses to unpickle arbitrary
# classes, so a full-model checkpoint has to opt out explicitly.
# Unpickling can execute arbitrary code: only load files you created yourself.
loaded_model = torch.load('/content/sample_data/model.bin', weights_only=False)


In [36]:
# Index the first feature layer's weight tensor at [0, 0, 0]; the result is the
# short vector shown below, used as a quick fingerprint of the loaded weights.
loaded_model.features[0].weight.data[0, 0, 0]

tensor([ 0.1120,  0.0590, -0.0071])

In [37]:
# Serialise only the model's state_dict (its named tensors), not the module object.
torch.save(obj=model.state_dict(), f='/content/sample_data/state_dict.bin')

In [38]:
# A state_dict checkpoint loads back as a mapping from parameter names to
# tensors, not as a module. Look the first feature layer's weights up in that
# mapping, so the displayed values come from this file and not from the
# separately loaded full-model checkpoint.
loaded_model1 = torch.load('/content/sample_data/state_dict.bin')
loaded_model1['features.0.weight'][0][0][0]

tensor([ 0.1120,  0.0590, -0.0071])