In [1]:
import torch

In [2]:
size = (2, 3, 4)
a = torch.empty(size)
print(a)

tensor([[[1.6622e+32, 6.5160e-43, 1.6622e+32, 6.5160e-43],
         [1.1043e+35, 6.5160e-43, 7.7789e+33, 6.5160e-43],
         [1.6622e+32, 6.5160e-43, 1.6622e+32, 6.5160e-43]],

        [[1.6622e+32, 6.5160e-43, 1.6622e+32, 6.5160e-43],
         [7.7554e+33, 6.5160e-43, 7.7554e+33, 6.5160e-43],
         [1.6622e+32, 6.5160e-43, 1.6622e+32, 6.5160e-43]]])


In [3]:
b = torch.empty_like(a)
print(b, b.shape, sep = "\n")

tensor([[[1.8628e+32, 6.5160e-43, 1.8628e+32, 6.5160e-43],
         [1.8689e+32, 6.5160e-43, 1.8689e+32, 6.5160e-43],
         [1.1041e+35, 6.5160e-43, 7.7636e+33, 6.5160e-43]],

        [[1.6622e+32, 6.5160e-43, 1.6622e+32, 6.5160e-43],
         [1.8453e+32, 6.5160e-43, 1.8453e+32, 6.5160e-43],
         [1.1058e+35, 6.5160e-43, 1.8518e+32, 6.5160e-43]]])
torch.Size([2, 3, 4])


In [4]:
print(b.random_(5))
print(b.random_(10))

tensor([[[2., 3., 3., 2.],
         [0., 3., 1., 2.],
         [1., 0., 1., 3.]],

        [[3., 2., 1., 2.],
         [0., 2., 4., 3.],
         [4., 2., 2., 3.]]])
tensor([[[2., 5., 9., 5.],
         [8., 6., 6., 0.],
         [3., 1., 8., 3.]],

        [[9., 4., 5., 5.],
         [8., 2., 2., 6.],
         [6., 1., 9., 7.]]])


In [5]:
c = torch.tensor([2, 3], dtype = torch.float16, requires_grad = True)
d = torch.tensor([5, 6], dtype = torch.float32, requires_grad = True)
print(c, d)

c = torch.tensor([2, 3], requires_grad = True)
d = torch.tensor([5, 6], requires_grad = True)
print(c, d)

tensor([2., 3.], dtype=torch.float16, requires_grad=True) tensor([5., 6.], requires_grad=True)


RuntimeError: Only Tensors of floating point and complex dtype can require gradients

In [6]:
c = torch.tensor([2., 3], requires_grad = True)
d = torch.tensor([5., 6], requires_grad = True)
print(c, d)

tensor([2., 3.], requires_grad=True) tensor([5., 6.], requires_grad=True)


In [7]:
e = 4 * c + 5 * d
print(e)
print(c.grad)
print(d.grad)
print(e.grad)

tensor([33., 42.], grad_fn=<AddBackward0>)
None
None
None


  print(e.grad)


In [8]:
f = torch.empty(2)
e.backward(gradient = f)
print(e)
print(f)

tensor([33., 42.], grad_fn=<AddBackward0>)
tensor([ 0.0000, 18.9802])


In [9]:
print(c.grad)
print(d.grad)
print(e.grad)
print(f.grad)

tensor([ 0.0000, 75.9209])
tensor([ 0.0000, 94.9012])
None
None


  print(e.grad)


In [10]:
c1 = torch.tensor([2., 3], requires_grad = True)
d1 = torch.tensor([5., 6], requires_grad = True)
print(c1, d1)
e1 = 4 * c1 + 5 * d1
print(e1)
print(c1.grad)
print(d1.grad)
print(e1.grad)
f1 = torch.tensor([1., 1])
e1.backward(gradient = f1)
print(e1)
print(f1)
print(c1.grad)
print(d1.grad)
print(e1.grad)
print(f1.grad)

tensor([2., 3.], requires_grad=True) tensor([5., 6.], requires_grad=True)
tensor([33., 42.], grad_fn=<AddBackward0>)
None
None
None
tensor([33., 42.], grad_fn=<AddBackward0>)
tensor([1., 1.])
tensor([4., 4.])
tensor([5., 5.])
None
None


  print(e1.grad)
  print(e1.grad)


In [11]:
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F


class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(-1, 16 * 5 * 5)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x


net = Net()
print(net)
optimizer = optim.SGD(net.parameters(), lr = 0.001, momentum = 0.9)

Net(
  (conv1): Conv2d(3, 6, kernel_size=(5, 5), stride=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=400, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)


In [12]:
print("Model's state_dict:")
for param_tensor in net.state_dict():
    print(param_tensor, "\t", net.state_dict()[param_tensor].size())

print()

# Print optimizer's state_dict
print("Optimizer's state_dict:")
for var_name in optimizer.state_dict():
    print(var_name, "\t", optimizer.state_dict()[var_name])

Model's state_dict:
conv1.weight 	 torch.Size([6, 3, 5, 5])
conv1.bias 	 torch.Size([6])
conv2.weight 	 torch.Size([16, 6, 5, 5])
conv2.bias 	 torch.Size([16])
fc1.weight 	 torch.Size([120, 400])
fc1.bias 	 torch.Size([120])
fc2.weight 	 torch.Size([84, 120])
fc2.bias 	 torch.Size([84])
fc3.weight 	 torch.Size([10, 84])
fc3.bias 	 torch.Size([10])

Optimizer's state_dict:
state 	 {}
param_groups 	 [{'lr': 0.001, 'momentum': 0.9, 'dampening': 0, 'weight_decay': 0, 'nesterov': False, 'params': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]}]


Using this approach yields the most intuitive syntax and involves the least amount of code. The disadvantage of this approach is that the
serialized data is bound to the specific classes and the exact directory structure used when the model is saved. The reason for this is because
pickle does not save the model class itself. Rather, it saves a path to the file containing the class, which is used during load time. Because
of this, your code can break in various ways when used in other projects or after refactors. In this recipe, we will explore both ways on how to
save and load models for inference.

https://pytorch.org/tutorials/recipes/recipes/saving_and_loading_models_for_inference.html

In [13]:
# Specify a path
PATH = "state_dict_model.pt"

# Save
torch.save(net.state_dict(), PATH)

# Load
model = Net()
model.load_state_dict(torch.load(PATH))
model.eval()

Net(
  (conv1): Conv2d(3, 6, kernel_size=(5, 5), stride=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=400, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)

Remember too, that you must call model.eval() to set dropout and batch normalization layers to evaluation mode before running inference.
Failing to do this will yield inconsistent inference results.

https://pytorch.org/tutorials/recipes/recipes/saving_and_loading_models_for_inference.html

In [None]:
# Specify a path to save to
PATH = "model.pt"

torch.save({
    'modelA_state_dict': netA.state_dict(),
    'modelB_state_dict': netB.state_dict(),
    'optimizerA_state_dict': optimizerA.state_dict(),
    'optimizerB_state_dict': optimizerB.state_dict(),
}, PATH)

modelA = Net()
modelB = Net()
optimModelA = optim.SGD(modelA.parameters(), lr = 0.001, momentum = 0.9)
optimModelB = optim.SGD(modelB.parameters(), lr = 0.001, momentum = 0.9)

checkpoint = torch.load(PATH)
modelA.load_state_dict(checkpoint['modelA_state_dict'])
modelB.load_state_dict(checkpoint['modelB_state_dict'])
optimizerA.load_state_dict(checkpoint['optimizerA_state_dict'])
optimizerB.load_state_dict(checkpoint['optimizerB_state_dict'])

modelA.eval()
modelB.eval()
# - or -
modelA.train()
modelB.train()
# https://pytorch.org/tutorials/recipes/recipes/saving_multiple_models_in_one_file.html

In [None]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, 3, 1)
        self.conv2 = nn.Conv2d(32, 64, 3, 1)
        self.dropout1 = nn.Dropout2d(0.25)
        self.dropout2 = nn.Dropout2d(0.5)
        self.fc1 = nn.Linear(9216, 128)
        self.fc2 = nn.Linear(128, 10)

    # x represents our data
    def forward(self, x):
        # Pass data through conv1
        x = self.conv1(x)
        # Use the rectified-linear activation function over x
        x = F.relu(x)

        x = self.conv2(x)
        x = F.relu(x)

        # Run max pooling over x
        x = F.max_pool2d(x, 2)
        # Pass data through dropout1
        x = self.dropout1(x)
        # Flatten x with start_dim=1
        x = torch.flatten(x, 1)
        # Pass data through fc1
        x = self.fc1(x)
        x = F.relu(x)
        x = self.dropout2(x)
        x = self.fc2(x)

        # Apply softmax to x
        output = F.log_softmax(x, dim = 1)
        return output

#https://pytorch.org/tutorials/recipes/recipes/defining_a_neural_network.html