In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

In [2]:
class Net(nn.Module):
    """Small convolutional classifier: two conv/pool stages and three linear layers.

    The first linear layer expects 16 * 6 * 6 = 576 features. A batch of
    1-channel 32x32 images produces exactly that: each 3x3 convolution trims
    2 pixels and each 2x2 max-pool halves (flooring) the spatial size, so
    32 -> 30 -> 15 -> 13 -> 6.
    """

    def __init__(self):
        super().__init__()
        # Convolutional stages: 1 input channel -> 6 -> 16 channels, both
        # with square 3x3 kernels.
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=3)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=3)

        # Fully connected head: 16 channels * 6 * 6 spatial positions in,
        # 10 values out.
        self.fc1 = nn.Linear(in_features=16 * 6 * 6, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=84)
        self.fc3 = nn.Linear(in_features=84, out_features=10)

    def forward(self, x):
        """Run the network on ``x`` and return the output of ``fc3``."""
        # Each stage is convolution -> ReLU -> 2x2 max-pooling.
        x = F.max_pool2d(F.relu(self.conv1(x)), 2)
        x = F.max_pool2d(F.relu(self.conv2(x)), 2)

        # Linear layers need a flattened (rows, features) input, whereas the
        # convolutional layers work on the unflattened feature maps.
        x = x.view(-1, self.num_flat_features(x))

        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return self.fc3(x)

    def num_flat_features(self, x):
        """Return the product of every dimension of ``x`` except the first."""
        count = 1
        for extent in x.size()[1:]:
            count *= extent
        return count

# Instantiate the network and print its layer summary.
net = Net()
print(net)

Net(
  (conv1): Conv2d(1, 6, kernel_size=(3, 3), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(3, 3), stride=(1, 1))
  (fc1): Linear(in_features=576, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)


In [3]:

# Draw one random single-channel 32x32 sample; indexing with None prepends a
# batch axis so the network receives a minibatch of one.
# NOTE: the name `input` shadows the Python builtin; it is kept because other
# cells may refer to it.
input = torch.randn(1, 32, 32)[None]
out = net(input)
print(out)

tensor([[ 0.0542, -0.1395, -0.0264,  0.0050,  0.0757, -0.0085, -0.1235,  0.0873,
          0.0009,  0.0981]], grad_fn=<AddmmBackward>)


In [4]:
# Compute the loss: mean-squared error between the network output and a
# random dummy target of shape (1, 10).
criterion = nn.MSELoss()
target = torch.randn(1, 10)

loss = criterion(out, target)
print(loss)

tensor(0.8296, grad_fn=<MseLossBackward>)


In [5]:
# Compute gradients: reset the gradients stored on the network's parameters,
# then backpropagate the loss.
net.zero_grad()
# backward() frees the graph automatically once it finishes, so it cannot be
# run a second time; to keep the graph, call .backward(retain_graph=True).
loss.backward()

In [6]:
# Update: take a single SGD step (learning rate 0.01) over the network's
# parameters. conv1's bias is printed before and after the step so the effect
# of the update is visible.
# `optim` (torch.optim) is imported in the notebook's import cell.
optimizer = optim.SGD(net.parameters(), lr=0.01)
print(net.conv1.bias)
optimizer.step()
print(net.conv1.bias)

Parameter containing:
tensor([ 0.0710, -0.3186,  0.0278, -0.0126,  0.2793,  0.1187],
       requires_grad=True)
Parameter containing:
tensor([ 0.0710, -0.3186,  0.0278, -0.0126,  0.2792,  0.1188],
       requires_grad=True)
