In [None]:
# Shell escape: print the NVIDIA driver/GPU status for this runtime.
!nvidia-smi
import os
from google.colab import drive
# Mount Google Drive under /content/drive so its files are reachable from the runtime.
drive.mount('/content/drive')
path = '/content/drive/My Drive' 
# Make the Drive root the working directory; relative paths used later in the
# notebook (e.g. the dataset root) are resolved against it.
os.chdir(path)
# Last expression of the cell: display the entries found in the Drive root.
os.listdir(path)

In [24]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torchvision import transforms
from torchvision import datasets

class Inception(nn.Module):
  """Four-branch Inception-style block that keeps the spatial size.

  Every branch preserves height and width, so the branch outputs can be
  concatenated along the channel axis. The result always has
  24 + 16 + 24 + 24 = 88 channels, whatever ``in_channels`` is.

  Args:
    in_channels: number of channels of the incoming feature map.
  """

  def __init__(self, in_channels):
    super().__init__()
    # Pooling branch: the average pool (applied in forward) leaves the
    # channel count untouched, so this 1x1 conv still sees in_channels.
    self.branch1_2 = nn.Conv2d(in_channels, 24, 1)

    # Plain 1x1 branch.
    self.branch2_1 = nn.Conv2d(in_channels, 16, 1)

    # 1x1 reduction followed by a 5x5 conv; padding=2 keeps H and W.
    self.branch3_1 = nn.Conv2d(in_channels, 16, 1)
    self.branch3_2 = nn.Conv2d(16, 24, 5, padding=2)

    # 1x1 reduction followed by two 3x3 convs; padding=1 keeps H and W.
    self.branch4_1 = nn.Conv2d(in_channels, 16, 1)
    self.branch4_2 = nn.Conv2d(16, 24, 3, padding=1)
    self.branch4_3 = nn.Conv2d(24, 24, 3, padding=1)

  def forward(self, x):
    """Run all four branches on ``x`` and concatenate them on dim 1.

    Args:
      x: input tensor laid out as (batch, in_channels, height, width).

    Returns:
      Tensor of shape (batch, 88, height, width).
    """
    # 3x3 average pool with stride 1 and padding 1: size + 2 - 3 + 1 == size,
    # so the pooled map has the same height and width as the input.
    pooled = F.avg_pool2d(x, 3, stride=1, padding=1)
    pool_path = self.branch1_2(pooled)

    pointwise_path = self.branch2_1(x)
    five_path = self.branch3_2(self.branch3_1(x))
    double_three_path = self.branch4_3(self.branch4_2(self.branch4_1(x)))

    # Tensors are (batch, channel, height, width); joining on dim=1 stacks
    # the channels of the four branches in this fixed order.
    return torch.cat(
      (pool_path, pointwise_path, five_path, double_three_path), dim=1)

In [25]:
class Model(nn.Module):
  """Small CNN with two Inception blocks and a 10-way linear head.

  Shape trace for a (b, 1, 28, 28) input:

    conv1 (5x5)  -> (b, 10, 24, 24)
    max-pool 2   -> (b, 10, 12, 12), then ReLU
    incep1       -> (b, 88, 12, 12)
    conv2 (5x5)  -> (b, 20, 8, 8)
    max-pool 2   -> (b, 20, 4, 4), then ReLU
    incep2       -> (b, 88, 4, 4)
    flatten + fc -> (b, 10)
  """

  def __init__(self):
    super().__init__()
    self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
    # conv2 consumes the first Inception block's output:
    # 88 channels = 24 + 16 + 24 + 24.
    self.conv2 = nn.Conv2d(88, 20, kernel_size=5)

    self.incep1 = Inception(in_channels=10)
    self.incep2 = Inception(in_channels=20)

    self.mp = nn.MaxPool2d(2)

    # 1408 = channels * height * width = 88 * 4 * 4 after the second block.
    self.fc = nn.Linear(1408, 10)

  def forward(self, x):
    """Return the (batch, 10) output of the linear head for ``x``."""
    n_samples = x.size(0)
    stage1 = self.incep1(F.relu(self.mp(self.conv1(x))))
    stage2 = self.incep2(F.relu(self.mp(self.conv2(stage1))))
    # Collapse (channel, height, width) into one feature axis per sample.
    flat = stage2.view(n_samples, -1)
    return self.fc(flat)

In [None]:
batch_size = 64

# Choose the compute device up front: the model is moved onto it BEFORE the
# optimizer is constructed, so the optimizer is handed the parameters that
# are actually used for training (see the torch.optim construction notes).
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

# Convert images to tensors, then normalize the single channel with
# mean 0.1307 and std 0.3081 (presumably the MNIST training-set statistics).
transform = transforms.Compose([
  transforms.ToTensor(),
  transforms.Normalize((0.1307,),(0.3081,))
])

# The dataset root is a relative path, so it is resolved against the current
# working directory; the files are downloaded there if missing.
train_set = datasets.MNIST(root='Colab Notebooks/dataset',
              train=True,
              transform=transform,
              download=True)
test_set = datasets.MNIST(root='Colab Notebooks/dataset',
              train=False,
              transform=transform,
              download=True)

# Batch the datasets; only the training batches are shuffled.
train_loader = DataLoader(dataset=train_set,
              batch_size=batch_size,
              shuffle=True)
test_loader = DataLoader(dataset=test_set,
              batch_size=batch_size,
              shuffle=False)

# Build the model directly on the target device, then the loss and optimizer.
model = Model().to(device)
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.5)

# Last expression of the cell: display the model architecture.
model

In [27]:
def train(epoch, log_interval=300):
  """Run one optimisation pass over ``train_loader``.

  Uses the notebook-level ``model``, ``criterion``, ``optimizer``,
  ``train_loader`` and ``device``.

  Args:
    epoch: zero-based epoch index; only used (as ``epoch + 1``) in the log line.
    log_interval: number of batches between log lines. Each log line reports
      the mean loss over the last ``log_interval`` batches.

  Returns:
    None. Progress is reported through ``print``.
  """
  # Explicitly enter training mode so the pass does not depend on whatever
  # mode an earlier call (e.g. an evaluation) left the model in.
  model.train()
  running_loss = 0.0
  # ``inputs`` rather than ``input`` to avoid shadowing the builtin.
  for batch_idx, (inputs, target) in enumerate(train_loader):
    inputs, target = inputs.to(device), target.to(device)

    optimizer.zero_grad()
    output = model(inputs)
    loss = criterion(output, target)
    loss.backward()
    optimizer.step()

    running_loss += loss.item()
    # Report after every full window of ``log_interval`` batches, then reset
    # the accumulator; a trailing partial window is not reported.
    if batch_idx % log_interval == log_interval - 1:
      print('[%d,%5d] loss: %.3f' % (epoch + 1, batch_idx + 1, running_loss / log_interval))
      running_loss = 0.0

def test():
  correct = 0
  total = 0
  with torch.no_grad(): 
    for input,target in test_loader:
      input,target = input.to(device), target.to(device)#使用GPU计算
      output = model(input)
      _,predicted = torch.max(output.data,dim=1)
      total+=target.size(0)
      correct += (predicted == target).sum().item()
  print('Accuracy on test set:%d %%' % (100*correct/total))


# Ten epochs: after each training pass over the training loader, report the
# accuracy on the test loader.
for epoch in range(10):
  train(epoch)
  test()

[1,  300] loss: 0.723
[1,  600] loss: 0.200
[1,  900] loss: 0.136
Accuracy on test set:96 %
[2,  300] loss: 0.116
[2,  600] loss: 0.098
[2,  900] loss: 0.092
Accuracy on test set:97 %
[3,  300] loss: 0.078
[3,  600] loss: 0.073
[3,  900] loss: 0.075
Accuracy on test set:97 %
[4,  300] loss: 0.068
[4,  600] loss: 0.063
[4,  900] loss: 0.059
Accuracy on test set:98 %
[5,  300] loss: 0.056
[5,  600] loss: 0.054
[5,  900] loss: 0.053
Accuracy on test set:98 %
[6,  300] loss: 0.049
[6,  600] loss: 0.048
[6,  900] loss: 0.048
Accuracy on test set:98 %
[7,  300] loss: 0.044
[7,  600] loss: 0.044
[7,  900] loss: 0.045
Accuracy on test set:98 %
[8,  300] loss: 0.041
[8,  600] loss: 0.041
[8,  900] loss: 0.039
Accuracy on test set:98 %
[9,  300] loss: 0.039
[9,  600] loss: 0.036
[9,  900] loss: 0.036
Accuracy on test set:98 %
[10,  300] loss: 0.031
[10,  600] loss: 0.037
[10,  900] loss: 0.036
Accuracy on test set:98 %
