# 基于CNN模型的物体识别

## 0.先完成理论部分即可

## 1.复习上课内容以及复现课程代码

在本部分，你需要复习上课内容和课程代码后，自己复现课程代码。

## 2.回答以下理论题目

### 2.1. Suppose your input is a 100 by 100 gray image, and you use a convolutional layer with 50 filters that are each 5x5. How many parameters does this hidden layer have (including the bias parameters)? 

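#### 参数个数：（5 × 5 × 1 + 1）× 50 = 1300（每个卷积核有 5×5×1 个权重和 1 个偏置，共 50 个卷积核）。注意（100 - 5 + 1）×（100 - 5 + 1）× 50 = 460800 是该层输出特征图的元素个数（stride=1、无 padding），而不是参数个数。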

### 2.2. What are "local invariant" and "parameter sharing" ?

- **local invariant（局部不变性）是指图片的底层特征与该特征在图片中的位置无关；parameter sharing（参数共享）是指同一个卷积核用同一组权重在整张图片上滑动。因为图片的底层特征与位置无关，所以采用 local invariant 以及 parameter sharing 可以减少参数数量，使运算变得简洁、高效，能够在超大规模数据集上运算。**



### 2.3. Why we use batch normalization ?
- 批处理规范化（也称为批处理规范）是一种用于提高人工神经网络的速度，性能和稳定性的技术。


- **Batch Normalization 是一种巧妙且粗暴的方法，用来削弱 bad initialization 的影响，其基本思想是：If you want it, just make it! 我们希望在非线性 activation 之前，输出值有比较好的分布（例如高斯分布），以便于 back propagation 时计算 gradient、更新 weight。**

### 2.4. What problem does dropout try to solve ?
- **费时，容易过拟合**


Dropout 在训练时随机丢弃一部分神经元，强迫每个神经单元与随机挑选出来的其他神经单元共同工作并达到好的效果，从而减弱了神经元节点间的联合适应性（co-adaptation），增强了泛化能力。

## 3. 实践题

### 3.1 In the first session of the practical part, you will implement an image classification model using any deep learning library that you are familiar with, which means that, besides tensorflow and keras, you can also use pytorch/caffe/... . The dataset used in this session is CIFAR-10, whose training set contains 50000 color (RGB) images (plus 10000 test images), each with size 32x32x3. All images are classified into ten categories.

In [2]:
import torch 
import torchvision
from torch.utils.data import DataLoader
import torch.nn as nn
import torch.optim as optim 
import torchvision.transforms as transforms
import torch.nn.functional as F

# Dataset preparation and preprocessing.
# Per-channel (R, G, B) mean and standard deviation handed to Normalize;
# both pipelines use the same values.
_NORM_MEAN = (0.4914, 0.4822, 0.4465)
_NORM_STD = (0.2023, 0.1994, 0.2010)

# Training pipeline: augmentation first, then tensor conversion and
# normalization.
transform_train = transforms.Compose([
    # Zero-pad 4 pixels on every side, then take a random 32x32 crop.
    transforms.RandomCrop(32, padding=4),
    # Flip horizontally half of the time, leave untouched otherwise.
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(_NORM_MEAN, _NORM_STD),
])

# Test pipeline: no augmentation, same normalization as training.
transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(_NORM_MEAN, _NORM_STD),
])

# Load both CIFAR-10 splits from the current directory (download=True).
train_dataset = torchvision.datasets.CIFAR10(
    root='./', train=True, transform=transform_train, download=True,
)
test_dataset = torchvision.datasets.CIFAR10(
    root='./', train=False, transform=transform_test, download=True,
)

class ResidualBlock(nn.Module):
    """Residual unit: two 3x3 conv + batch-norm layers plus a skip path.

    ``left`` is the main branch (conv -> BN -> ReLU -> conv -> BN).
    ``shortcut`` is an empty ``nn.Sequential`` (identity) unless the block
    changes the stride or the channel count; in that case it is a 1x1
    conv + BN that maps the input to the main branch's output shape.

    Args:
        inchannel: Number of input channels.
        outchannel: Number of output channels.
        stride: Stride of the first 3x3 conv (and of the 1x1 projection).
    """

    def __init__(self, inchannel, outchannel, stride=1):
        super().__init__()

        # Main branch is built first so parameters are created in the
        # same order as before.
        main_branch = [
            nn.Conv2d(inchannel, outchannel, kernel_size=3,
                      stride=stride, padding=1, bias=False),
            nn.BatchNorm2d(outchannel),
            nn.ReLU(inplace=True),
            nn.Conv2d(outchannel, outchannel, kernel_size=3,
                      stride=1, padding=1, bias=False),
            nn.BatchNorm2d(outchannel),
        ]
        self.left = nn.Sequential(*main_branch)

        # A projection is only needed when input and output shapes differ.
        shape_preserved = stride == 1 and inchannel == outchannel
        skip_branch = []
        if not shape_preserved:
            skip_branch.append(
                nn.Conv2d(inchannel, outchannel, kernel_size=1,
                          stride=stride, bias=False)
            )
            skip_branch.append(nn.BatchNorm2d(outchannel))
        self.shortcut = nn.Sequential(*skip_branch)

    def forward(self, x):
        """Return ``relu(left(x) + shortcut(x))``."""
        residual = self.left(x)
        skip = self.shortcut(x)
        return F.relu(residual + skip)

class ResNet(nn.Module):
    """ResNet-style image classifier: a stem conv, four stages, one linear head.

    The stem is a 3x3 conv + BN + ReLU producing 64 channels.  The four
    stages have 64, 128, 256 and 512 channels with two blocks each; stages
    2-4 start with a stride-2 block.  The final feature map is averaged
    over all spatial positions and fed to a linear layer.

    Args:
        ResidualBlock: Callable ``block(in_channels, out_channels, stride)``
            returning the ``nn.Module`` used for every block in every stage.
        num_classes: Number of output logits.
    """

    def __init__(self, ResidualBlock, num_classes=10):
        super(ResNet, self).__init__()
        # Running channel count; make_layer reads and updates it so that
        # consecutive blocks/stages are wired together.
        self.inchannel = 64
        self.conv1 = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False),
            nn.BatchNorm2d(64),
            nn.ReLU(),
        )
        self.layer1 = self.make_layer(ResidualBlock, 64,  2, stride=1)
        self.layer2 = self.make_layer(ResidualBlock, 128, 2, stride=2)
        self.layer3 = self.make_layer(ResidualBlock, 256, 2, stride=2)
        self.layer4 = self.make_layer(ResidualBlock, 512, 2, stride=2)
        self.fc = nn.Linear(512, num_classes)

    def make_layer(self, block, channels, num_blocks, stride):
        """Build one stage of ``num_blocks`` blocks with ``channels`` outputs.

        Only the first block uses ``stride``; the remaining blocks use
        stride 1 (e.g. ``stride=2, num_blocks=2`` gives strides ``[2, 1]``).
        Updates ``self.inchannel`` to ``channels`` as a side effect.
        """
        strides = [stride] + [1] * (num_blocks - 1)
        layers = []
        for block_stride in strides:
            layers.append(block(self.inchannel, channels, block_stride))
            self.inchannel = channels
        return nn.Sequential(*layers)

    def forward(self, x):
        """Return class logits of shape ``(batch, num_classes)`` for ``x``."""
        out = self.conv1(x)
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        # Global average pooling.  For 32x32 inputs the feature map is 4x4,
        # so this equals the former fixed avg_pool2d(out, 4); for other
        # input sizes it still yields 512 features instead of breaking fc.
        out = F.adaptive_avg_pool2d(out, 1)
        out = out.view(out.size(0), -1)
        out = self.fc(out)
        return out
    
def ResNet18(num_classes=10):
    """Build the ResNet defined above using ``ResidualBlock`` for every stage.

    Args:
        num_classes: Number of output logits.  Defaults to 10, which is
            what the previous zero-argument version always produced.

    Returns:
        A freshly initialised ``ResNet`` instance.
    """
    return ResNet(ResidualBlock, num_classes=num_classes)
net = ResNet18()

# Hyper-parameters.
learn_rate = 0.0001
num_epoches = 180

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Move the model to its device before the optimizer is constructed, so the
# optimizer is built from the parameters that are actually trained.
net.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=learn_rate, betas=(0.9, 0.99), weight_decay=5e-4)

# Training images are fed in shuffled mini-batches of 32.
train_dataloader = DataLoader(dataset=train_dataset, batch_size=32, shuffle=True, num_workers=2)
# Evaluation needs no shuffling; batching it (instead of the default
# batch_size=1) avoids one forward pass per image.
test_dataloader = DataLoader(dataset=test_dataset, batch_size=100, shuffle=False, num_workers=2)

# Training phase.
net.train()
for epoch in range(num_epoches):
    print(f'epoch:{epoch + 1}')
    for idx, (img, label) in enumerate(train_dataloader):
        images = img.to(device)
        labels = label.to(device)
        # Clear gradients left over from the previous step.
        optimizer.zero_grad()
        output = net(images)
        loss = criterion(output, labels)
        loss.backward()
        optimizer.step()
        # Report the current mini-batch loss every 100 steps.
        if idx % 100 == 0:
            print(f"| Step={idx//100}，current loss = {loss.item()}")

# Evaluation phase.
net.eval()
total, correct = 0, 0
# No gradients are needed for evaluation; skipping autograd bookkeeping
# saves memory and time.
with torch.no_grad():
    for img, label in test_dataloader:
        images = img.to(device)
        labels = label.to(device)
        output = net(images)
        # Predicted class = index of the largest logit in each row.
        _, idx = torch.max(output, 1)
        total += labels.size(0)
        # .item() keeps `correct` a plain int so the accuracy prints as a
        # number rather than a tensor repr.
        correct += (idx == labels).sum().item()

print(f'accuracy:{100.*correct/total}')

Files already downloaded and verified
Files already downloaded and verified
epoch:1
| Step=0，current loss = 2.460303544998169
| Step=1，current loss = 1.6955255270004272
| Step=2，current loss = 1.6867762804031372
| Step=3，current loss = 1.5862367153167725
| Step=4，current loss = 1.4904260635375977
| Step=5，current loss = 1.2605403661727905
| Step=6，current loss = 1.08897066116333
| Step=7，current loss = 1.5863913297653198
| Step=8，current loss = 1.1882991790771484
| Step=9，current loss = 1.0314640998840332
| Step=10，current loss = 1.1954221725463867
| Step=11，current loss = 1.1715610027313232
| Step=12，current loss = 1.1835170984268188
| Step=13，current loss = 0.9911937713623047
| Step=14，current loss = 1.012398362159729
| Step=15，current loss = 1.2501946687698364
epoch:2
| Step=0，current loss = 0.8314551115036011
| Step=1，current loss = 0.6235232353210449
| Step=2，current loss = 0.9312174320220947
| Step=3，current loss = 0.9073468446731567
| Step=4，current loss = 1.0458409786224365
| S