# 内容一：在MNIST数据集上构建网络进行分类

## 1. 实验前导

In [1]:
import torch
import torchvision
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
import torch.utils.data as tud
import numpy as np

## 2. 准备数据

### 学会使用Dataloader来加载数据
Dataloader能够帮我们打乱数据集，拿到batch数据 \
为了使用Dataloader，需要先定义一个Dataset类，并在其中实现以下三个function
- \__init__: 数据集初始化
- \__len__: 返回整个数据集有多少item
- \__getitem__: 根据给定的index返回一个item

调用Dataloader之前还要先定义dataset

In [2]:
# torchvision ships ready-made loaders for several common datasets, which
# makes loading them much easier than writing a custom Dataset.
# Here: MNIST, converted to tensors and normalised with fixed mean/std constants.
batch_size = 64
mnist_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.13066062,), std=(0.30810776,)),
])

mnist_train_data = datasets.MNIST(
    "./data", train=True, download=True, transform=mnist_transform)
mnist_test_data = datasets.MNIST(
    "./data", train=False, download=True, transform=mnist_transform)

# Wrap each dataset in a mini-batch iterator; only the training split is shuffled.
train_dataloader = tud.DataLoader(mnist_train_data, batch_size=batch_size, shuffle=True)
test_dataloader = tud.DataLoader(mnist_test_data, batch_size=batch_size)

## 3. 配置网络

### (1) 定义网络
- 继承 nn.Module
- 初始化函数
- forward 函数
- 其余可以根据模型需要定义相关的函数

In [3]:
# A small ConvNet-based classifier.
class Net(nn.Module):
    """Two convolution + max-pool stages followed by two linear layers.

    The shape comments assume a 1x28x28 input, which is what the hard-coded
    4*4*50 flatten size corresponds to.
    """

    def __init__(self):
        super().__init__()
        # 5x5 convolutions with stride 1 and no padding.
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=20, kernel_size=5, stride=1)   # 28 -> 24
        self.conv2 = nn.Conv2d(in_channels=20, out_channels=50, kernel_size=5, stride=1)  # 12 -> 8
        self.fc1 = nn.Linear(in_features=4 * 4 * 50, out_features=500)
        self.fc2 = nn.Linear(in_features=500, out_features=10)

    def forward(self, x):
        """Return the raw class scores (no softmax applied), 10 per sample."""
        features = F.max_pool2d(F.relu(self.conv1(x)), kernel_size=2, stride=2)         # 20 x 12 x 12
        features = F.max_pool2d(F.relu(self.conv2(features)), kernel_size=2, stride=2)  # 50 x 4 x 4
        flat = features.view(-1, 4 * 4 * 50)
        hidden = F.relu(self.fc1(flat))
        return self.fc2(hidden)


model = Net()

### (2) 定义损失函数

In [4]:
# Cross-entropy computed on raw class scores and averaged over the batch.
loss_fn = nn.CrossEntropyLoss(reduction="mean")

### (3) 定义优化算法

In [5]:
# SGD with momentum over every parameter of the network.
lr, momentum = 0.01, 0.5
optimizer = optim.SGD(params=model.parameters(), lr=lr, momentum=momentum)

## 4. 训练网络

- 模型一般需要训练若干个epoch
- 每个epoch我们把所有数据分成若干个batch
- 把每个batch的输入和标签都包装成Tensor（如果使用GPU，再移动到cuda上）
- forward pass
- 计算loss
- 清空gradient
- backward pass
- 更新模型参数
- 每隔一定的iteration输出loss，以及在验证集上做模型的评估

In [6]:
def train(model, train_dataloader, loss_fn, optimizer, epoch):
    """Run one pass over ``train_dataloader``, updating ``model`` in place.

    For every batch: forward pass, loss, gradient reset, backward pass and one
    optimizer step. The loss of the current batch is printed every 100
    iterations (starting with iteration 0), tagged with ``epoch``.

    Returns the same ``model`` object that was passed in.
    """
    log_every = 100
    model.train()
    for step, batch in enumerate(train_dataloader):
        inputs, targets = batch
        batch_loss = loss_fn(model(inputs), targets)
        optimizer.zero_grad()
        batch_loss.backward()
        optimizer.step()
        if step % log_every:
            continue  # not a logging iteration
        print("Train Epoch: {}, iteration: {}, loss: {}".format(
            epoch, step, batch_loss.item()))
    return model

## 5. 模型评估

In [7]:
def test(model,test_dataloader,loss_fn):
    model.eval()
    total_loss = 0.
    correct = 0.
    with torch.no_grad():
        for idx, (data,target) in enumerate(test_dataloader):
            output = model(data) # batch_size * 10        
            loss = loss_fn(output,target)*output.size(0)
            pred = output.argmax(dim=1)
            total_loss += loss
            correct += pred.eq(target).sum()
    total_loss /= len(test_dataloader.dataset)
    acc = 100.*correct/len(test_dataloader.dataset)
    print("Test Loss:{}, Accuracy:{}".format(total_loss,acc))
    return acc

In [None]:
# Alternate one training pass and one evaluation pass per epoch.
num_epochs = 2
for epoch in range(num_epochs):
    model = train(model=model, train_dataloader=train_dataloader,
                  loss_fn=loss_fn, optimizer=optimizer, epoch=epoch)
    test(model=model, test_dataloader=test_dataloader, loss_fn=loss_fn)

Train Epoch: 0, iteration: 0, loss: 2.2965400218963623
Train Epoch: 0, iteration: 100, loss: 0.49735018610954285


## 6. 模型存储

In [None]:
# Keep training and write a checkpoint each time the evaluation accuracy
# beats the best value seen so far. num_epochs is reused from the previous
# training cell.
best_valid_acc = 0.
for epoch in range(num_epochs):
    train(model, train_dataloader, loss_fn, optimizer, epoch)
    acc = test(model, test_dataloader, loss_fn)
    improved = acc > best_valid_acc
    if improved:
        best_valid_acc = acc
        torch.save(model.state_dict(), "best_mnist_cnn.pth")

### Load模型

In [None]:
# Rebuild the architecture, restore the checkpoint written by the
# best-accuracy loop ("best_mnist_cnn.pth"), and evaluate the restored copy
# rather than the model that is still in memory.
test_model = Net()
test_model.load_state_dict(torch.load("best_mnist_cnn.pth"))
test(test_model,test_dataloader,loss_fn)

### For FashionMNIST

In [None]:
# Same pipeline as for MNIST, applied to FashionMNIST with a freshly initialised network.
batch_size = 32
fashion_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.2860402,), std=(0.3530239,)),
])
fashion_train_data = datasets.FashionMNIST(
    "./fashion_mnist_data", train=True, download=True, transform=fashion_transform)
fashion_test_data = datasets.FashionMNIST(
    "./fashion_mnist_data", train=False, download=True, transform=fashion_transform)

# Turn the datasets into mini-batch iterators; only the training data is shuffled.
train_dataloader = tud.DataLoader(fashion_train_data, batch_size=batch_size, shuffle=True)
test_dataloader = tud.DataLoader(fashion_test_data, batch_size=batch_size)

lr, momentum = 0.01, 0.5
model = Net()
optimizer = optim.SGD(model.parameters(), lr=lr, momentum=momentum)

num_epochs = 2
for epoch in range(num_epochs):
    train(model, train_dataloader, loss_fn, optimizer, epoch)
    test(model, test_dataloader, loss_fn)

torch.save(model.state_dict(), "fashion_mnist_cnn.pth")

# 内容二：CNN模型的迁移学习

- 很多时候当我们训练一个新的图像分类任务，我们不会完全从一个随机的模型开始训练，而是利用预训练的模型来加速训练的过程。我们经常使用在ImageNet上的预训练模型
- 有两种方法做迁移学习
    - finetuning：从一个预训练模型开始，改变一些模型的架构，然后继续训练整个模型的参数；
    - feature extraction：不改变预训练模型的参数，只更新我们改变过的部分模型参数。（当成特征提取器来使用）

## 1. 实验前导

In [None]:
import os
import torch
import torchvision
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import models, datasets, transforms
import torch.utils.data as tud
import numpy as np

## 2. 准备数据

数据：使用hymenoptera_data数据集 \
数据集包括两类图片，bees和ants。这些数据都被处理成了可以使用ImageFolder来读取的格式。我们只需要把data_dir设置成数据的根目录，然后把model_name设置成我们想要使用的预训练模型

In [None]:
# Settings for the transfer-learning experiment.
data_dir = './data/hymenoptera_data'  # root folder; the train/ and val/ sub-folders are read from it
model_name = 'resnet18'               # architecture name
num_class = 2                         # number of target classes
input_size = 224                      # crop size passed to the image transforms

读入数据: 把数据预处理成相应的格式

In [None]:
# Preview pipeline: augmented training images converted to tensors, without normalisation.
# batch_size is set here so that this cell does not depend on a value left
# behind by an earlier, unrelated cell.
batch_size = 32
all_imgs = datasets.ImageFolder(os.path.join(data_dir,"train"),
                                transform=transforms.Compose([
                                    transforms.RandomResizedCrop(input_size),
                                    transforms.RandomHorizontalFlip(),
                                    transforms.ToTensor(),
                                ]))
loader = tud.DataLoader(all_imgs,batch_size=batch_size,shuffle=True)

In [None]:
# Display the image tensor of one augmented training sample (label discarded).
sample_image, sample_label = all_imgs[20]
sample_image

In [None]:
# Training and validation pipelines; both are normalised with the same channel statistics.
batch_size = 32
normalize = transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])

# Training images get random crops and flips as augmentation.
train_transform = transforms.Compose([
    transforms.RandomResizedCrop(input_size),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    normalize,
])
# Validation images are resized and centre-cropped deterministically.
eval_transform = transforms.Compose([
    transforms.Resize(input_size),
    transforms.CenterCrop(input_size),
    transforms.ToTensor(),
    normalize,
])

train_imgs = datasets.ImageFolder(os.path.join(data_dir, "train"), transform=train_transform)
test_imgs = datasets.ImageFolder(os.path.join(data_dir, "val"), transform=eval_transform)

train_dataloader = tud.DataLoader(train_imgs, batch_size=batch_size, shuffle=True)
test_dataloader = tud.DataLoader(test_imgs, batch_size=batch_size)

## 3. 配置网络

### (1) 定义网络

In [None]:
# format
def initialize_model(model_name,num_class,use_pretrained=True,feature_extract=True):
    """Build a classifier whose final layer has ``num_class`` outputs.

    Only ``"resnet18"`` is supported; for any other ``model_name`` a message
    is printed and ``None`` is returned.

    :param model_name: architecture name
    :param num_class: output size of the replacement ``fc`` layer
    :param use_pretrained: forwarded to ``models.resnet18(pretrained=...)``
    :param feature_extract: when true, every parameter of the loaded network
        gets ``requires_grad = False`` before the head is replaced, so only
        the new ``fc`` layer stays trainable
    :return: the configured model, or ``None`` for an unsupported name
    """
    if model_name != "resnet18":
        print("model not implemented")
        return None

    backbone = models.resnet18(pretrained=use_pretrained)
    if feature_extract:
        # Freeze the existing weights; the head created below is new and
        # therefore trainable by default.
        for weight in backbone.parameters():
            weight.requires_grad = False
    backbone.fc = nn.Linear(backbone.fc.in_features, num_class)
    return backbone
# Build a ResNet-18 with a 2-class head, without pretrained weights and with
# every layer left trainable.
# NOTE(review): this section is about transfer learning, yet pretrained
# weights are disabled here - confirm that this is intentional.
model_ft = initialize_model(model_name="resnet18", num_class=2,
                            use_pretrained=False, feature_extract=False)

In [None]:
# Show whether gradients are enabled for an early convolution and for the new head.
for checked_weight in (model_ft.layer1[0].conv1.weight, model_ft.fc.weight):
    print(checked_weight.requires_grad)

### (2) 定义损失函数

In [None]:
# Cross-entropy on raw class scores; "mean" is the default reduction, spelled out here.
loss_fn = nn.CrossEntropyLoss(reduction="mean")

### (3) 定义优化算法

In [None]:
# SGD with momentum over all parameters of model_ft.
lr, momentum = 0.01, 0.5
optimizer = optim.SGD(params=model_ft.parameters(), lr=lr, momentum=momentum)

## 4. 训练网络

In [None]:
def train_model(model,train_dataloader,loss_fn,optimizer,epoch):
    """Train ``model`` for one pass over ``train_dataloader`` and print epoch statistics.

    For every batch: forward pass, loss, gradient reset, backward pass and an
    optimizer step. Batch losses are weighted by the batch size so that the
    reported loss is a per-sample average over the whole dataset.

    Prints the average loss and the accuracy (in percent) for ``epoch``.
    Returns nothing; ``model`` is updated in place.
    """
    model.train()
    total_loss = 0.
    total_corrects = 0
    for inputs, labels in train_dataloader:
        outputs = model(inputs)
        loss = loss_fn(outputs, labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        preds = outputs.argmax(dim=1)
        total_loss += loss.item() * inputs.size(0)
        # .item() keeps the counter a Python int, so the accuracy below is a
        # plain float instead of a 0-dim tensor.
        total_corrects += preds.eq(labels).sum().item()
    num_samples = len(train_dataloader.dataset)
    epoch_loss = total_loss / num_samples
    epoch_accuracy = 100. * total_corrects / num_samples
    print("Epoch:{}, Training Loss:{}, Training Acc:{}".format(epoch, epoch_loss, epoch_accuracy))

## 5. 模型评估

In [None]:
def test_model(model,test_dataloader,loss_fn):
    model.eval()
    total_loss = 0.
    total_corrects = 0.
    with torch.no_grad():
        for idx, (inputs, labels) in enumerate(test_dataloader):
            outputs = model(inputs)
            loss = loss_fn(outputs,labels)
            preds = outputs.argmax(dim=1)
            total_loss += loss.item() * inputs.size(0)
            total_corrects += torch.sum(preds.eq(labels))
    epoch_loss = total_loss / len(test_dataloader.dataset)
    epoch_accuracy = 100.*total_corrects / len(test_dataloader.dataset)
    print("acc type:", epoch_accuracy)
    print("Test Loss:{}, Test Acc:{}".format(epoch_loss,epoch_accuracy))  
    return epoch_accuracy 

In [None]:
# Train model_ft for a few epochs, evaluating on the validation loader after each one.
num_epochs = 5
for epoch in range(num_epochs):
    train_model(model=model_ft, train_dataloader=train_dataloader,
                loss_fn=loss_fn, optimizer=optimizer, epoch=epoch)
    acc = test_model(model=model_ft, test_dataloader=test_dataloader, loss_fn=loss_fn)

# 手动搭建resnet

In [None]:
import time
import torch
from torch import nn
import torch.utils.model_zoo as model_zoo
from torch.autograd import Variable
import os
from scipy import misc
import numpy as np
import scipy
import matplotlib.pyplot as plt
import torchvision

# Checkpoint URLs keyed by architecture name. Every URL has the form
# <base>/<name>-<hash>.pth, so only the name/hash pairs are listed.
_CHECKPOINT_HASHES = (
    ('resnet18', '5c106cde'),
    ('resnet34', '333f7ec4'),
    ('resnet50', '19c8e357'),
    ('resnet101', '5d3b4d8f'),
    ('resnet152', 'b121ed2d'),
)
model_urls = {
    arch: 'https://download.pytorch.org/models/{}-{}.pth'.format(arch, digest)
    for arch, digest in _CHECKPOINT_HASHES
}


def conv3x3(in_planes, out_planes, stride=1):
    """Build a bias-free 3x3 convolution with padding 1.

    :param in_planes: number of input channels
    :param out_planes: number of output channels
    :param stride: convolution stride
    :return: the configured ``nn.Conv2d`` module
    """
    return nn.Conv2d(in_planes, out_planes, 3, stride, 1, bias=False)


class BasicBlock(nn.Module):
    """Residual block built from two 3x3 convolutions, each followed by batch norm."""
    expansion = 1  # the block's output has planes * expansion channels

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        """
        :param inplanes: number of input channels
        :param planes: number of channels produced by both convolutions
        :param stride: stride of the first convolution
        :param downsample: optional module applied to the input to form the
            shortcut; when ``None`` the input itself is used
        """
        super().__init__()
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn1 = nn.BatchNorm2d(planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = nn.BatchNorm2d(planes)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        """
        :param x: input feature map
        :return: ``relu(main_branch(x) + shortcut(x))``
        """
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))

        shortcut = x if self.downsample is None else self.downsample(x)
        out += shortcut
        return self.relu(out)


class Bottleneck(nn.Module):
    """Residual block with a 1x1 -> 3x3 -> 1x1 convolution stack.

    The last 1x1 convolution produces ``planes * expansion`` channels.
    """
    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        """
        :param inplanes: number of input channels
        :param planes: channel width of the first two convolutions
        :param stride: stride of the 3x3 convolution
        :param downsample: optional module applied to the input to form the
            shortcut; when ``None`` the input itself is used
        """
        super().__init__()
        width_out = planes * self.expansion

        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, width_out, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(width_out)

        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        """
        :param x: input feature map
        :return: ``relu(main_branch(x) + shortcut(x))``
        """
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))

        shortcut = x if self.downsample is None else self.downsample(x)
        out += shortcut
        return self.relu(out)


class ResNet(nn.Module):
    """ResNet template: stem, four residual stages, global average pooling and a linear head."""

    def __init__(self, block, layers, n_classes=1000):
        """
        :param block: residual block class (BasicBlock or Bottleneck); it must
            provide an ``expansion`` class attribute and accept
            ``(inplanes, planes, stride, downsample)``
        :param layers: number of blocks in each of the four stages
        :param n_classes: output size of the final fully connected layer
        """
        super(ResNet, self).__init__()
        self.n_classes = n_classes
        self.inplanes = 64  # channels fed to the next block; updated by _make_layer
        # Stem convolution; padding = (kernel_size - 1) / 2, no bias.
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=64, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(num_features=64)
        self.relu = nn.ReLU(inplace=True)

        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)  # padding = (kernel_size - 1) / 2
        self.layer1 = self._make_layer(block=block, planes=64, blocks=layers[0])
        self.layer2 = self._make_layer(block=block, planes=128, blocks=layers[1], stride=2)
        self.layer3 = self._make_layer(block=block, planes=256, blocks=layers[2], stride=2)
        self.layer4 = self._make_layer(block=block, planes=512, blocks=layers[3], stride=2)
        # Adaptive pooling always reduces the last feature map to 1x1, so the
        # linear head receives 512 * expansion features for any input
        # resolution. A fixed 7x7 average pool only fits 224x224 inputs.
        self.avgpool = nn.AdaptiveAvgPool2d(output_size=(1, 1))
        self.fc = nn.Linear(in_features=512*block.expansion, out_features=self.n_classes)

        # Initialise the convolution and batch-norm layers.
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

    def _make_layer(self, block, planes, blocks, stride=1):
        """Build one stage made of ``blocks`` instances of ``block``.

        Only the first block of the stage receives ``stride`` and the optional
        shortcut projection; the remaining blocks use the defaults.
        """
        downsample = None
        # A 1x1 projection on the shortcut is needed whenever the first block
        # changes the spatial size (stride != 1) or the channel count.
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                nn.Conv2d(in_channels=self.inplanes, out_channels=planes * block.expansion, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(num_features=planes * block.expansion)
            )

        layers = []
        layers.append(block(self.inplanes, planes, stride, downsample))
        # Every following block takes the expanded channel count as input.
        self.inplanes = planes * block.expansion

        for i in range(1, blocks):
            layers.append(block(self.inplanes, planes))

        return nn.Sequential(*layers)

    def forward(self, x):
        """
        :param x: batch of 3-channel images
        :return: class scores, one row of ``n_classes`` values per image
        """
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        x = self.avgpool(x)
        x = x.view(x.size(0), -1)  # flatten to (batch, features)
        x = self.fc(x)

        return x

    def load_weights(self, url):
        """Download a checkpoint and copy its tensors into this model.

        :param url: despite its name, a key of the module-level ``model_urls``
            table (e.g. ``'resnet50'``), not a URL
        """
        pretrained_dict = model_zoo.load_url(model_urls[url])
        model_dict = self.state_dict()
        if self.n_classes!=1000:
            # The checkpoint head has a different output size; keep this
            # model's own fc parameters.
            new_dict = {k: v for k, v in pretrained_dict.items() if k not in {'fc.weight', 'fc.bias'}}
        else:
            new_dict = pretrained_dict
        # Start from the model's own state so that keys absent from the
        # checkpoint keep their current values.
        model_dict.update(new_dict)
        self.load_state_dict(model_dict)


def resnet50(pretrained=False, **kwargs):
    """Constructs a ResNet-50 model.

    Args:
        pretrained (bool): If True, the checkpoint listed under
            ``model_urls['resnet50']`` is loaded into the model.
        **kwargs: forwarded to ``ResNet`` (e.g. ``n_classes``).
    """
    model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs)
    if pretrained:
        # ResNet.load_weights copies the checkpoint tensors over the freshly
        # initialised ones and skips the fc head only when n_classes != 1000.
        # The merge direction matters: updating the checkpoint dict with
        # model.state_dict() would replace every pretrained tensor with its
        # random initial value.
        model.load_weights('resnet50')
    return model

# Build the pretrained ResNet-50, switch it to evaluation mode and display it.
model = resnet50(pretrained=True).eval()
model



In [None]:
import sys
import torch.nn as nn
from PIL import Image
import glob
import numpy as np
import cv2
import torchvision.transforms as transforms

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)
model.eval()

image_path = "./test.jpg"
# Convert to a CxHxW float tensor in [0, 1] and normalise with the same
# channel statistics used for the ImageFolder pipelines earlier in this
# notebook.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
])

image = cv2.imread(image_path)
if image is None:
    # cv2.imread signals an unreadable or missing file by returning None.
    raise FileNotFoundError("cannot read image: {}".format(image_path))
image = cv2.resize(image, (224, 224))
# OpenCV loads images as BGR; convert to RGB before building the PIL image.
image = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))

tensor = transform(image).unsqueeze(0).to(device)  # add the batch dimension
with torch.no_grad():  # inference only, no gradients needed
    output = model(tensor)
_, pred = torch.max(output, 1)
print(pred.item())




In [None]:
# Read the class names, one per line, with surrounding whitespace removed.
with open('./imagenet.txt') as label_file:
    classes = [raw_line.strip() for raw_line in label_file]

In [None]:
# Map the predicted class index of the first (only) image to its name.
print(classes[int(pred[0])])