# 内容一：在MNIST数据集上构建网络进行分类

## 1. 实验前导

In [1]:
import torch
import torchvision
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
import torch.utils.data as tud
import numpy as np

## 2. 准备数据

### 学会使用Dataloader来加载数据
Dataloader能够帮我们打乱数据集，拿到batch数据 \
为了使用Dataloader，需要定义以下三个function
- \__init__: 模型初始化
- \__len__: 返回整个数据集有多少item
- \__getitem__: 根据给定的index返回一个item

调用Dataloader之前还要先定义dataset

In [2]:
# Pytorch帮助我们预先加载了一些常用的数据集
# 如果使用这些数据集，会相对容易的进行数据加载
# 例如：常用的Mnist数据集
mnist_train_data = datasets.MNIST("./data",train=True,download=True,
                                 transform = transforms.Compose([
                                     transforms.ToTensor(),
                                     transforms.Normalize(mean=(0.13066062,),std=(0.30810776,))
                                 ]))
batch_size = 64
train_dataloader = tud.DataLoader(mnist_train_data,batch_size = batch_size,shuffle=True) # 将dataset转换为iterator
mnist_test_data = datasets.MNIST("./data",train=False,download=True,
                                 transform = transforms.Compose([
                                     transforms.ToTensor(),
                                     transforms.Normalize(mean=(0.13066062,),std=(0.30810776,))
                                 ]))
test_dataloader = tud.DataLoader(mnist_test_data,batch_size = batch_size)

## 3. 配置网络

### (1) 定义网络
- 继承 nn.Module
- 初始化函数
- forward 函数
- 其余可以根据模型需要定义相关的函数

In [3]:
# 定义一个简单的基于ConvNet的简单神经网络
class Net(nn.Module):
    def __init__(self):
        super(Net,self).__init__() # the input is 1*28*28
        self.conv1 = nn.Conv2d(1,20,5,1) # (28-5)/1+1=24, 20*24*24
        self.conv2 = nn.Conv2d(20,50,5,1) # 12-5+1=8
        self.fc1 = nn.Linear(4*4*50,500)
        self.fc2 = nn.Linear(500,10)
    def forward(self,x):
        x = F.relu(self.conv1(x)) # 20 * 24 * 24
        x = F.max_pool2d(x,2,2) # 20 * 12 * 12
        x = F.relu(self.conv2(x)) # 50 * 8 * 8
        x = F.max_pool2d(x,2,2) # 50 * 4 * 4
        x = x.view(-1,4*4*50)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x #F.log_softmax(x,dim=1)
model = Net()

### (2) 定义损失函数

In [4]:
loss_fn = nn.CrossEntropyLoss(reduction='mean')

### (3) 定义优化算法

In [5]:
lr = 0.01
momentum = 0.5
optimizer = optim.SGD(model.parameters(),lr=lr,momentum=momentum)

## 4. 训练网络

- 模型一般需要训练若干个epoch
- 每个epoch我们把所有数据分成若干个batch
- 把每个batch的输入和输出都包装成cuda Tensor
- forward pass
- 计算loss
- 清空gradient
- backward pass
- 更新模型参数
- 每隔一定的iteration输出loss，以及在验证集上做模型的评估

In [6]:
def train(model,train_dataloader,loss_fn,optimizer,epoch):
    model.train()
    for idx, (data, label) in enumerate(train_dataloader):
        output = model(data)
        loss = loss_fn(output,label)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if idx % 100 == 0:
            print("Train Epoch: {}, iteration: {}, loss: {}".format(
                epoch,idx,loss.item()))  
    return model

## 5. 模型评估

In [7]:
def test(model,test_dataloader,loss_fn):
    model.eval()
    total_loss = 0.
    correct = 0.
    with torch.no_grad():
        for idx, (data,target) in enumerate(test_dataloader):
            output = model(data) # batch_size * 10        
            loss = loss_fn(output,target)*output.size(0)
            pred = output.argmax(dim=1)
            total_loss += loss
            correct += pred.eq(target).sum()
    total_loss /= len(test_dataloader.dataset)
    acc = 100.*correct/len(test_dataloader.dataset)
    print("Test Loss:{}, Accuracy:{}".format(total_loss,acc))
    return acc

In [8]:
num_epochs = 2
for epoch in range(num_epochs):
    model = train(model,train_dataloader,loss_fn,optimizer,epoch)
    test(model,test_dataloader,loss_fn)

Train Epoch: 0, iteration: 0, loss: 2.3065099716186523
Train Epoch: 0, iteration: 100, loss: 0.5287149548530579
Train Epoch: 0, iteration: 200, loss: 0.39324307441711426
Train Epoch: 0, iteration: 300, loss: 0.23228183388710022
Train Epoch: 0, iteration: 400, loss: 0.2594849169254303
Train Epoch: 0, iteration: 500, loss: 0.20849654078483582
Train Epoch: 0, iteration: 600, loss: 0.276593416929245
Train Epoch: 0, iteration: 700, loss: 0.2310458868741989
Train Epoch: 0, iteration: 800, loss: 0.08905921131372452
Train Epoch: 0, iteration: 900, loss: 0.07979127019643784
Test Loss:0.09404375404119492, Accuracy:97
Train Epoch: 1, iteration: 0, loss: 0.05104178935289383
Train Epoch: 1, iteration: 100, loss: 0.09109032899141312
Train Epoch: 1, iteration: 200, loss: 0.06026725843548775
Train Epoch: 1, iteration: 300, loss: 0.10993199050426483
Train Epoch: 1, iteration: 400, loss: 0.17266367375850677
Train Epoch: 1, iteration: 500, loss: 0.04914695397019386
Train Epoch: 1, iteration: 600, loss: 0

## 6. 模型存储

In [9]:
#torch.save(model.state_dict(),"mnist_cnn.pth")
# num_epochs = 2
best_valid_acc = 0.
for epoch in range(num_epochs):
    train(model,train_dataloader,loss_fn,optimizer,epoch)
    acc = test(model,test_dataloader,loss_fn)
    if acc > best_valid_acc:
        best_valid_acc = acc
        torch.save(model.state_dict(),"best_mnist_cnn.pth")

Train Epoch: 0, iteration: 0, loss: 0.11176422983407974
Train Epoch: 0, iteration: 100, loss: 0.14738669991493225
Train Epoch: 0, iteration: 200, loss: 0.059008121490478516
Train Epoch: 0, iteration: 300, loss: 0.04163815453648567
Train Epoch: 0, iteration: 400, loss: 0.07686883956193924
Train Epoch: 0, iteration: 500, loss: 0.07639259845018387
Train Epoch: 0, iteration: 600, loss: 0.10555222630500793
Train Epoch: 0, iteration: 700, loss: 0.04965192824602127
Train Epoch: 0, iteration: 800, loss: 0.03391193971037865
Train Epoch: 0, iteration: 900, loss: 0.03760581091046333
Test Loss:0.045484866946935654, Accuracy:98
Train Epoch: 1, iteration: 0, loss: 0.06344600766897202
Train Epoch: 1, iteration: 100, loss: 0.011947263032197952
Train Epoch: 1, iteration: 200, loss: 0.1278928965330124
Train Epoch: 1, iteration: 300, loss: 0.018403593450784683
Train Epoch: 1, iteration: 400, loss: 0.03823499754071236
Train Epoch: 1, iteration: 500, loss: 0.07683011144399643
Train Epoch: 1, iteration: 600

### Load模型

In [10]:
test_model = Net()
test_model.load_state_dict(torch.load("mnist_cnn.pth"))
test(model,test_dataloader,loss_fn)

Test Loss:0.050678204745054245, Accuracy:98


tensor(98)

### For FashionMNIST

In [11]:
batch_size = 32
train_dataloader = tud.DataLoader(
    datasets.FashionMNIST("./fashion_mnist_data",train=True,download=True,
                   transform=transforms.Compose([
                       transforms.ToTensor(),
                       transforms.Normalize(mean=(0.2860402,),std=(0.3530239,))
                   ])),
    batch_size=batch_size,
    shuffle=True) # 将dataset转换为iterator
test_dataloader = tud.DataLoader(
    datasets.FashionMNIST("./fashion_mnist_data",train=False,download=True,
                   transform=transforms.Compose([
                       transforms.ToTensor(),
                       transforms.Normalize(mean=(0.2860402,),std=(0.3530239,))
                   ])),
    batch_size=batch_size) # 将dataset转换为iterator
lr = 0.01
momentum = 0.5
model = Net()
optimizer = optim.SGD(model.parameters(),lr=lr,momentum=momentum)
num_epochs = 2

for epoch in range(num_epochs):
    train(model,train_dataloader,loss_fn,optimizer,epoch)
    test(model,test_dataloader,loss_fn)
    
torch.save(model.state_dict(),"fashion_mnist_cnn.pth")

Train Epoch: 0, iteration: 0, loss: 2.3010900020599365
Train Epoch: 0, iteration: 100, loss: 0.7379857897758484
Train Epoch: 0, iteration: 200, loss: 0.9390102028846741
Train Epoch: 0, iteration: 300, loss: 0.5211475491523743
Train Epoch: 0, iteration: 400, loss: 0.6993981599807739
Train Epoch: 0, iteration: 500, loss: 0.8875253200531006
Train Epoch: 0, iteration: 600, loss: 0.47595497965812683
Train Epoch: 0, iteration: 700, loss: 0.5478909015655518
Train Epoch: 0, iteration: 800, loss: 0.6199755668640137
Train Epoch: 0, iteration: 900, loss: 0.7485199570655823
Train Epoch: 0, iteration: 1000, loss: 0.3776535093784332
Train Epoch: 0, iteration: 1100, loss: 0.5473750829696655
Train Epoch: 0, iteration: 1200, loss: 0.6438551545143127
Train Epoch: 0, iteration: 1300, loss: 0.3094743490219116
Train Epoch: 0, iteration: 1400, loss: 0.46001294255256653
Train Epoch: 0, iteration: 1500, loss: 0.6064783930778503
Train Epoch: 0, iteration: 1600, loss: 0.7342036366462708
Train Epoch: 0, iteratio

# 内容二：CNN模型的迁移学习

- 很多时候当我们训练一个新的图像分类任务，我们不会完全从一个随机的模型开始训练，而是利用预训练的模型来加速训练的过程。我们经常使用在ImageNet上的预训练模型
- 有两种方法做迁移学习
    - finetuning：从一个预训练模型开始，改变一些模型的架构，然后继续训练整个模型的参数；
    - feature extraction：不改变预训练模型的参数，只更新我们改变过的部分模型参数。（当成特征提取器来使用）

## 1. 实验前导

In [12]:
import os
import torch
import torchvision
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import models, datasets, transforms
import torch.utils.data as tud
import numpy as np

## 2. 准备数据

数据：使用hymenoptera_data数据集 \
数据集包括两类图片，bees和ants。这些数据都被处理成了可以使用ImageFolder来读取的格式。我们只需要把data_dir设置成数据的根目录，然后把model_name设置成我们想要使用的预训练模型

In [13]:
# format
data_dir = "./data/hymenoptera_data"
model_name = "resnet18"
num_class = 2
#feature_extract = True
input_size = 224

读入数据: 把数据预处理成相应的格式

In [14]:
# progress
all_imgs = datasets.ImageFolder(os.path.join(data_dir,"train"),
                                transform=transforms.Compose([
                                    transforms.RandomResizedCrop(input_size),
                                    transforms.RandomHorizontalFlip(),
                                    transforms.ToTensor(),                                    
                                ]))
loader = tud.DataLoader(all_imgs,batch_size=batch_size,shuffle=True)

In [15]:
all_imgs[20][0]

tensor([[[0.5059, 0.5098, 0.5098,  ..., 0.7020, 0.6902, 0.6941],
         [0.5098, 0.5020, 0.4941,  ..., 0.6863, 0.6824, 0.6941],
         [0.5059, 0.4941, 0.4824,  ..., 0.6745, 0.6863, 0.6863],
         ...,
         [0.4902, 0.4706, 0.2941,  ..., 0.5725, 0.6824, 0.5647],
         [0.5098, 0.4588, 0.2588,  ..., 0.4471, 0.6118, 0.5686],
         [0.4784, 0.3843, 0.3294,  ..., 0.5804, 0.3725, 0.3725]],

        [[0.5176, 0.5176, 0.5059,  ..., 0.7098, 0.7059, 0.7059],
         [0.5098, 0.5098, 0.5098,  ..., 0.7020, 0.7020, 0.7059],
         [0.5059, 0.5020, 0.5059,  ..., 0.7020, 0.6980, 0.6941],
         ...,
         [0.5059, 0.4706, 0.3059,  ..., 0.5765, 0.6902, 0.5882],
         [0.5176, 0.4275, 0.2549,  ..., 0.4431, 0.6196, 0.5765],
         [0.4902, 0.3765, 0.3020,  ..., 0.5725, 0.3647, 0.3569]],

        [[0.5451, 0.5529, 0.5608,  ..., 0.7451, 0.7333, 0.7294],
         [0.5529, 0.5373, 0.5373,  ..., 0.7373, 0.7255, 0.7333],
         [0.5490, 0.5412, 0.5569,  ..., 0.7294, 0.7216, 0.

In [16]:
# format
batch_size = 32
train_imgs = datasets.ImageFolder(os.path.join(data_dir,"train"),
                                transform=transforms.Compose([
                                    transforms.RandomResizedCrop(input_size),
                                    transforms.RandomHorizontalFlip(),
                                    transforms.ToTensor(),
                                    transforms.Normalize([0.485, 0.456, 0.406],[0.229,0.224,0.225])
                                ]))
train_dataloader = tud.DataLoader(train_imgs,batch_size=batch_size,shuffle=True)
test_imgs = datasets.ImageFolder(os.path.join(data_dir,"val"),
                                transform=transforms.Compose([
                                    transforms.Resize(input_size),  
                                    transforms.CenterCrop(input_size),
                                    transforms.ToTensor(),
                                    transforms.Normalize([0.485, 0.456, 0.406],[0.229,0.224,0.225])
                                ]))
test_dataloader = tud.DataLoader(test_imgs,batch_size=batch_size)

## 3. 配置网络

### (1) 定义网络

In [17]:
# format
def initialize_model(model_name,num_class,use_pretrained=True,feature_extract=True):
    if model_name == "resnet18":
        model_ft = models.resnet18(pretrained=use_pretrained)
        if feature_extract: # do not update the parameters
            for param in model_ft.parameters():
                param.requires_grad = False
        num_ftrs = model_ft.fc.in_features
        model_ft.fc = nn.Linear(num_ftrs,num_class)        
    else:
        print("model not implemented")
        return None
    return model_ft
model_ft = initialize_model("resnet18",2,use_pretrained=False,feature_extract=False)

In [18]:
print(model_ft.layer1[0].conv1.weight.requires_grad)
print(model_ft.fc.weight.requires_grad)

True
True


### (2) 定义损失函数

In [19]:
loss_fn = nn.CrossEntropyLoss()

### (3) 定义优化算法

In [20]:
lr = 0.01
momentum = 0.5
optimizer = optim.SGD(model_ft.parameters(),lr=lr,momentum=momentum)

## 4. 训练网络

In [31]:
def train_model(model,train_dataloader,loss_fn,optimizer,epoch):
    model.train()
    total_loss = 0.
    total_corrects = 0.
    for idx, (inputs, labels) in enumerate(train_dataloader):
        outputs = model(inputs)
        loss = loss_fn(outputs,labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        preds = outputs.argmax(dim=1)
        total_loss += loss.item() * inputs.size(0)
        total_corrects += torch.sum(preds.eq(labels))
    epoch_loss = total_loss / len(train_dataloader.dataset)
    epoch_accuracy = 100.*total_corrects / len(train_dataloader.dataset)
    print("Epoch:{}, Training Loss:{}, Traning Acc:{}".format(epoch,epoch_loss,epoch_accuracy))  
    #return model        

## 5. 模型评估

In [32]:
def test_model(model,test_dataloader,loss_fn):
    model.eval()
    total_loss = 0.
    total_corrects = 0.
    with torch.no_grad():
        for idx, (inputs, labels) in enumerate(test_dataloader):
            outputs = model(inputs)
            loss = loss_fn(outputs,labels)
            preds = outputs.argmax(dim=1)
            total_loss += loss.item() * inputs.size(0)
            total_corrects += torch.sum(preds.eq(labels))
    epoch_loss = total_loss / len(test_dataloader.dataset)
    epoch_accuracy = 100.*total_corrects / len(test_dataloader.dataset)
    print("acc type:", epoch_accuracy)
    print("Test Loss:{}, Test Acc:{}".format(epoch_loss,epoch_accuracy))  
    return epoch_accuracy 

In [33]:
num_epochs = 5
for epoch in range(num_epochs):
    train_model(model_ft,train_dataloader,loss_fn,optimizer,epoch)
    acc = test_model(model_ft,test_dataloader,loss_fn)

Epoch:0, Training Loss:0.5410346681954431, Traning Acc:72
acc type: tensor(71)
Test Loss:0.690188929925557, Test Acc:71
Epoch:1, Training Loss:0.5348944566288932, Traning Acc:75
acc type: tensor(69)
Test Loss:0.5898920726152806, Test Acc:69
Epoch:2, Training Loss:0.493601351487832, Traning Acc:72
acc type: tensor(75)
Test Loss:0.536457032549615, Test Acc:75
Epoch:3, Training Loss:0.5270603924501137, Traning Acc:71
acc type: tensor(49)
Test Loss:1.2412341872071908, Test Acc:49
Epoch:4, Training Loss:0.5180376953765994, Traning Acc:72
acc type: tensor(69)
Test Loss:0.6323698575979744, Test Acc:69


# 手动搭建resnet

In [38]:
# format
def initialize_model(model_name,num_class,use_pretrained=True,feature_extract=True):
    if model_name == "resnet50":
        model_ft = models.resnet50(pretrained=use_pretrained)
        if feature_extract: # do not update the parameters
            for param in model_ft.parameters():
                param.requires_grad = False
        num_ftrs = model_ft.fc.in_features
        model_ft.fc = nn.Linear(num_ftrs,num_class)        
    else:
        print("model not implemented")
        return None
    return model_ft
model_ft = initialize_model("resnet50",2,use_pretrained=False,feature_extract=False)

In [39]:
# model_ft = model_ft.to('cuda')
model_ft.eval()

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [40]:
num_epochs = 5
for epoch in range(num_epochs):
    train_model(model_ft,train_dataloader,loss_fn,optimizer,epoch)
    acc = test_model(model_ft,test_dataloader,loss_fn)

Epoch:0, Training Loss:0.752627332679561, Traning Acc:50
acc type: tensor(45)
Test Loss:0.7095993141722835, Test Acc:45
Epoch:1, Training Loss:0.7561357343783144, Traning Acc:50
acc type: tensor(45)
Test Loss:0.748934227267122, Test Acc:45
Epoch:2, Training Loss:0.7443977398950545, Traning Acc:50
acc type: tensor(45)
Test Loss:0.7714804520014844, Test Acc:45
Epoch:3, Training Loss:0.7539092409806173, Traning Acc:50
acc type: tensor(45)
Test Loss:0.7764017223532683, Test Acc:45
Epoch:4, Training Loss:0.746807972915837, Traning Acc:50
acc type: tensor(45)
Test Loss:0.7702447557760999, Test Acc:45


## 冻结某些层参数不训练

In [43]:
model_ft = models.resnet50(pretrained=True)
ct = 0
for child in model_ft.children():
    ct += 1
    if ct < 7:
        for param in child.parameters():
            param.requires_grad = False

In [45]:
# 优化器中filter一下
optimizer = optim.SGD(filter(lambda p: p.requires_grad, model_ft.parameters()), lr=1e-3)

In [46]:
model_ft.eval()

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [48]:
for param in model_ft.parameters():
    print(param.requires_grad)

False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True


In [49]:
# 重新训练
num_epochs = 5
for epoch in range(num_epochs):
    train_model(model_ft,train_dataloader,loss_fn,optimizer,epoch)
    acc = test_model(model_ft,test_dataloader,loss_fn)

Epoch:0, Training Loss:8.656122301445633, Traning Acc:0
acc type: tensor(0)
Test Loss:8.939993063608805, Test Acc:0
Epoch:1, Training Loss:6.455522443427414, Traning Acc:2
acc type: tensor(7)
Test Loss:5.62908474915947, Test Acc:7
Epoch:2, Training Loss:4.495868518704274, Traning Acc:18
acc type: tensor(26)
Test Loss:3.646022039301255, Test Acc:26
Epoch:3, Training Loss:2.8100674543224398, Traning Acc:44
acc type: tensor(53)
Test Loss:2.27519834664912, Test Acc:53
Epoch:4, Training Loss:1.7467065834608235, Traning Acc:69
acc type: tensor(72)
Test Loss:1.5153454978481617, Test Acc:72


# 手动搭建resnet

In [91]:
import time
import torch
from torch import nn
import torch.utils.model_zoo as model_zoo
from torch.autograd import Variable
import os
from scipy import misc
import numpy as np
import scipy
import matplotlib.pyplot as plt
import torchvision

model_urls = {
    'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth',
    'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth',
    'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',
    'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth',
    'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth',
}


def conv3x3(in_planes, out_planes, stride=1):
    """ 3x3卷积（padding）
    :param in_planes:
    :param out_planes:
    :param stride:
    :return:
    """
    return nn.Conv2d(in_channels=in_planes, out_channels=out_planes, kernel_size=3, stride=stride, padding=1, bias=False)


class BasicBlock(nn.Module):
    """
    BasicBlock
    """
    expansion = 1  # 最后一层是前一层的expansion倍

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(BasicBlock, self).__init__()
        self.conv1 = conv3x3(in_planes=inplanes, out_planes=planes, stride=stride)
        self.bn1 = nn.BatchNorm2d(num_features=planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(in_planes=planes, out_planes=planes)
        self.bn2 = nn.BatchNorm2d(num_features=planes)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        """
        :param x:
        :return:
        """
        residual = x
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)

        if self.downsample is not None:
            residual = self.downsample(x)

        out += residual
        out = self.relu(out)

        return out


class Bottleneck(nn.Module):
    """
    Bottleneck
    """
    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(Bottleneck, self).__init__()
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)

        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
                               padding=1, bias=False)

        self.bn2 = nn.BatchNorm2d(planes)

        self.conv3 = nn.Conv2d(planes, planes * self.expansion, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(planes * self.expansion)

        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        """
        :param x:
        :return:
        """
        residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)

        out = self.conv3(out)
        out = self.bn3(out)

        if self.downsample is not None:
            residual = self.downsample(x)

        out += residual
        out = self.relu(out)

        return out


class ResNet(nn.Module):
    """ Constructs  a ResNet template
    """
    def __init__(self, block, layers, n_classes=1000):
        """
        :param block: BasicBlock or Bottleneck
        :param layers:
        :param num_classes:
        """
        super(ResNet, self).__init__()
        self.n_classes = n_classes
        self.inplanes = 64
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=64, kernel_size=7, stride=2, padding=3, bias=False)  # padding=(kernel_size-1)/2 bias=False
        self.bn1 = nn.BatchNorm2d(num_features=64)
        self.relu = nn.ReLU(inplace=True)

        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)  # padding=(kernel_size-1)/2
        self.layer1 = self._make_layer(block=block, planes=64, blocks=layers[0])
        self.layer2 = self._make_layer(block=block, planes=128, blocks=layers[1], stride=2)
        self.layer3 = self._make_layer(block=block, planes=256, blocks=layers[2], stride=2)
        self.layer4 = self._make_layer(block=block, planes=512, blocks=layers[3], stride=2)
        self.avgpool = nn.AvgPool2d(kernel_size=7, stride=1)
        self.fc = nn.Linear(in_features=512*block.expansion, out_features=self.n_classes)


        # 初始化卷积层和BN层
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

    def _make_layer(self, block, planes, blocks, stride=1):
        downsample = None
        # stride = 1表示第一层，不需要下采样（使用maxpool下采样了），stride = 2表示第二，三，四层，需要下采样
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                nn.Conv2d(in_channels=self.inplanes, out_channels=planes * block.expansion, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(num_features=planes * block.expansion)
            )

        layers = []
        # blocks中的第一层决定是否有下采样，其中第一个block的第一层没有下采样，其他block的第一层有下采样
        layers.append(block(self.inplanes, planes, stride, downsample))
        self.inplanes = planes * block.expansion

        for i in range(1, blocks):
            layers.append(block(self.inplanes, planes))

        return nn.Sequential(*layers)


    def forward(self, x):
        """
        :param x:
        """
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)

        x = self.maxpool(x)
        # print('x.size():{}'.format(x.size()))

        x = self.layer1(x)
        # print('x.size():{}'.format(x.size()))
        x = self.layer2(x)
        # print('x.size():{}'.format(x.size()))
        x = self.layer3(x)
        # print('x.size():{}'.format(x.size()))
        x = self.layer4(x)
        # print('x.size():{}'.format(x.size()))

        x = self.avgpool(x)
        x = x.view(x.size(0), -1)
        x = self.fc(x)

        return x

    def load_weights(self, url):
        pretrained_dict = model_zoo.load_url(model_urls[url])
        model_dict = self.state_dict()
        # print('pretrained_dict.keys():', pretrained_dict.keys())
        # print('model_dict.keys():', model_dict.keys())
        if self.n_classes!=1000:
            new_dict = {k: v for k, v in pretrained_dict.items() if k not in {'fc.weight', 'fc.bias'}}
        else:
            new_dict = pretrained_dict
        model_dict.update(new_dict)
        self.load_state_dict(model_dict)


def resnet50(pretrained=False, **kwargs):
    """Constructs a ResNet-50 model.
    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
    """
    model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs)
    if pretrained:
        pretrained_dict = model_zoo.load_url(model_urls['resnet50'])
        pretrained_dict = {k: v for k, v in pretrained_dict.items() if k not in {'fc.bias', 'fc.weight'}}
        pretrained_dict.update(model.state_dict())
        # print(pretrained_dict.keys())
        model.load_state_dict(pretrained_dict)
    return model

model = resnet50(pretrained=True)
model.eval()



ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [100]:
import sys
import torch.nn as nn
from PIL import Image
import glob
import numpy as np
import cv2
import torchvision.transforms as transforms

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# model=torchvision.models.resnet18(pretrained=False)
# model.fc = nn.Linear(model.fc.in_features,4,bias=False)
model.to(device)
model.eval()

image ="./test.jpg"
transform = transforms.Compose([transforms.Resize((224,224))])
image=cv2.imread(image)
image=cv2.resize(image,(224,224))
image = Image.fromarray(cv2.cvtColor(image,cv2.COLOR_BGR2RGB))
tensor=torch.from_numpy(np.asarray(image)).permute(2,0,1).float()/255.0
tensor=tensor.reshape((1,3,224,224))
tensor=tensor.to(device)
output=model(tensor)
# print(output)
_, pred = torch.max(output.data,1)
print(pred.item())




891


In [101]:
with open('./imagenet.txt') as f:
  classes = [line.strip() for line in f.readlines()]

In [102]:
print(classes[pred[0]])

waffle iron
