In [2]:
# 导入所需要的包，请保证torchvision已经在你的环境中安装好
import torch
import torch.nn as nn
from torch.autograd import Variable
import torch.optim as optim
import torch.nn.functional as F

import torchvision.datasets as dsets
import torchvision.transforms as transforms

import matplotlib.pyplot as plt
import numpy as np
import copy

%matplotlib inline

# 设置图像读取器的超参数
image_size = 28  #图像的总尺寸28*28
num_classes = 10  #标签的种类数
num_epochs = 20  #训练的总循环周期
batch_size = 64  #批处理的尺寸大小

# 如果系统中存在着GPU，我们将用GPU来完成张量的计算
use_cuda = torch.cuda.is_available() #定义一个布尔型变量，标志当前的GPU是否可用

# 如果当前GPU可用，则将优先在GPU上进行张量计算
dtype = torch.cuda.FloatTensor if use_cuda else torch.FloatTensor
itype = torch.cuda.LongTensor if use_cuda else torch.LongTensor

# 加载MINIST数据，如果没有下载过，就会在当前路径下新建/data子目录，并把文件存放其中
# MNIST数据是属于torchvision包自带的数据，所以可以直接调用。
# 在调用自己的数据的时候，我们可以用torchvision.datasets.ImageFolder或者torch.utils.data.TensorDataset来加载
train_dataset = dsets.MNIST(root='./data',  #文件存放路径
                            train=True,   #提取训练集
                            transform=transforms.ToTensor(),  #将图像转化为Tensor
                            download=True)

# 加载测试数据集
test_dataset = dsets.MNIST(root='./data', 
                           train=False, 
                           transform=transforms.ToTensor())


# 定义两个采样器，每一个采样器都随机地从原始的数据集中抽样数据。抽样数据采用permutation
# 生成任意一个下标重排，从而利用下标来提取dataset中的数据
sample_size = len(train_dataset)
sampler1 = torch.utils.data.sampler.SubsetRandomSampler(
    np.random.choice(range(len(train_dataset)), sample_size))
sampler2 = torch.utils.data.sampler.SubsetRandomSampler(
    np.random.choice(range(len(train_dataset)), sample_size))

# 定义两个加载器，分别封装了前两个采样器，实现采样。
train_loader1 = torch.utils.data.DataLoader(dataset = train_dataset,
                                           batch_size = batch_size,
                                           sampler = sampler1
                                           )
train_loader2 = torch.utils.data.DataLoader(dataset = train_dataset,
                                           batch_size = batch_size,
                                           sampler = sampler2
                                           )

# 对于校验数据和测试数据，我们进行类似的处理。
val_size = 5000
val_indices1 = range(val_size)
val_indices2 = np.random.permutation(range(val_size))
test_indices1 = range(val_size, len(test_dataset))
test_indices2 = np.random.permutation(test_indices1)
val_sampler1 = torch.utils.data.sampler.SubsetRandomSampler(val_indices1)
val_sampler2 = torch.utils.data.sampler.SubsetRandomSampler(val_indices2)

test_sampler1 = torch.utils.data.sampler.SubsetRandomSampler(test_indices1)
test_sampler2 = torch.utils.data.sampler.SubsetRandomSampler(test_indices2)

val_loader1 = torch.utils.data.DataLoader(dataset = test_dataset,
                                        batch_size = batch_size,
                                        shuffle = False,
                                        sampler = val_sampler1
                                        )
val_loader2 = torch.utils.data.DataLoader(dataset = test_dataset,
                                        batch_size = batch_size,
                                        shuffle = False,
                                        sampler = val_sampler2
                                        )
test_loader1 = torch.utils.data.DataLoader(dataset = test_dataset,
                                         batch_size = batch_size,
                                         shuffle = False,
                                         sampler = test_sampler1
                                         )
test_loader2 = torch.utils.data.DataLoader(dataset = test_dataset,
                                         batch_size = batch_size,
                                         shuffle = False,
                                         sampler = test_sampler2
                                         )

# 为了比较不同数据量对迁移学习的影响，我们设定了一个加载数据的比例fraction
# 即我们只加载原训练数据集的1/fraction来训练网络
fraction = 1

In [5]:
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
    
    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = x.view(x.size(0), -1)
        output = self.out(x)
        output = F.log_softmax(output, dim = 1)
        return output
    
CNN_net = torch.load('minst_conv_checkpoint')

In [6]:
CNN_net

CNN(
  (conv1): Sequential(
    (0): Conv2d(1, 16, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (conv2): Sequential(
    (0): Conv2d(16, 32, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (out): Linear(in_features=1568, out_features=10, bias=True)
)

In [7]:
def rightness(predictions, labels):
    """计算预测错误率的函数，其中predictions是模型给出的一组预测结果，batch_size行10列的矩阵，labels是数据之中的正确答案"""
    pred = torch.max(predictions.data, 1)[1] # 对于任意一行（一个样本）的输出值的第1个维度，求最大，得到每一行的最大元素的下标
    rights = pred.eq(labels.data.view_as(pred)).sum() #将下标与labels中包含的类别进行比较，并累计得到比较正确的数量
    return rights, len(labels) #返回正确的数量和这一次一共比较了多少元素

#在测试集上分批运行，并计算总的正确率
CNN_net.eval() #标志模型当前为运行阶段
test_loss = 0
correct = 0
vals = []

#对测试数据集进行循环
for data, target in test_loader1:
#     data, target = data.clone().detach().requires_grad_(False), target.clone().detach()
    with torch.no_grad():
        data = data.clone().detach()
    target = target.clone().detach()
    
    output = CNN_net(data) #将特征数据喂入网络，得到分类的输出
    val = rightness(output, target) #获得正确样本数以及总样本数
    vals.append(val) #记录结果

#计算准确率
rights = (sum([tup[0] for tup in vals]), sum([tup[1] for tup in vals]))
right_rate = 1.0 * rights[0].numpy() / rights[1]
right_rate

0.9966

In [30]:
class Transfer(nn.Module):
    def __init__(self):
        super(Transfer, self).__init__()
        self.net1_conv1 = nn.Sequential(
            nn.Conv2d(
                in_channels = 1,
                out_channels = 16, 
                kernel_size = 5,
                stride = 1,
                padding = 2,
            ),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size = 2),
        )
        self.net1_conv2 = nn.Sequential(
            nn.Conv2d(
                in_channels = 16,
                out_channels = 32,
                kernel_size = 5,
                stride = 1,
                padding = 2,
            ),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size = 2)
        )
        
        self.net2_conv1 = nn.Sequential(
            nn.Conv2d(
                in_channels = 1,
                out_channels = 16, 
                kernel_size = 5,
                stride = 1,
                padding = 2,
            ),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size = 2),
        )
        self.net2_conv2 = nn.Sequential(
            nn.Conv2d(
                in_channels = 16,
                out_channels = 32,
                kernel_size = 5,
                stride = 1,
                padding = 2,
            ),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size = 2)
        )
        
        self.fc1 = nn.Linear(2 * 32 * 7 * 7, 1024)
        self.fc2 = nn.Linear(1024, 2 * num_classes)
        self.fc3 = nn.Linear(2 * num_classes, num_classes)
        self.fc4 = nn.Linear(num_classes, 1)
    
    def forward(self, x, y, training = True):
        x = self.net1_conv1(x)
        x = self.net1_conv2(x)
        x = x.view(x.size(0), -1)
        
        y = self.net2_conv1(y)
        y = self.net2_conv2(y)
        y = y.view(y.size(0), -1)
        
        z = torch.cat((x, y), 1)
        z = self.fc1(z)
        z = F.relu(z)
        z = F.dropout(z, training = self.training)
        z = self.fc2(z)
        z = F.relu(z)
        z = self.fc3(z)
        z = F.relu(z)
        z = self.fc4(z)
        return z
    
    def set_filter_values(self, net):
        self.net1_conv1[0].weight.data = copy.deepcopy(net.conv1[0].weight.data)
        self.net1_conv1[0].bias.data = copy.deepcopy(net.conv1[0].bias.data)
        self.net1_conv2[0].weight.data = copy.deepcopy(net.conv2[0].weight.data)
        self.net1_conv2[0].bias.data = copy.deepcopy(net.conv2[0].bias.data)
        
        self.net2_conv1[0].weight.data = copy.deepcopy(net.conv1[0].weight.data)
        self.net2_conv1[0].bias.data = copy.deepcopy(net.conv1[0].bias.data)
        self.net2_conv2[0].weight.data = copy.deepcopy(net.conv2[0].weight.data)
        self.net2_conv2[0].bias.data = copy.deepcopy(net.conv2[0].bias.data)
        
        self.net1_conv1 = self.net1_conv1.cuda() if use_cuda else self.net1_conv1
        self.net1_conv2 = self.net1_conv2.cuda() if use_cuda else self.net1_conv2
        
        self.net2_conv1 = self.net2_conv1.cuda() if use_cuda else self.net2_conv1
        self.net2_conv2 = self.net2_conv2.cuda() if use_cuda else self.net2_conv2
    
    def set_filter_values_notgrad(self, net):
        set_filter_values()
        
        self.net1_conv1.weight.requires_grad = False
        self.net1_conv1.bais.requires_grad = False
        self.net1_conv2.weight.requires_grad = False
        self.net1_conv2.bias.requires_grad = False
        
        self.net2_conv1.weight.requires_grad = False
        self.net2_conv1.bais.requires_grad = False
        self.net2_conv2.weight.requires_grad = False
        self.net2_conv2.bias.requires_grad = False
        

def rightness(y, target):
    out = torch.round(y.squeeze()).type(itype)
    out = out.eq(target).sum()
    out1 = y.size()[0]
    return(out, out1)

In [33]:
# start transfer -> pretrained transfer
net = Transfer()
net.set_filter_values(CNN_net)

if use_cuda:
    net = net.cuda()

criterion = nn.MSELoss()
new_parameters = []
for para in net.parameters():
    if para.requires_grad:
        new_parameters.append(para)

optimizer = optim.Adam(new_parameters, lr = 0.001)


In [36]:
num_epochs = 10
records = []
for epoch in range(num_epochs):
    losses = []
    for step, data in enumerate(zip(train_loader1, train_loader2)):
        if step >= (len(train_loader1) // fraction):
            break
        
        ((x1, y1), (x2, y2)) = data
        if use_cuda:
            x1, y1, x2, y2 = x1.cuda(), y1.cuda(), x2.cuda(), y2.cuda()
        
        net.train()
        outputs = net(Variable(x1), Variable(x2))
        labels = y1 + y2
        loss = criterion(outputs, labels.type(torch.float))
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        loss = loss.cpu() if use_cuda else loss
        losses.append(loss.data.numpy())
        
        if step % 100 == 0:
            val_losses = []
            rights = []
            net.eval()
            for val_data in zip(val_loader1, val_loader2):
                ((x1, y1), (x2, y2)) = val_data
                if use_cuda:
                    x1, y1, x2, y2 = x1.cuda(), y1.cuda(), x2.cuda(), y2.cuda()
                outputs = net(Variable(x1), Variable(x2))
                labels = y1 + y2
                loss = criterion(outputs, labels.type(torch.float))
                loss = loss.cpu() if use_cuda else loss
                val_losses.append(loss.data.numpy())
                
                right = rightness(outputs.data, labels)
                rights.append(right)
            right_ratio = 1.0 * np.sum([i[0] for i in rights]) / np.sum([i[1] for i in rights])
            print('epoch:{}, train_loss：{:.2f}, validation：{:.2f}, accuracy：{:.2f}'.format(
                epoch, np.mean(losses), np.mean(val_losses), right_ratio))
            records.append([np.mean(losses), np.mean(val_losses), right_ratio])


epoch:0, train_loss：20.24, validation：16.74, accuracy：0.10
epoch:0, train_loss：16.92, validation：16.88, accuracy：0.09
epoch:0, train_loss：17.09, validation：16.63, accuracy：0.10
epoch:0, train_loss：16.96, validation：16.73, accuracy：0.10
epoch:0, train_loss：16.84, validation：17.09, accuracy：0.10
epoch:0, train_loss：16.76, validation：17.31, accuracy：0.10
epoch:0, train_loss：16.78, validation：16.78, accuracy：0.09
epoch:0, train_loss：16.77, validation：16.95, accuracy：0.10
epoch:0, train_loss：16.75, validation：16.85, accuracy：0.10
epoch:0, train_loss：16.80, validation：16.97, accuracy：0.10
epoch:1, train_loss：18.13, validation：16.27, accuracy：0.09
epoch:1, train_loss：16.79, validation：16.88, accuracy：0.10
epoch:1, train_loss：16.65, validation：16.58, accuracy：0.10
epoch:1, train_loss：16.59, validation：16.83, accuracy：0.10
epoch:1, train_loss：16.56, validation：16.96, accuracy：0.11
epoch:1, train_loss：16.58, validation：17.07, accuracy：0.10
epoch:1, train_loss：16.63, validation：16.76, accuracy：0.