## 移除最后conv最后一层网络

    用resnet18提取特征, 使用两个個fc进行分类


In [1]:
import sys
import os
import time
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.image as mping
import torch
import torch.nn as nn
import torchvision.models as models
import torch.utils.data as data
from torchvision import transforms

from data_loader import *
from model import *

%matplotlib inline
%config InlineBackend.figure_format = 'retina'

In [2]:
class EncoderResnet18Cut(nn.Module):
    """remove the last conv network"""
    def __init__(self):
        super(EncoderResnet18Cut, self).__init__()
        resnet = models.resnet18(pretrained=True)
        for param in resnet.parameters():
            param.requires_grad_(False)
        
        modules = list(resnet.children())[:-3]
        self.resnet = nn.Sequential(*modules)
        self.pool = nn.AvgPool2d(14)
        
    def forward(self, images):
        features = self.resnet(images)
        features = self.pool(features)
        features = features.view(features.size(0), -1)
        return features

In [3]:
class MultiFCClassify(nn.Module):
    def __init__(self, in_features, class_size):
        super(MultiFCClassify, self).__init__()
        self.bn = nn.BatchNorm1d(in_features)
        self.drop = nn.Dropout(0.3)
        self.fc2 = nn.Linear(in_features, class_size)
    
    def forward(self, features):
#         y = self.fc1(features)
        y = self.bn(features)
        y = self.drop(features)
        y = self.fc2(y)
        return y

In [4]:
def valid_class_acc(classify_model, valid_data_loader):
    classify_model = classify_model.eval()
    
    indices = valid_data_loader.dataset.get_train_indices()
    new_sampler = data.sampler.SubsetRandomSampler(indices=indices)
    valid_data_loader.batch_sampler.sampler = new_sampler
    
    embeds, targets = next(iter(valid_data_loader))
    
    embeds = embeds.squeeze(1)
    targets = targets.type(torch.LongTensor).to(device)
        
    outputs = classify_model(embeds)
    
    predict_result = outputs.argmax(1)
    size = len(predict_result)
    accuracy = torch.sum(predict_result == targets).item() / size * 100
    
    return accuracy

### set high parameters

In [5]:
batch_size = 256
num_epochs = 100
extract_size = 256
class_size = 61


# 图片格式转化
transform_train = transforms.Compose([
    transforms.Resize([224,224]),
#     transforms.RandomCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])
transform_vaild = transforms.Compose([
    transforms.Resize([224,224]),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])

### load encoded datas

In [6]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# exract the images to embedding tensor
# remove the last conv layer
encoder = EncoderResnet18Cut()
encoder = encoder.to(device)

train_fold = './bottle_neck/resnet18_train_remove_last_conv'
data_loader = get_encoder_loader_fold(transform_train, encoder, device, train_fold, load=False, mode='train', batch_size=batch_size)


valid_flod = './bottle_neck/resnet18_valid_remove_last_conv'
valid_data_loader = get_encoder_loader_fold(transform_train, encoder, device, valid_flod, load=False, mode='valid', batch_size=batch_size)

### init model

使用两层网络,结果过拟合,训练准确率很高,验证结果比较差,使用再试

In [7]:
# set the total number of training steps per epoch
total_step = int(len(data_loader.dataset)/batch_size)

classify_model = MultiFCClassify(extract_size, class_size)
classify_model = classify_model.to(device)

criterion = nn.CrossEntropyLoss().cuda() if torch.cuda.is_available() else nn.CrossEntropyLoss()

# with RMSprop to slow the desent gradient progress
optimizer = torch.optim.Adam(classify_model.parameters(), lr=0.001)

### Load the best trained model,yet!

In [8]:
classify_model.load_state_dict(torch.load('./models/class_single_resnet18rm1layer_last.pkl'))

### time to train model

In [57]:
optimizer = torch.optim.SGD(classify_model.parameters(), lr=0.0001)

In [9]:
best_acc = 0

for epoch in range(1, num_epochs+1):
    start = time.time()
    classify_model = classify_model.train()
    for i_step in range(1, total_step+1):
        
        # Ramdomly get samples
        indices = data_loader.dataset.get_train_indices()
        new_sampler = data.sampler.SubsetRandomSampler(indices=indices)
        data_loader.batch_sampler.sampler = new_sampler
        
        embeds, targets = next(iter(data_loader))
        
        embeds = embeds.squeeze(1)
        targets = targets.type(torch.LongTensor).to(device)
        
        classify_model.zero_grad()
        
        outputs = classify_model(embeds)
        
        loss = criterion(outputs, targets.view(-1))
        
        loss.backward()
        
        optimizer.step()
        
        if i_step%20 == 0:
            # calculate the status
            predict_result = outputs.argmax(1)
            accuracy = torch.sum(predict_result == targets).item() / batch_size * 100
            best_acc = accuracy if accuracy > best_acc else best_acc
            
            stats = 'Epoch [%d/%d], Step [%d/%d], Loss: %.4f, Accuracy: %.2f%%, Best_acc: %.2f%%\
            ' % (epoch, num_epochs, i_step, total_step, loss.item(), accuracy, best_acc)
            print('\r' + stats, end='')
            sys.stdout.flush()
    valid_acc = valid_class_acc(classify_model, valid_data_loader)
    print('\n Epoch {}, spent time:{:.2f}s, valid: {:.2f}%'.format(epoch, time.time()-start, valid_acc))       
    if epoch%10 == 0:
        torch.save(classify_model.state_dict(), os.path.join('./models', 'class_single_resnet18rm1layer_%d.pkl' % epoch))
torch.save(classify_model.state_dict(), os.path.join('./models', 'class_single_resnet18rm1layer_last.pkl'))

Epoch [1/100], Step [120/127], Loss: 3.7266, Accuracy: 8.20%, Best_acc: 8.20%            
 Epoch 1, spent time:10.55s, valid: 7.81%
Epoch [2/100], Step [120/127], Loss: 3.8126, Accuracy: 6.64%, Best_acc: 9.38%            
 Epoch 2, spent time:10.51s, valid: 8.98%
Epoch [3/100], Step [120/127], Loss: 3.7799, Accuracy: 6.64%, Best_acc: 9.38%            
 Epoch 3, spent time:10.45s, valid: 4.69%
Epoch [4/100], Step [120/127], Loss: 3.7600, Accuracy: 10.55%, Best_acc: 10.55%            
 Epoch 4, spent time:10.59s, valid: 8.59%
Epoch [5/100], Step [120/127], Loss: 3.7637, Accuracy: 4.69%, Best_acc: 11.33%             
 Epoch 5, spent time:10.67s, valid: 6.64%
Epoch [6/100], Step [120/127], Loss: 3.8311, Accuracy: 5.86%, Best_acc: 11.33%            
 Epoch 6, spent time:10.51s, valid: 5.47%
Epoch [7/100], Step [120/127], Loss: 3.7612, Accuracy: 7.42%, Best_acc: 11.33%            
 Epoch 7, spent time:10.51s, valid: 10.94%
Epoch [8/100], Step [120/127], Loss: 3.7010, Accuracy: 13.28%, Best_a

Epoch [61/100], Step [120/127], Loss: 3.6068, Accuracy: 9.38%, Best_acc: 16.80%             
 Epoch 61, spent time:10.49s, valid: 14.45%
Epoch [62/100], Step [120/127], Loss: 3.5722, Accuracy: 9.77%, Best_acc: 16.80%             
 Epoch 62, spent time:10.59s, valid: 15.23%
Epoch [63/100], Step [120/127], Loss: 3.6412, Accuracy: 10.55%, Best_acc: 16.80%            
 Epoch 63, spent time:10.54s, valid: 13.28%
Epoch [64/100], Step [120/127], Loss: 3.5602, Accuracy: 14.84%, Best_acc: 16.80%            
 Epoch 64, spent time:10.54s, valid: 15.23%
Epoch [65/100], Step [120/127], Loss: 3.5618, Accuracy: 10.16%, Best_acc: 16.80%            
 Epoch 65, spent time:10.52s, valid: 15.62%
Epoch [66/100], Step [120/127], Loss: 3.5189, Accuracy: 10.55%, Best_acc: 16.80%            
 Epoch 66, spent time:10.74s, valid: 16.02%
Epoch [67/100], Step [120/127], Loss: 3.6140, Accuracy: 8.59%, Best_acc: 16.80%             
 Epoch 67, spent time:10.61s, valid: 12.89%
Epoch [68/100], Step [120/127], Loss: 3.4

KeyboardInterrupt: 

### How good is the model?

In [24]:
valid_flod = './bottle_neck/resnet18_valid_remove_last_conv'
valid_data_loader = get_encoder_loader_fold(transform_train, encoder, device, valid_flod, load=False, mode='valid', batch_size=batch_size)

In [53]:
classify_model.load_state_dict(torch.load('./models/class_single_resnet18rm1layer_last.pkl'))

In [62]:
classify_model = classify_model.eval()
predict = []
total = len(valid_data_loader.dataset)
for idx in range(total):
    embed, target = valid_data_loader.dataset[idx]
    p = classify_model(embed).argmax().item()
    predict.append(p == target)
    
accuracy = sum(predict)/len(predict)
print('The final accuracy is %.2f%%.' % (accuracy*100))

The final accuracy is 69.43%.


## Why? Why the random validation accuracy is good, but not the whole dataset?

## With 100 epoch the result is the best,yet.

In [23]:
!mv ./models/class_multify_rm1layer_100.pkl ./models/class_multify_rm1layer_good.pkl