In [5]:
from __future__ import print_function, division
import os
import pandas as pd
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from reData import *
from torchvision.datasets import ImageFolder
from torchvision import transforms
import torch.nn.functional as F
import torch.nn as nn
import time
from tqdm import tqdm
import torchvision.models as models

## 分割数据 / 加载数据迭代器 
***请将 Kaggle 上下载的数据放在 data/dog_breed/ 目录下***

In [7]:
# --- Dataset configuration -------------------------------------------------
# Flip SPLIT_FILES to True to run the one-off reorganisation step below.
SPLIT_FILES = False
BATCH_SIZE = 4
data_dir = 'data/dog_breed'
label_file = 'labels.csv'
train_dir = 'train'
test_dir = 'test'
input_dir = 'train_valid_test'
# NOTE: `batch_size` is not used by the loaders below; they use BATCH_SIZE.
batch_size = 128
valid_ratio = 0.1

if SPLIT_FILES:
    # reorg_dog_data is presumably provided by `from reData import *` -- confirm.
    reorg_dog_data(
        data_dir, label_file, train_dir, test_dir, input_dir, valid_ratio
    )

# Deterministic preprocessing shared by every split: resize, centre-crop to
# 224x224, convert to a tensor, then normalise each channel.
transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
])

# One ImageFolder dataset per sub-directory of data_dir/input_dir.
_split_root = os.path.join(data_dir, input_dir)
train_ds = ImageFolder(os.path.join(_split_root, 'train'), transform=transform)
valid_ds = ImageFolder(os.path.join(_split_root, 'valid'), transform=transform)
train_valid_ds = ImageFolder(os.path.join(_split_root, 'train_valid'), transform=transform)
test_ds = ImageFolder(os.path.join(_split_root, 'test'), transform=transform)

# Data iterators. Only the test loader keeps the on-disk order (shuffle=False).
train_dl = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True, num_workers=0)
valid_dl = DataLoader(valid_ds, batch_size=BATCH_SIZE, shuffle=True, num_workers=0)
train_valid_dl = DataLoader(train_valid_ds, batch_size=BATCH_SIZE, shuffle=True, num_workers=0)
test_dl = DataLoader(test_ds, batch_size=BATCH_SIZE, shuffle=False, num_workers=0)

## 提取各种深度网络的feature层

- 提取VGG19,ResNet50,ResNet152,DenseNet161的features层的输出
- 通过average pooling来调整输出为(batchsize , channel, 1 , 1)
- 通过ConcatNet 来合并所有输出

*这里只选择了三种网络vgg19 / resnet152 / densenet161 进行合并*

In [None]:
class VGG19Features(nn.Module):
    """Feature extractor built from every child of a model except the last.

    The retained children are chained in their original order and followed
    by a 7x7 average pooling layer.

    Args:
        original_model: an ``nn.Module`` whose final child is dropped
            (presumably the classifier head of a torchvision VGG -- confirm
            against the installed torchvision version).
    """

    def __init__(self, original_model):
        super().__init__()
        # Keep all children but the last one, in their original order.
        backbone = list(original_model.children())[:-1]
        # AvgPool2d(7) collapses a 7x7 spatial map into a single value per channel.
        self.features = nn.Sequential(*backbone, nn.AvgPool2d(7))

    def forward(self, x):
        """Return the pooled backbone activations for the batch ``x``."""
        return self.features(x)

# Build the pretrained VGG19 (batch-norm variant), wrap it as a feature
# extractor and switch it to evaluation mode.
# Debug aid (disabled):
#   testx = torch.randn((32, 3, 224, 224))
#   print(vgg19_features(testx).shape)
vgg19_model = models.vgg19_bn(pretrained=True)
vgg19_features = VGG19Features(vgg19_model).eval()  # output [b, c, h = 1, w = 1]

In [None]:
class ResNet50Features(nn.Module):
    """Feature extractor built from every child of a model except the last.

    Args:
        original_model: an ``nn.Module`` whose final child is dropped
            (presumably the fully connected head of a torchvision ResNet-50
            -- confirm against the installed torchvision version).
    """

    def __init__(self, original_model):
        super().__init__()
        # Keep all children but the last one, in their original order.
        backbone = list(original_model.children())[:-1]
        self.features = nn.Sequential(*backbone)

    def forward(self, x):
        """Return the output of the truncated network for the batch ``x``."""
        return self.features(x)

# res50_model = models.resnet50(pretrained=True)
# res50_features = ResNet50Features(res50_model) # output [b,c,h = 1,w = 1]
# res50_features = res50_features.eval()
# print(res50_features(testx).shape)

In [None]:
class ResNet152Features(nn.Module):
    """Feature extractor built from every child of a model except the last.

    Args:
        original_model: an ``nn.Module`` whose final child is dropped
            (presumably the fully connected head of a torchvision ResNet-152
            -- confirm against the installed torchvision version).
    """

    def __init__(self, original_model):
        super().__init__()
        # Keep all children but the last one, in their original order.
        backbone = list(original_model.children())[:-1]
        self.features = nn.Sequential(*backbone)

    def forward(self, x):
        """Return the output of the truncated network for the batch ``x``."""
        return self.features(x)

# Build the pretrained ResNet-152, wrap it as a feature extractor and switch
# it to evaluation mode.
# Debug aid (disabled):
#   print(res152_features)
#   print(res152_features(testx).shape)
res152_model = models.resnet152(pretrained=True)
res152_features = ResNet152Features(res152_model).eval()  # output [b, c, h = 1, w = 1]

In [None]:
class densenet161Features(nn.Module):
    """Pooled feature extractor built from a DenseNet-style model.

    Every child of ``original_model`` except the last is kept, followed by a
    ReLU and a 7x7 average pooling layer.

    The ReLU mirrors torchvision's ``DenseNet.forward``, which applies
    ``F.relu`` to the output of ``features`` before pooling. Without it the
    pooled vector is computed from raw batch-norm activations, which is not
    the representation the pretrained network was trained to pool.

    NOTE: feature files cached before this fix were computed without the
    ReLU and must be regenerated to match.

    Args:
        original_model: an ``nn.Module`` whose final child is dropped
            (presumably the classifier of a torchvision DenseNet-161 --
            confirm against the installed torchvision version).
    """

    def __init__(self, original_model):
        super(densenet161Features, self).__init__()
        # Keep all children but the last one, in their original order.
        backbone = list(original_model.children())[:-1]
        self.features = nn.Sequential(
            *backbone,
            # Parameter-free, so the state_dict keys are unchanged.
            nn.ReLU(inplace=True),
            # Collapses a 7x7 spatial map into a single value per channel.
            nn.AvgPool2d(7)
        )

    def forward(self, x):
        """Return the rectified, average-pooled activations for ``x``."""
        x = self.features(x)
        return x

# Build the pretrained DenseNet-161, wrap it as a feature extractor and
# switch it to evaluation mode.
# Debug aid (disabled):
#   print(densenet161_features(testx).shape)
densenet161_model = models.densenet161(pretrained=True)
densenet161_features = densenet161Features(densenet161_model).eval()  # output [b, c, h = 1, w = 1]

In [None]:
class ConcatNet(nn.Module):
    """Run three sub-networks on the same input and concatenate their outputs.

    Args:
        net1, net2, net3: modules applied to the same input ``x``.
        **kwargs: forwarded unchanged to ``nn.Module.__init__``.
    """

    def __init__(self, net1, net2, net3, **kwargs):
        super().__init__(**kwargs)
        self.net1 = net1
        self.net2 = net2
        self.net3 = net3

    def forward(self, x):
        """Return the outputs of net1, net2, net3 joined along dim 1."""
        branches = (self.net1, self.net2, self.net3)
        outputs = [branch(x) for branch in branches]
        return torch.cat(outputs, dim=1)

In [None]:
# Combine the three feature extractors (DenseNet-161, ResNet-152, VGG19)
# into a single network and move it to the GPU.
concatNet = ConcatNet(densenet161_features, res152_features, vgg19_features)
concatNet = concatNet.cuda()

## 提取训练集/验证集/测试集的特征向量

**把特征全部保存到本地,以后调用**

In [None]:
def SaveNd(data, net, name):
    """Run ``net`` over every batch of ``data`` and save the results to ``name``.

    The per-batch outputs and labels are concatenated along dim 0 and written
    with ``torch.save`` as the two-element list ``[features, labels]``.

    Args:
        data: iterable yielding ``(X, y)`` pairs of tensors, e.g. a DataLoader.
        net: ``nn.Module`` used to compute the features. It is switched to
            evaluation mode.
        name: destination path passed to ``torch.save``.
    """
    print('extract %s' % name)

    # Switching to eval mode is loop-invariant, so do it once up front.
    net = net.eval()

    # Feed inputs on whatever device the network already lives on instead of
    # assuming CUDA; a parameter-free network is treated as living on the CPU.
    first_param = next(net.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cpu')

    x_list = []
    y_list = []
    # Pure inference: disabling autograd avoids building a graph per batch.
    with torch.no_grad():
        for X, y in tqdm(data):
            out = net(X.to(device))
            x_list.append(out.cpu())
            y_list.append(y)

    print(len(x_list))
    print(len(y_list))
    print(x_list[0].shape)
    x_list = torch.cat(x_list, dim=0)
    y_list = torch.cat(y_list, dim=0)
    print('save %s' % name)
    torch.save([x_list, y_list], name)

# Cache the concatenated features of each split, in the same order as before:
# train, valid, train+valid, then test.
for split_loader, feature_file in (
    (train_dl, 'train_r152i3.pt'),
    (valid_dl, 'valid_r152i3.pt'),
    (train_valid_dl, 'input_r152i3.pt'),
    (test_dl, 'test_r152i3.pt'),
):
    SaveNd(split_loader, concatNet, feature_file)

In [None]:
# x_list = [torch.randn((16,4096,1,1)),torch.randn((16,4096,1,1))]
# x_list = torch.cat(x_list,dim=0)
# print(x_list.shape)