In [1]:
import torch,os,torchvision
import torch.nn as nn
import torch.nn.functional as F
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader,Dataset
from torchvision import datasets,models,transforms
from PIL import Image
from sklearn.model_selection import StratifiedShuffleSplit
torch.__version__

'1.0.1.post2'

In [2]:
# Folder that holds the dataset; labels.csv is read from directly inside it.
DATA_ROOT = 'dog_breed'
all_labels_df = pd.read_csv(
    os.path.join(DATA_ROOT, 'labels.csv')
)
# Preview the first rows of the labels table.
all_labels_df.head()

Unnamed: 0,id,breed
0,000bec180eb18c7604dcecc8fe0dba07,boston_bull
1,001513dfcb2ffafc82cccf4d8bbaba97,dingo
2,001cdf01b096e06d78e9e5112d419397,pekinese
3,00214f311d5d2247d5dfe4fe24b2303d,bluetick
4,0021f9ceb3235effd7fcde7f7538ed62,golden_retriever


In [3]:
# Distinct breed names found in the labels table.
breeds = all_labels_df.breed.unique()
# Two lookup tables over the same enumeration: index -> breed and breed -> index.
idx2breed = dict(enumerate(breeds))
breed2idx = {name: position for position, name in idx2breed.items()}
# Number of distinct breeds (classes).
len(breeds)

120

In [4]:
# Add a column holding the integer class index that breed2idx assigns to each row's breed.
all_labels_df['label_idx'] = list(map(breed2idx.__getitem__, all_labels_df['breed']))
all_labels_df.head()

Unnamed: 0,id,breed,label_idx
0,000bec180eb18c7604dcecc8fe0dba07,boston_bull,0
1,001513dfcb2ffafc82cccf4d8bbaba97,dingo,1
2,001cdf01b096e06d78e9e5112d419397,pekinese,2
3,00214f311d5d2247d5dfe4fe24b2303d,bluetick,3
4,0021f9ceb3235effd7fcde7f7538ed62,golden_retriever,4


In [5]:
class DogDataset(Dataset):
    """Dataset of images described by a labels table.

    Each sample is the image ``<img_path>/<id>.jpg`` together with the value
    of the ``label_idx`` column for the same row.

    Args:
        labels_df: DataFrame with at least the columns ``id`` and ``label_idx``.
        img_path: Directory that contains the ``<id>.jpg`` files.
        transform: Optional callable applied to the loaded PIL image.
    """

    def __init__(self, labels_df, img_path, transform=None):
        self.labels_df = labels_df
        self.img_path = img_path
        self.transform = transform

    def __len__(self):
        """Return the number of rows in the labels table."""
        return len(self.labels_df)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for the row at position ``idx``."""
        # Positional lookup: works whatever index the DataFrame carries,
        # instead of requiring index labels 0..n-1.
        image_id = self.labels_df['id'].iloc[idx]
        label = self.labels_df['label_idx'].iloc[idx]
        image_name = os.path.join(self.img_path, image_id) + '.jpg'
        # Force three channels so grayscale / CMYK / RGBA files still match
        # a 3-channel Normalize applied later by the transform.
        img = Image.open(image_name).convert('RGB')
        if self.transform:
            img = self.transform(img)
        return img, label

In [6]:
# Hyperparameters and runtime configuration
IMG_SIZE = 224    # ResNet-50 takes 224x224 inputs, so every image is brought to this size
BATCH_SIZE = 128  # a batch of this size needs roughly 5 GB of GPU memory
# Per-channel mean / std passed to transforms.Normalize.
# NOTE: "IMG_MAEN" is a misspelling of "mean"; the name is kept because later cells refer to it.
IMG_STD = [0.229, 0.224, 0.225]
IMG_MAEN = [0.485, 0.456, 0.406]
# Use the GPU when one is available, otherwise fall back to the CPU.
CUDA = torch.cuda.is_available()
DEVICE = torch.device("cuda") if CUDA else torch.device("cpu")

In [7]:
# Image preprocessing pipelines for the training and validation data.
# Training: resize, then random crop / horizontal flip / rotation (up to 30 degrees)
# as augmentation, then tensor conversion and per-channel normalisation.
train_transforms = transforms.Compose(
    [
        transforms.Resize(IMG_SIZE),
        transforms.RandomResizedCrop(IMG_SIZE),
        transforms.RandomHorizontalFlip(),
        transforms.RandomRotation(30),
        transforms.ToTensor(),
        transforms.Normalize(IMG_MAEN, IMG_STD),
    ]
)
# Validation: no random steps, only resize + centre crop before the same
# tensor conversion and normalisation.
val_transforms = transforms.Compose(
    [
        transforms.Resize(IMG_SIZE),
        transforms.CenterCrop(IMG_SIZE),
        transforms.ToTensor(),
        transforms.Normalize(IMG_MAEN, IMG_STD),
    ]
)

In [8]:
# Hold out 10% of the labelled rows as validation data used during training.
dataset_names = ['train', 'valid']
# Stratified: the split is made with respect to the breed column, with a fixed seed.
stratified_split = StratifiedShuffleSplit(n_splits=1, test_size=0.1, random_state=0)
train_split_idx, val_split_idx = next(
    stratified_split.split(all_labels_df.id, all_labels_df.breed)
)
# reset_index() gives each subset a fresh 0..n-1 index (the old one is kept as a column).
train_df = all_labels_df.iloc[train_split_idx].reset_index()
val_df = all_labels_df.iloc[val_split_idx].reset_index()
print(len(train_df), len(val_df), sep='\n')

9199
1023


In [9]:
# Wrap the two splits in datasets and data loaders.
image_transforms = {'train': train_transforms, 'valid': val_transforms}
# Both splits read their images from the same 'train' folder; they differ only in
# which rows of the labels table they use and which transform pipeline is applied.
train_dataset = DogDataset(train_df, os.path.join(DATA_ROOT, 'train'), transform=image_transforms['train'])
val_dataset = DogDataset(val_df, os.path.join(DATA_ROOT, 'train'), transform=image_transforms['valid'])
image_dataset = {'train': train_dataset, 'valid': val_dataset}
# Shuffle only the training split: evaluation gains nothing from a random order,
# and a fixed order keeps validation runs repeatable.
# num_workers must be >= 0: 0 loads data in the main process, a larger value uses
# that many worker processes, which can speed up loading.
image_dataloader = {
    x: DataLoader(image_dataset[x], batch_size=BATCH_SIZE, shuffle=(x == 'train'), num_workers=0)
    for x in dataset_names
}
dataset_sizes = {x: len(image_dataset[x]) for x in dataset_names}

In [10]:
# Network setup: the ImageNet-pretrained ResNet-50 predicts 1000 classes, but there
# are only 120 dog breeds here, so the final fully connected layer is replaced by
# one whose output size is the number of breeds.
model_ft = models.resnet50(pretrained=True)
# Freeze every pretrained parameter.
for param in model_ft.parameters():
    param.requires_grad_(False)
print(model_ft.fc)  # show the original fully connected layer
num_fc_ftr = model_ft.fc.in_features  # input width of the fc layer
# The replacement layer is created after the freeze, so its parameters stay trainable.
model_ft.fc = nn.Linear(num_fc_ftr, len(breeds))
model_ft = model_ft.to(DEVICE)
print(model_ft)

Linear(in_features=2048, out_features=1000, bias=True)
ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace)
      (downsample): Sequential(
        (0): Co

In [11]:
# Cross-entropy loss for the classification task.
criterion = nn.CrossEntropyLoss()
# Adam is given only the parameters of the fc layer (one parameter group, lr = 0.001).
optimizer = torch.optim.Adam(model_ft.fc.parameters(), lr=0.001)

In [12]:
def train(model,device,train_loader,epoch):
    """Run one training pass over ``train_loader``.

    Uses the module-level ``optimizer`` and ``criterion``.

    Args:
        model: Network to train; it is switched to training mode.
        device: Device each batch is moved to before the forward pass.
        train_loader: Iterable yielding ``(inputs, targets)`` batches.
        epoch: Epoch number, used only in the printed message.

    Prints the loss of the last batch of the pass, or a short notice when the
    loader produced no batches.
    """
    model.train()
    loss = None  # remains None when the loader yields nothing
    for x, y in train_loader:
        x = x.to(device)
        y = y.to(device)
        optimizer.zero_grad()
        y_hat = model(x)
        loss = criterion(y_hat, y)
        loss.backward()
        optimizer.step()
    if loss is None:
        # Without this guard the final print would fail on an unbound `loss`.
        print('Train Epoch: {}\t Loss: n/a (no batches)'.format(epoch))
        return
    print('Train Epoch: {}\t Loss: {:.6f}'.format(epoch, loss.item()))

In [15]:
def test(model,device,test_loader):
    model.eval()
    test_loss=0
    correct=0
    with torch.no_grad():
        for i,data in enumerate(test_loader):
            x,y=data
            x=x.to(device)
            y=y.to(device)
            optimizer.zero_grad()
            y_hat=model(x)
            test_loss += criterion(y_hat,y).item() # sum up batch loss
            pred=y_hat.max(1,keepdim=True)[1] # get the index of the max log-probability
            correct += pred.eq(y.view_as(pred)).sum().item()
    test_loss /=len(test_loader.dataset)
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
    test_loss,correct,len(val_dataset),
    100.*correct/len(val_dataset)))

In [16]:
for epoch in range(1,10):
    # %time 用于测试语句运行的时间
    %time train(model=model_ft,device=DEVICE,train_loader=image_dataloader['train'],epoch=epoch)
    test(model=model_ft,device=DEVICE,test_loader=image_dataloader['valid'])

Train Epoch: 1	 Loss: 1.718921
CPU times: user 6min 23s, sys: 4.46 s, total: 6min 28s
Wall time: 59.9 s

Test set: Average loss: 0.0077, Accuracy: 778/1023 (76)

Train Epoch: 2	 Loss: 1.855529
CPU times: user 6min 20s, sys: 4.38 s, total: 6min 24s
Wall time: 58.9 s

Test set: Average loss: 0.0063, Accuracy: 807/1023 (79)

Train Epoch: 3	 Loss: 1.346943
CPU times: user 6min 19s, sys: 4.31 s, total: 6min 23s
Wall time: 59.1 s

Test set: Average loss: 0.0057, Accuracy: 799/1023 (78)

Train Epoch: 4	 Loss: 1.375629
CPU times: user 6min 20s, sys: 4.34 s, total: 6min 24s
Wall time: 59.2 s

Test set: Average loss: 0.0056, Accuracy: 807/1023 (79)

Train Epoch: 5	 Loss: 1.305099
CPU times: user 6min 19s, sys: 4.34 s, total: 6min 24s
Wall time: 59.1 s

Test set: Average loss: 0.0056, Accuracy: 805/1023 (79)

Train Epoch: 6	 Loss: 1.290596
CPU times: user 6min 20s, sys: 4.46 s, total: 6min 24s
Wall time: 59.1 s

Test set: Average loss: 0.0051, Accuracy: 820/1023 (80)

Train Epoch: 7	 Loss: 1.2486

In [18]:
# Export the activations of a frozen layer.
# A forward hook collects them into a module-level list.
in_list = []  # one NumPy array per sample, taken from the hooked module's input
def hook(module, input, output):
    """Forward hook that appends every sample of the module's first input to ``in_list``.

    ``input`` is the tuple of positional inputs the hooked module received; only
    its first element is used. It is split along dimension 0 and each slice is
    moved to the CPU and stored as a NumPy array. ``module`` and ``output`` are
    not used.
    """
    batch = input[0]
    in_list.extend(sample.cpu().numpy() for sample in batch)

In [19]:
# Attach the hook to the avgpool layer and keep the returned handle, so the hook
# can be detached later with hook_handle.remove(). Without the handle there is no
# way to remove it, and re-running this cell stacks another copy of the hook.
hook_handle = model_ft.avgpool.register_forward_hook(hook)
hook_handle

<torch.utils.hooks.RemovableHandle at 0x7f46ddcdbac8>

In [20]:
%%time
with torch.no_grad():
    for batch_idx,data in enumerate(image_dataloader['train']):
        x,y=data
        x=x.to(DEVICE)
        y=y.to(DEVICE)
        y_hat=model_ft(x)

CPU times: user 7min 2s, sys: 24.6 s, total: 7min 26s
Wall time: 1min 52s


In [21]:
# Combine the collected per-sample arrays into one array and write it to disk
# (np.save appends the ".npy" extension to the given name).
features = np.array(in_list)
np.save(file="features", arr=features)