In [5]:
import torch
import torch.nn as nn
from torch.autograd import Variable
import torch.optim as optim
import torch.nn.functional as F
import numpy as np
import torchvision.transforms as transforms
import torchvision.datasets as vdatasets
import torchvision.utils as vutils
import torchvision.models as vmodels
import torchvision
import random
from PIL import Image
import json
import Augmentor
torch.manual_seed(1)

USE_CUDA = torch.cuda.is_available()

import matplotlib.pyplot as plt
%matplotlib inline

http://cs231n.github.io/transfer-learning/

## 이미지넷 클래스 메타 정보 

In [6]:
idx2cls = json.load(open('imagenet_class.json','r',encoding='utf-8'))

In [8]:
list(idx2cls.items())[:10]

[('363', 'armadillo'),
 ('146', 'albatross, mollymawk'),
 ('593', 'harmonica, mouth organ, harp, mouth harp'),
 ('633', "loupe, jeweler's loupe"),
 ('754', 'radio, wireless'),
 ('825', 'stone wall'),
 ('391',
  'coho, cohoe, coho salmon, blue jack, silver salmon, Oncorhynchus kisutch'),
 ('687', 'organ, pipe organ'),
 ('930', 'French loaf'),
 ('670', 'motor scooter, scooter')]

## Load Pretrained model 

In [45]:
model = vmodels.alexnet(pretrained=True)
model

AlexNet(
  (features): Sequential(
    (0): Conv2d (3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU(inplace)
    (2): MaxPool2d(kernel_size=(3, 3), stride=(2, 2), dilation=(1, 1))
    (3): Conv2d (64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace)
    (5): MaxPool2d(kernel_size=(3, 3), stride=(2, 2), dilation=(1, 1))
    (6): Conv2d (192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace)
    (8): Conv2d (384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace)
    (10): Conv2d (256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace)
    (12): MaxPool2d(kernel_size=(3, 3), stride=(2, 2), dilation=(1, 1))
  )
  (classifier): Sequential(
    (0): Dropout(p=0.5)
    (1): Linear(in_features=9216, out_features=4096)
    (2): ReLU(inplace)
    (3): Dropout(p=0.5)
    (4): Linear(in_features=4096, out_features=4096)
    (5): ReLU(inplace)
    (6): Linea

In [47]:
modules = list(model.children())[0]
feature_extractor = nn.Sequential(*modules )
feature_extractor

Sequential(
  (0): Conv2d (3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
  (1): ReLU(inplace)
  (2): MaxPool2d(kernel_size=(3, 3), stride=(2, 2), dilation=(1, 1))
  (3): Conv2d (64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (4): ReLU(inplace)
  (5): MaxPool2d(kernel_size=(3, 3), stride=(2, 2), dilation=(1, 1))
  (6): Conv2d (192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (7): ReLU(inplace)
  (8): Conv2d (384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (9): ReLU(inplace)
  (10): Conv2d (256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (11): ReLU(inplace)
  (12): MaxPool2d(kernel_size=(3, 3), stride=(2, 2), dilation=(1, 1))
)

## 파인튜닝 모델 

프리트레인드 모델을 특징 추출기로 사용

In [41]:
class CNN(nn.Module):
    
    def __init__(self,feature_extractor):
        super(CNN,self).__init__()
        self.feature_extractor = feature_extractor
        self.classifier = nn.Sequential(nn.Linear(256*6*6,512),
                                                    nn.Dropout(0.3),
                                                    nn.ReLU(),
                                                    nn.Linear(512,256),
                                                    nn.Dropout(0.3),
                                                    nn.ReLU(),
                                                    nn.Linear(256,2))
    def forward(self,inputs):
        
        features = self.feature_extractor(inputs)
        features = features.view(-1,256*6*6)
        
        return self.classifier(features)

In [62]:
model = CNN(feature_extractor)
model

CNN(
  (feature_extractor): Sequential(
    (0): Conv2d (3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU(inplace)
    (2): MaxPool2d(kernel_size=(3, 3), stride=(2, 2), dilation=(1, 1))
    (3): Conv2d (64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace)
    (5): MaxPool2d(kernel_size=(3, 3), stride=(2, 2), dilation=(1, 1))
    (6): Conv2d (192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace)
    (8): Conv2d (384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace)
    (10): Conv2d (256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace)
    (12): MaxPool2d(kernel_size=(3, 3), stride=(2, 2), dilation=(1, 1))
  )
  (classifier): Sequential(
    (0): Linear(in_features=9216, out_features=512)
    (1): Dropout(p=0.3)
    (2): ReLU()
    (3): Linear(in_features=512, out_features=256)
    (4): Dropout(p=0.3)
    (5): ReLU()
    (6): Linear(in_feature

In [63]:
for module in list(model.children())[0]: # feature extractor의 weight는 고정
    print("fix weight", module)
    for param in module.parameters():
        param.requires_grad = False

fix weight Conv2d (3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
fix weight ReLU(inplace)
fix weight MaxPool2d(kernel_size=(3, 3), stride=(2, 2), dilation=(1, 1))
fix weight Conv2d (64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
fix weight ReLU(inplace)
fix weight MaxPool2d(kernel_size=(3, 3), stride=(2, 2), dilation=(1, 1))
fix weight Conv2d (192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
fix weight ReLU(inplace)
fix weight Conv2d (384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
fix weight ReLU(inplace)
fix weight Conv2d (256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
fix weight ReLU(inplace)
fix weight MaxPool2d(kernel_size=(3, 3), stride=(2, 2), dilation=(1, 1))


## Data pipeline 

In [57]:
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

p = Augmentor.Pipeline()
p.rotate(probability=0.7, max_left_rotation=10, max_right_rotation=10)
p.zoom(probability=0.5, min_factor=1.1, max_factor=1.5)

# validate
preprosessing = transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
           normalize,
        ])

# train
train_dataset = vdatasets.ImageFolder(
        "../../data/catdog/train/",
        transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.RandomHorizontalFlip(),
            p.torch_transform(),
            transforms.ToTensor(),
            normalize,
        ]))

train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=32, 
                                           shuffle=True,
                                           num_workers=2)

In [58]:
print(train_dataset.class_to_idx)
print(len(train_dataset.imgs))

{'dog': 1, 'cat': 0}
2000


## Fine Tuning(Train) 

In [66]:
EPOCH=10

In [64]:
EPOCH = 10
LR = 0.1
loss_function = nn.CrossEntropyLoss()

# requires_grad가 True인 parameter만 update
optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()),lr=LR)

In [67]:
model.train()
for epoch in range(EPOCH):
    losses=[]
    for i,(inputs,targets) in enumerate(train_loader):
        inputs, targets = Variable(inputs), Variable(targets)
        model.zero_grad()
        preds = model(inputs)
        loss = loss_function(preds,targets)
        losses.append(loss.data[0])
        loss.backward()
        optimizer.step()
    
    print("[%d/%d] mean_loss : %.3f" % (epoch,EPOCH,np.mean(losses)))

[0/10] mean_loss : 0.696
[1/10] mean_loss : 0.696
[2/10] mean_loss : 0.695
[3/10] mean_loss : 0.696
[4/10] mean_loss : 0.699
[5/10] mean_loss : 0.695
[6/10] mean_loss : 0.694
[7/10] mean_loss : 0.697
[8/10] mean_loss : 0.700
[9/10] mean_loss : 0.709


In [16]:
for idx in pred.topk(5)[1].data.tolist()[0]:
    print(idx2cls[str(idx)])

rock crab, Cancer irroratus
goldfish, Carassius auratus
steam locomotive
leopard, Panthera pardus
beacon, lighthouse, beacon light, pharos
