In [1]:
# 転移学習を使って巨人の肩に乗る
# ResNetはCNNの一種
# ResNetは精度が高いが大量のパラメーターをもつため大量の訓練用の画像が必要になる
# よって事前に学習してあるResNetネットワークの出力レイヤー以外を全て固定し、出力層のみ自分のデータについて学習し直す
from torchvision.datasets import ImageFolder
from torchvision import transforms
from torch.utils.data import DataLoader


# Datasetを作成
# (224, 224)にクロップする
# ランダムにクロップするRandomCropと中心をクロップするCenterCropがある
train_imgs = ImageFolder(
    'taco_and_burrito/train/', transform=transforms.Compose([transforms.RandomCrop(224), transforms.ToTensor()])
)
test_imgs = ImageFolder(
    'taco_and_burrito/test/', transform=transforms.Compose([transforms.CenterCrop(224), transforms.ToTensor()])
)
# DataLoaderを作成
train_loader = DataLoader(
    train_imgs, batch_size=32, shuffle=True
)
test_loader = DataLoader(
    test_imgs, batch_size=32, shuffle=True
)

In [2]:
# ラベルを確認する
print(train_imgs.classes)
print(train_imgs.class_to_idx)

['burrito', 'taco']
{'taco': 1, 'burrito': 0}


In [3]:
from torch import nn
from torchvision import models


# 事前学習済みのresnet18をロード
net = models.resnet18(pretrained=True)
# 全てのパラメーターを微分対象外にする
for p in net.parameters():
    p.requires_grad = False
# 線形レイヤーを最後にくっつける(この最後の線形レイヤーだけ微分可能)
fc_input_dim = net.fc.in_features
net.fc = nn.Linear(fc_input_dim, 2)

In [4]:
import torch


def eval_net(net, data_loader):
    # ネットワークを評価モードにする(dropoutやバッチノーマリゼーションを無効化する)
    net.eval()
    ys = []
    ypreds = []
    for x, y in data_loader:
        x = V(x)
        y = V(y)
        # 確率が最大のクラスを取得
        _, y_pred = net(x).max(1)
        ys.append(y)
        ypreds.append(y_pred)
    # ミニバッチごとの正解と予測結果を一つにまとめる
    ys = torch.cat(ys)
    ypreds = torch.cat(ypreds)
    # 予測精度を計算
    acc = (ys == ypreds).float().sum() / len(ys)
    return acc.data[0]

In [5]:
from torch import optim
from torch.autograd import Variable as V
from tqdm import tqdm


def train_net(net, train_loader, test_loader, only_fc=True, optimizer_cls=optim.Adam, loss_fn=nn.CrossEntropyLoss(), n_iter=10):
    train_losses = []
    train_acc = []
    val_acc = []
    if only_fc:
        # 最後の線形レイヤーのパラメーターのみをoptimizerに渡す
        optimizer = optimizer_cls(net.fc.parameters())
    else:
        optimizer = optimizer_cls(net.parameters())
    for epoch in range(n_iter):
        running_loss = 0.0
        # ネットワークを訓練モードにする(dropoutやバッチノーマリゼーションを有効化する)
        net.train()
        n = 0
        n_acc = 0
        for i, (x, y) in tqdm(enumerate(train_loader), total=len(train_loader)):
            xx = V(x)
            yy = V(y)
            h = net(xx)
            loss = loss_fn(h, yy)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            running_loss += loss.data[0]
            n += len(xx)
            _, y_pred = h.max(1)
            n_acc += (yy == y_pred).float().sum().data[0]
        train_losses.append(running_loss / i)
        train_acc.append(n_acc / n)
        val_acc.append(eval_net(net, test_loader))
        print(epoch, train_losses[-1], train_acc[-1], val_acc[-1], flush=True)

In [6]:
train_net(net, train_loader, test_loader)

100%|██████████| 23/23 [00:53<00:00,  2.33s/it]


0 0.7563520548018542 0.523876404494382 0.6333333253860474


100%|██████████| 23/23 [00:53<00:00,  2.32s/it]


1 0.5963573537089608 0.6924157303370787 0.8333333134651184


100%|██████████| 23/23 [00:52<00:00,  2.30s/it]


2 0.5028857561674985 0.7865168539325843 0.8666666746139526


100%|██████████| 23/23 [00:51<00:00,  2.24s/it]


3 0.44452633640982886 0.8117977528089888 0.8833333253860474


100%|██████████| 23/23 [00:51<00:00,  2.22s/it]


4 0.40442022545771167 0.8398876404494382 0.8500000238418579


100%|██████████| 23/23 [00:54<00:00,  2.39s/it]


5 0.3995352421294559 0.8581460674157303 0.8500000238418579


100%|██████████| 23/23 [00:53<00:00,  2.32s/it]


6 0.379801559177312 0.8469101123595506 0.8333333134651184


100%|██████████| 23/23 [00:53<00:00,  2.32s/it]


7 0.3658615492961623 0.851123595505618 0.8833333253860474


100%|██████████| 23/23 [00:59<00:00,  2.61s/it]


8 0.37569423019886017 0.8300561797752809 0.8999999761581421


100%|██████████| 23/23 [00:52<00:00,  2.29s/it]


9 0.3330144888975404 0.8792134831460674 0.8500000238418579


In [7]:
# 適当に作ったCNNを試す
# 学習速度はResNet転移学習の半分ほどで、精度も低いことがわかる
class FlattenLayer(nn.Module):
    def forward(self, x):
        sizes = x.size()
        return x.view(sizes[0], -1)

    
conv_net = nn.Sequential(
    nn.Conv2d(3, 32, 5),
    nn.MaxPool2d(2),
    nn.ReLU(),
    nn.BatchNorm2d(32),
    nn.Conv2d(32, 64, 5),
    nn.MaxPool2d(2),
    nn.ReLU(),
    nn.BatchNorm2d(64),
    nn.Conv2d(64, 128, 5),
    nn.MaxPool2d(2),
    nn.ReLU(),
    nn.BatchNorm2d(128),
    FlattenLayer()
)

test_input = V(torch.ones(1, 3, 224, 224))
conv_output_size = conv_net(test_input).size()[-1]
net = nn.Sequential(
    conv_net,
    nn.Linear(conv_output_size, 2)
)

In [8]:
train_net(net, train_loader, test_loader, n_iter=10, only_fc=False)

100%|██████████| 23/23 [01:31<00:00,  3.96s/it]


0 2.4851428839293392 0.5856741573033708 0.5666666626930237


100%|██████████| 23/23 [01:29<00:00,  3.88s/it]


1 2.8922124125740747 0.6123595505617978 0.5666666626930237


100%|██████████| 23/23 [01:27<00:00,  3.80s/it]


2 2.703800764950839 0.6193820224719101 0.6499999761581421


100%|██████████| 23/23 [01:20<00:00,  3.52s/it]


3 2.236954026601531 0.6629213483146067 0.5166666507720947


100%|██████████| 23/23 [01:22<00:00,  3.58s/it]


4 2.173715358430689 0.6502808988764045 0.6000000238418579


100%|██████████| 23/23 [01:20<00:00,  3.49s/it]


5 2.2542605752294715 0.6320224719101124 0.5


100%|██████████| 23/23 [02:16<00:00,  5.91s/it]


6 2.745232040231878 0.6292134831460674 0.5666666626930237


100%|██████████| 23/23 [02:14<00:00,  5.85s/it]


7 2.7683641639622776 0.6207865168539326 0.5333333611488342


100%|██████████| 23/23 [02:11<00:00,  5.70s/it]


8 2.071707947687669 0.6867977528089888 0.6333333253860474


100%|██████████| 23/23 [02:10<00:00,  5.68s/it]


9 1.711417856541547 0.6264044943820225 0.6166666746139526
