In [1]:
from torchvision.datasets import FashionMNIST
from torchvision import transforms
from torch.utils.data import DataLoader


fashion_mnist_train = FashionMNIST("./FashionMNIST", train=True, download=True, transform=transforms.ToTensor())
fashion_mnist_test = FashionMNIST("./FashionMNIST", train=False, download=True, transform=transforms.ToTensor())
batch_size = 128
train_loader = DataLoader(fashion_mnist_train, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(fashion_mnist_test, batch_size=batch_size, shuffle=True)

In [2]:
import torch
from torch import nn
from torch.autograd import Variable as V


class FlattenLayer(nn.Module):
    def forward(self, x):
        sizes = x.size()
        return x.view(sizes[0], -1)


conv_net = nn.Sequential(
    nn.Conv2d(1, 32, 5),
    nn.MaxPool2d(2),
    nn.ReLU(),
    nn.BatchNorm2d(32),
    nn.Dropout2d(0.25),
    nn.Conv2d(32, 64, 5),
    nn.MaxPool2d(2),
    nn.ReLU(),
    nn.BatchNorm2d(64),
    nn.Dropout2d(0.25),
    FlattenLayer()
)

test_input = V(torch.ones(1, 1, 28, 28))
conv_output_size = conv_net(test_input).size()[-1]

mlp = nn.Sequential(
    nn.Linear(conv_output_size, 200),
    nn.ReLU(),
    nn.BatchNorm1d(200),
    nn.Dropout(0.25),
    nn.Linear(200, 10)
)

net = nn.Sequential(
    conv_net,
    mlp
)

In [3]:
def eval_net(net, data_loader):
    net.eval()
    ys = []
    ypreds = []
    for x, y in data_loader:
        x = V(x, volatile=True)
        y = V(y, volatile=True)
        _, y_pred = net(x).max(1)
        ys.append(y)
        ypreds.append(y_pred)
    ys = torch.cat(ys)
    ypreds = torch.cat(ypreds)
    acc = (ys == ypreds).float().sum() / len(ys)
    return acc.data[0]

In [4]:
from torch import optim
from tqdm import tqdm


def train_net(net, train_loader, test_loader, optimizer_cls=optim.Adam, loss_fn=nn.CrossEntropyLoss(), n_iter=10):
    train_losses = []
    train_acc = []
    val_acc = []
    optimizer = optimizer_cls(net.parameters())
    for epoch in range(n_iter):
        running_loss = 0.0
        net.train()
        n = 0
        n_acc = 0
        for i, (x, y) in tqdm(enumerate(train_loader), total=len(train_loader)):
            xx = V(x)
            yy = V(y)
            h = net(xx)
            loss = loss_fn(h, yy)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            running_loss += loss.data[0]
            n += len(xx)
            _, y_pred = h.max(1)
            n_acc += (yy == y_pred).float().sum().data[0]
        train_losses.append(running_loss / i)
        train_acc.append(n_acc / n)
        val_acc.append(eval_net(net, test_loader))
        print(epoch, train_losses[-1], train_acc[-1], val_acc[-1], flush=True)

In [5]:
train_net(net, train_loader, test_loader, n_iter=20)

100%|██████████| 469/469 [00:38<00:00, 12.31it/s]


0 0.4815278327108448 0.8320833333333333 0.8723000288009644


100%|██████████| 469/469 [00:49<00:00,  9.44it/s]


1 0.32369927507944596 0.8827833333333334 0.8866999745368958


100%|██████████| 469/469 [00:52<00:00,  8.95it/s]


2 0.2884564301651767 0.8942333333333333 0.8986999988555908


100%|██████████| 469/469 [00:57<00:00,  8.15it/s]


3 0.26656204455683374 0.9019166666666667 0.9043999910354614


100%|██████████| 469/469 [00:54<00:00,  8.63it/s]


4 0.25000606870485675 0.90835 0.9101999998092651


100%|██████████| 469/469 [00:48<00:00,  9.69it/s]


5 0.23602418534648725 0.9131333333333334 0.9077000021934509


100%|██████████| 469/469 [00:43<00:00, 10.87it/s]


6 0.22705599184856456 0.9160666666666667 0.9115999937057495


100%|██████████| 469/469 [00:53<00:00,  8.74it/s]


7 0.21492699575093058 0.9207333333333333 0.9144999980926514


100%|██████████| 469/469 [00:59<00:00,  7.92it/s]


8 0.20887784467229986 0.9216333333333333 0.9114999771118164


100%|██████████| 469/469 [01:04<00:00,  7.32it/s]


9 0.2003056875979289 0.9251 0.9175999760627747


100%|██████████| 469/469 [00:59<00:00,  7.82it/s]


10 0.1961685505050879 0.9263666666666667 0.9132000207901001


100%|██████████| 469/469 [01:10<00:00,  6.63it/s]


11 0.18798319586258158 0.9296 0.9157999753952026


100%|██████████| 469/469 [01:06<00:00,  7.06it/s]


12 0.18222791945131925 0.9321666666666667 0.9197999835014343


100%|██████████| 469/469 [01:12<00:00,  6.45it/s]


13 0.1743777973147539 0.9354 0.9171000123023987


100%|██████████| 469/469 [01:06<00:00,  7.08it/s]


14 0.17058991266685164 0.93695 0.9205999970436096


100%|██████████| 469/469 [01:01<00:00,  7.60it/s]


15 0.16815038972621799 0.9367166666666666 0.9190000295639038


100%|██████████| 469/469 [01:04<00:00,  7.29it/s]


16 0.1639144382495274 0.9393333333333334 0.9174000024795532


100%|██████████| 469/469 [01:03<00:00,  7.42it/s]


17 0.15896545889445096 0.94075 0.9205999970436096


100%|██████████| 469/469 [01:02<00:00,  7.52it/s]


18 0.1552429913312324 0.9413833333333333 0.919700026512146


100%|██████████| 469/469 [00:53<00:00,  8.71it/s]


19 0.1536693780038219 0.9418 0.9204000234603882


In [6]:
from torchvision.datasets import ImageFolder
from torchvision import transforms


# https://github.com/lucidfrontier45/PyTorch-Book/blob/master/data/taco_and_burrito.tar.gz
# import urllib.request as req
# req.urlretrieve('https://github.com/lucidfrontier45/PyTorch-Book/raw/master/data/taco_and_burrito.tar.gz', 'taco_and_burrito.tar.gz')
# tar -zxvf taco_and_burrito.tar.gz

# ImageFolder関数を使ってDatasetを作成する
train_imgs = ImageFolder(
    'taco_and_burrito/train/', transform=transforms.Compose([transforms.RandomCrop(224), transforms.ToTensor()])
)
test_imgs = ImageFolder(
    'taco_and_burrito/test/', transform=transforms.Compose([transforms.CenterCrop(224), transforms.ToTensor()])
)

# DataLoaderを作成
train_loader = DataLoader(
    train_imgs, batch_size=32, shuffle=True
)
test_loader = DataLoader(
    test_imgs, batch_size=32, shuffle=True
)

In [7]:
# ラベルを確認する
print(train_imgs.classes)
print(train_imgs.class_to_idx)

['burrito', 'taco']
{'burrito': 0, 'taco': 1}


In [8]:
from torchvision import models

net = models.resnet18(pretrained=True)
for p in net.parameters():
    p.requires_grad = False
fc_input_dim = net.fc.in_features
net.fc = nn.Linear(fc_input_dim, 2)

In [9]:
def train_net(net, train_loader, test_loader, only_fc=True, optimizer_cls=optim.Adam, loss_fn=nn.CrossEntropyLoss(), n_iter=10):
    train_losses = []
    train_acc = []
    val_acc = []
    if only_fc:
        optimizer = optimizer_cls(net.fc.parameters())
    else:
        optimizer = optimizer_cls(net.parameters())
    for epoch in range(n_iter):
        running_loss = 0.0
        net.train()
        n = 0
        n_acc = 0
        for i, (x, y) in tqdm(enumerate(train_loader), total=len(train_loader)):
            xx = V(x)
            yy = V(y)
            h = net(xx)
            loss = loss_fn(h, yy)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            running_loss += loss.data[0]
            n += len(xx)
            _, y_pred = h.max(1)
            n_acc += (yy == y_pred).float().sum().data[0]
        train_losses.append(running_loss / i)
        train_acc.append(n_acc / n)
        val_acc.append(eval_net(net, test_loader))
        print(epoch, train_losses[-1], train_acc[-1], val_acc[-1], flush=True)

In [10]:
train_net(net, train_loader, test_loader)

100%|██████████| 23/23 [01:19<00:00,  3.45s/it]


0 0.692977333610708 0.6109550561797753 0.75


100%|██████████| 23/23 [00:58<00:00,  2.55s/it]


1 0.5648336153138768 0.7219101123595506 0.800000011920929


100%|██████████| 23/23 [00:52<00:00,  2.29s/it]


2 0.47029364244504407 0.8047752808988764 0.8500000238418579


100%|██████████| 23/23 [00:52<00:00,  2.26s/it]


3 0.44089352271773596 0.8202247191011236 0.8500000238418579


100%|██████████| 23/23 [00:53<00:00,  2.35s/it]


4 0.4000209414146163 0.851123595505618 0.8833333253860474


100%|██████████| 23/23 [00:51<00:00,  2.26s/it]


5 0.39715006812052295 0.8412921348314607 0.8833333253860474


100%|██████████| 23/23 [00:51<00:00,  2.25s/it]


6 0.3938699642365629 0.851123595505618 0.8666666746139526


100%|██████████| 23/23 [00:51<00:00,  2.26s/it]


7 0.3751860680905255 0.8525280898876404 0.8666666746139526


100%|██████████| 23/23 [00:52<00:00,  2.27s/it]


8 0.35056799176064407 0.8651685393258427 0.8666666746139526


100%|██████████| 23/23 [00:51<00:00,  2.23s/it]


9 0.3556136963042346 0.8623595505617978 0.8999999761581421


In [11]:
conv_net = nn.Sequential(
    nn.Conv2d(3, 32, 5),
    nn.MaxPool2d(2),
    nn.ReLU(),
    nn.BatchNorm2d(32),
    nn.Conv2d(32, 64, 5),
    nn.MaxPool2d(2),
    nn.ReLU(),
    nn.BatchNorm2d(64),
    nn.Conv2d(64, 128, 5),
    nn.MaxPool2d(2),
    nn.ReLU(),
    nn.BatchNorm2d(128),
    FlattenLayer()
)

test_input = V(torch.ones(1, 3, 224, 224))
conv_output_size = conv_net(test_input).size()[-1]
net = nn.Sequential(
    conv_net,
    nn.Linear(conv_output_size, 2)
)
train_net(net, train_loader, test_loader, n_iter=10, only_fc=False)

100%|██████████| 23/23 [01:16<00:00,  3.34s/it]


0 2.347672779451717 0.5421348314606742 0.550000011920929


100%|██████████| 23/23 [01:15<00:00,  3.28s/it]


1 2.830597899176858 0.6151685393258427 0.6000000238418579


100%|██████████| 23/23 [01:14<00:00,  3.26s/it]


2 2.5370710546320137 0.6095505617977528 0.699999988079071


100%|██████████| 23/23 [01:14<00:00,  3.25s/it]


3 2.5809576240452854 0.6432584269662921 0.6333333253860474


100%|██████████| 23/23 [01:14<00:00,  3.24s/it]


4 2.3385156393051147 0.6179775280898876 0.5166666507720947


100%|██████████| 23/23 [01:14<00:00,  3.24s/it]


5 2.7094398086721245 0.6474719101123596 0.6499999761581421


100%|██████████| 23/23 [01:15<00:00,  3.26s/it]


6 2.401399401101199 0.6446629213483146 0.5666666626930237


100%|██████████| 23/23 [01:14<00:00,  3.23s/it]


7 1.9012316438284786 0.672752808988764 0.6333333253860474


100%|██████████| 23/23 [01:14<00:00,  3.25s/it]


8 2.158865836533633 0.6657303370786517 0.6833333373069763


100%|██████████| 23/23 [01:15<00:00,  3.28s/it]


9 2.3084469004110857 0.6699438202247191 0.5666666626930237
