In [1]:
from torchvision.datasets import FashionMNIST
from torchvision import transforms
from torch.utils.data import DataLoader


fashion_mnist_train = FashionMNIST("./FashionMNIST", train=True, download=True, transform=transforms.ToTensor())
fashion_mnist_test = FashionMNIST("./FashionMNIST", train=False, download=True, transform=transforms.ToTensor())
batch_size = 128
train_loader = DataLoader(fashion_mnist_train, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(fashion_mnist_test, batch_size=batch_size, shuffle=True)

In [2]:
import torch
from torch import nn
from torch.autograd import Variable as V


class FlattenLayer(nn.Module):
    def forward(self, x):
        sizes = x.size()
        return x.view(sizes[0], -1)


conv_net = nn.Sequential(
    nn.Conv2d(1, 32, 5),
    nn.MaxPool2d(2),
    nn.ReLU(),
    nn.BatchNorm2d(32),
    nn.Dropout2d(0.25),
    nn.Conv2d(32, 64, 5),
    nn.MaxPool2d(2),
    nn.ReLU(),
    nn.BatchNorm2d(64),
    nn.Dropout2d(0.25),
    FlattenLayer()
)

test_input = V(torch.ones(1, 1, 28, 28))
conv_output_size = conv_net(test_input).size()[-1]

mlp = nn.Sequential(
    nn.Linear(conv_output_size, 200),
    nn.ReLU(),
    nn.BatchNorm1d(200),
    nn.Dropout(0.25),
    nn.Linear(200, 10)
)

net = nn.Sequential(
    conv_net,
    mlp
)

In [3]:
def eval_net(net, data_loader):
    net.eval()
    ys = []
    ypreds = []
    for x, y in data_loader:
        x = V(x, volatile=True)
        y = V(y, volatile=True)
        _, y_pred = net(x).max(1)
        ys.append(y)
        ypreds.append(y_pred)
    ys = torch.cat(ys)
    ypreds = torch.cat(ypreds)
    acc = (ys == ypreds).float().sum() / len(ys)
    return acc.data[0]

In [4]:
from torch import optim
from tqdm import tqdm


def train_net(net, train_loader, test_loader, optimizer_cls=optim.Adam, loss_fn=nn.CrossEntropyLoss(), n_iter=10):
    train_losses = []
    train_acc = []
    val_acc = []
    optimizer = optimizer_cls(net.parameters())
    for epoch in range(n_iter):
        running_loss = 0.0
        net.train()
        n = 0
        n_acc = 0
        for i, (x, y) in tqdm(enumerate(train_loader), total=len(train_loader)):
            xx = V(x)
            yy = V(y)
            h = net(xx)
            loss = loss_fn(h, yy)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            running_loss += loss.data[0]
            n += len(xx)
            _, y_pred = h.max(1)
            n_acc += (yy == y_pred).float().sum().data[0]
        train_losses.append(running_loss / i)
        train_acc.append(n_acc / n)
        val_acc.append(eval_net(net, test_loader))
        print(epoch, train_losses[-1], train_acc[-1], val_acc[-1], flush=True)

In [5]:
train_net(net, train_loader, test_loader, n_iter=20)

100%|██████████| 469/469 [00:36<00:00, 12.69it/s]


0 0.4696817793525182 0.8360166666666666 0.8837000131607056


100%|██████████| 469/469 [00:37<00:00, 12.35it/s]


1 0.31690023470128703 0.8852666666666666 0.8938999772071838


100%|██████████| 469/469 [00:38<00:00, 12.11it/s]


2 0.28366738490951365 0.8942666666666667 0.9020000100135803


100%|██████████| 469/469 [00:38<00:00, 12.13it/s]


3 0.2607022284124142 0.9054833333333333 0.9075999855995178


100%|██████████| 469/469 [00:39<00:00, 11.88it/s]


4 0.24268354862355268 0.9094833333333333 0.9107000231742859


100%|██████████| 469/469 [00:39<00:00, 12.00it/s]


5 0.23032876812558398 0.9146833333333333 0.9125999808311462


100%|██████████| 469/469 [00:39<00:00, 11.85it/s]


6 0.21889374651906326 0.9178333333333333 0.9100000262260437


100%|██████████| 469/469 [00:39<00:00, 11.90it/s]


7 0.2126369504019236 0.9212333333333333 0.916100025177002


100%|██████████| 469/469 [00:39<00:00, 11.93it/s]


8 0.20150579399086982 0.92575 0.9169999957084656


100%|██████████| 469/469 [00:40<00:00, 11.72it/s]


9 0.19574538350869447 0.9263333333333333 0.9199000000953674


100%|██████████| 469/469 [00:38<00:00, 12.22it/s]


10 0.19079051940486982 0.9280833333333334 0.9182999730110168


100%|██████████| 469/469 [00:40<00:00, 11.66it/s]


11 0.18188430547204792 0.93135 0.9222999811172485


100%|██████████| 469/469 [00:39<00:00, 11.91it/s]


12 0.1800253738482029 0.9325666666666667 0.921500027179718


100%|██████████| 469/469 [00:40<00:00, 11.59it/s]


13 0.17243293675984073 0.9364666666666667 0.9189000129699707


100%|██████████| 469/469 [00:40<00:00, 11.58it/s]


14 0.16959876688117656 0.9356166666666667 0.9230999946594238


100%|██████████| 469/469 [00:41<00:00, 11.36it/s]


15 0.1636534804939969 0.9384833333333333 0.9194999933242798


100%|██████████| 469/469 [00:40<00:00, 11.58it/s]


16 0.16048923446040633 0.9390833333333334 0.919700026512146


100%|██████████| 469/469 [00:40<00:00, 11.64it/s]


17 0.15733800464683872 0.941 0.9218000173568726


100%|██████████| 469/469 [00:39<00:00, 11.84it/s]


18 0.1507121831831387 0.9430666666666667 0.9196000099182129


100%|██████████| 469/469 [00:40<00:00, 11.48it/s]


19 0.1494983043680843 0.9437 0.9239000082015991


In [6]:
from torchvision.datasets import ImageFolder
from torchvision import transforms


# https://github.com/lucidfrontier45/PyTorch-Book/blob/master/data/taco_and_burrito.tar.gz
# tar -zxvf taco_and_burrito.tar.gz

# ImageFolder関数を使ってDatasetを作成する
train_imgs = ImageFolder(
    'taco_and_burrito/train/', transform=transforms.Compose([transforms.RandomCrop(224), transforms.ToTensor()])
)
test_imgs = ImageFolder(
    'taco_and_burrito/test/', transform=transforms.Compose([transforms.CenterCrop(224), transforms.ToTensor()])
)

# DataLoaderを作成
train_loader = DataLoader(
    train_imgs, batch_size=32, shuffle=True
)
test_loader = DataLoader(
    test_imgs, batch_size=32, shuffle=True
)

In [7]:
# ラベルを確認する
print(train_imgs.classes)
print(train_imgs.class_to_idx)

['burrito', 'taco']
{'taco': 1, 'burrito': 0}


In [8]:
from torchvision import models

net = models.resnet18(pretrained=True)
for p in net.parameters():
    p.requires_grad = False
fc_input_dim = net.fc.in_features
net.fc = nn.Linear(fc_input_dim, 2)

In [9]:
def train_net(net, train_loader, test_loader, only_fc=True, optimizer_cls=optim.Adam, loss_fn=nn.CrossEntropyLoss(), n_iter=10):
    train_losses = []
    train_acc = []
    val_acc = []
    if only_fc:
        optimizer = optimizer_cls(net.fc.parameters())
    else:
        optimizer = optimizer_cls(net.parameters())
    for epoch in range(n_iter):
        running_loss = 0.0
        net.train()
        n = 0
        n_acc = 0
        for i, (x, y) in tqdm(enumerate(train_loader), total=len(train_loader)):
            xx = V(x)
            yy = V(y)
            h = net(xx)
            loss = loss_fn(h, yy)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            running_loss += loss.data[0]
            n += len(xx)
            _, y_pred = h.max(1)
            n_acc += (yy == y_pred).float().sum().data[0]
        train_losses.append(running_loss / i)
        train_acc.append(n_acc / n)
        val_acc.append(eval_net(net, test_loader))
        print(epoch, train_losses[-1], train_acc[-1], val_acc[-1], flush=True)

In [10]:
train_net(net, train_loader, test_loader)

100%|██████████| 23/23 [00:48<00:00,  2.10s/it]


0 0.6655585102059625 0.625 0.7833333611488342


100%|██████████| 23/23 [00:48<00:00,  2.12s/it]


1 0.5223201445557855 0.7879213483146067 0.8333333134651184


100%|██████████| 23/23 [00:51<00:00,  2.24s/it]


2 0.4439111595804041 0.8216292134831461 0.8333333134651184


100%|██████████| 23/23 [00:48<00:00,  2.10s/it]


3 0.4073607461018996 0.8342696629213483 0.8500000238418579


100%|██████████| 23/23 [00:48<00:00,  2.09s/it]


4 0.383777800608765 0.8441011235955056 0.8500000238418579


100%|██████████| 23/23 [00:47<00:00,  2.06s/it]


5 0.37203958901492035 0.8455056179775281 0.8500000238418579


100%|██████████| 23/23 [00:47<00:00,  2.07s/it]


6 0.3480141697959466 0.8623595505617978 0.8833333253860474


100%|██████████| 23/23 [00:48<00:00,  2.13s/it]


7 0.36723836443640967 0.8455056179775281 0.8666666746139526


100%|██████████| 23/23 [00:47<00:00,  2.08s/it]


8 0.3484427881511775 0.8665730337078652 0.8333333134651184


100%|██████████| 23/23 [00:54<00:00,  2.39s/it]


9 0.3582162796096368 0.8469101123595506 0.8500000238418579


In [11]:
conv_net = nn.Sequential(
    nn.Conv2d(3, 32, 5),
    nn.MaxPool2d(2),
    nn.ReLU(),
    nn.BatchNorm2d(32),
    nn.Conv2d(32, 64, 5),
    nn.MaxPool2d(2),
    nn.ReLU(),
    nn.BatchNorm2d(64),
    nn.Conv2d(64, 128, 5),
    nn.MaxPool2d(2),
    nn.ReLU(),
    nn.BatchNorm2d(128),
    FlattenLayer()
)

test_input = V(torch.ones(1, 3, 224, 224))
conv_output_size = conv_net(test_input).size()[-1]
net = nn.Sequential(
    conv_net,
    nn.Linear(conv_output_size, 2)
)
train_net(net, train_loader, test_loader, n_iter=10, only_fc=False)

100%|██████████| 23/23 [01:15<00:00,  3.27s/it]


0 2.6316726587035437 0.5407303370786517 0.44999998807907104


100%|██████████| 23/23 [01:11<00:00,  3.11s/it]


1 2.4766790812665764 0.6264044943820225 0.4833333194255829


100%|██████████| 23/23 [01:14<00:00,  3.25s/it]


2 2.7498627359216865 0.6334269662921348 0.4833333194255829


100%|██████████| 23/23 [01:22<00:00,  3.57s/it]


3 2.803769415075129 0.6081460674157303 0.5833333134651184


100%|██████████| 23/23 [01:27<00:00,  3.81s/it]


4 2.489235281944275 0.6165730337078652 0.4833333194255829


100%|██████████| 23/23 [01:33<00:00,  4.06s/it]


5 2.6054998473687605 0.6390449438202247 0.6000000238418579


100%|██████████| 23/23 [01:32<00:00,  4.00s/it]


6 2.252506724812768 0.6376404494382022 0.5833333134651184


100%|██████████| 23/23 [01:21<00:00,  3.56s/it]


7 2.1001171361316335 0.6671348314606742 0.6166666746139526


100%|██████████| 23/23 [01:26<00:00,  3.78s/it]


8 1.9710289050232281 0.672752808988764 0.6333333253860474


100%|██████████| 23/23 [01:26<00:00,  3.77s/it]


9 2.076835884289308 0.6544943820224719 0.6166666746139526


In [12]:
# http://vis-www.cs.umass.edu/lfw/lfw-deepfunneled.tgz
class DownSizePairImageFolder(ImageFolder):
    def __init__(self, root, transform=None, large_size=128, small_size=32, **kwds):
        super().__init__(root, transform=transform, **kwds)
        self.large_resizer = transforms.Resize(large_size)
        self.small_resizer = transforms.Resize(small_size)
        
    def __getitem__(self, index):
        path, _ = self.imgs[index]
        img = self.loader(path)
        large_img = self.large_resizer(img)
        small_img = self.small_resizer(img)
        if self.transform is not None:
            large_img = self.transform(large_img)
            small_img = self.transform(small_img)
        return small_img, large_img

In [13]:
train_data = DownSizePairImageFolder('./lfw-deepfunneled/train', transform=transforms.ToTensor())
test_data = DownSizePairImageFolder('./lfw-deepfunneled/test', transform=transforms.ToTensor())
batch_size = 32
train_loader = DataLoader(train_data, batch_size, shuffle=True, num_workers=4)
test_loader = DataLoader(test_data, batch_size, shuffle=False, num_workers=4)

In [14]:
net = nn.Sequential(
    nn.Conv2d(3, 256, 4, stride=2, padding=1),
    nn.ReLU(),
    nn.BatchNorm2d(256),
    nn.Conv2d(256, 512, 4, stride=2, padding=1),
    nn.ReLU(),
    nn.BatchNorm2d(512),
    nn.ConvTranspose2d(512, 256, 4, stride=2, padding=1),
    nn.ReLU(),
    nn.BatchNorm2d(256),
    nn.ConvTranspose2d(256, 128, 4, stride=2, padding=1),
    nn.ReLU(),
    nn.BatchNorm2d(128),
    nn.ConvTranspose2d(128, 64, 4, stride=2, padding=1),
    nn.ReLU(),
    nn.BatchNorm2d(64),
    nn.ConvTranspose2d(64, 3, 4, stride=2, padding=1)
)

In [15]:
import math


def psnr(mse, max_v=1.0):
    return 10 * math.log10(max_v / mse)

def eval_net(net, data_loader):
    net.eval()
    ys = []
    ypreds = []
    for x, y in data_loader:
        x = V(x, volatile=True)
        y = V(y, volatile=True)
        y_pred = net(x)
        ys.append(y)
        ypreds.append(y_pred)
    ys = torch.cat(ys)
    ypreds = torch.cat(ypreds)
    score = nn.functional.mse_loss(ypreds, ys).data[0]
    return score

In [16]:
def train_net(net, train_loader, test_loader, optimizer_cls=optim.Adam, loss_fn=nn.MSELoss(), n_iter=10):
    train_losses = []
    train_acc = []
    val_acc = []
    optimizer = optimizer_cls(net.parameters())
    for epoch in range(n_iter):
        running_loss = 0.0
        net.train()
        n = 0
        score = 0
        for i, (x, y) in tqdm(enumerate(train_loader), total=len(train_loader)):
            xx = V(x)
            yy = V(y)
            y_pred = net(xx)
            loss = loss_fn(y_pred, yy)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            running_loss += loss.data[0]
            n += len(xx)
        train_losses.append(running_loss / len(train_loader))
        val_acc.append(eval_net(net, test_loader))
        print(epoch, train_losses[-1], psnr(train_losses[-1]), psnr(val_acc[-1]), flush=True)

In [17]:
train_net(net, train_loader, test_loader)

100%|██████████| 409/409 [12:57<00:00,  1.90s/it]


0 0.018142582017536706 17.413009049181525 24.479027268264574


100%|██████████| 409/409 [12:19<00:00,  1.81s/it]


1 0.003242177844063486 24.891631663607214 25.483795442144462


100%|██████████| 409/409 [13:53<00:00,  2.04s/it]


2 0.0029121399361840597 25.35787759835523 26.155996713396107


100%|██████████| 409/409 [10:58<00:00,  1.61s/it]


3 0.002594146669459755 25.860054731492745 26.475525198970328


100%|██████████| 409/409 [11:38<00:00,  1.71s/it]


4 0.002443933230710915 26.119106633762588 26.817901174546357


100%|██████████| 409/409 [11:08<00:00,  1.64s/it]


5 0.0023657246185398335 26.260358106840414 27.355058625512445


100%|██████████| 409/409 [10:56<00:00,  1.61s/it]


6 0.0023775054537520397 26.23878478162704 27.405311473942522


100%|██████████| 409/409 [12:14<00:00,  1.80s/it]


7 0.002277322324143135 26.42575496530273 26.55313750150667


100%|██████████| 409/409 [15:40<00:00,  2.30s/it]


8 0.0021751045797607682 26.625198571823844 26.902787021461776


100%|██████████| 409/409 [17:47<00:00,  2.61s/it]


9 0.002176459252203571 26.622494593169645 27.42281304501882


In [18]:
from torchvision.utils import save_image


random_test_loader = DataLoader(test_data, batch_size=4, shuffle=True)
it = iter(random_test_loader)
x, y = next(it)
bl_recon = torch.nn.functional.upsample(x, 128, mode='bilinear')
yp = net(V(x, volatile=True))
save_image(torch.cat([y, bl_recon.data, yp.data], 0), 'cnn_upscale.jpg', nrow=4)

In [19]:
# http://www.robots.ox.ac.uk/~vgg/data/flowers/102/102flowers.tgz
img_data = ImageFolder('./oxford-102/', transform=transforms.Compose([transforms.Resize(80), transforms.CenterCrop(64), transforms.ToTensor()]))
batch_size = 64
img_loader = DataLoader(img_data, batch_size=batch_size, shuffle=True)

In [20]:
# 
nz = 100
ngf = 32

class GNet(nn.Module):
    def __init__(self):
        super().__init__()
        self.main = nn.Sequential(
            nn.ConvTranspose2d(nz, ngf * 8, 4, 1, 0, bias=False),
            nn.BatchNorm2d(ngf * 8),
            nn.ReLU(inplace=True),
            nn.ConvTranspose2d(ngf * 8, ngf * 4, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ngf * 4),
            nn.ReLU(inplace=True),
            nn.ConvTranspose2d(ngf * 4, ngf * 2, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ngf * 2),
            nn.ReLU(inplace=True),
            nn.ConvTranspose2d(ngf * 2, ngf, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ngf),
            nn.ReLU(inplace=True),
            nn.ConvTranspose2d(ngf, 3, 4, 2, 1, bias=False),
            nn.Tanh()
        )
    
    def forward(self, x):
        out = self.main(x)
        return out

In [21]:
ndf = 32

class DNet(nn.Module):
    def __init__(self):
        super().__init__()
        self.main = nn.Sequential(
            nn.Conv2d(3, ndf, 4, 2, 1, bias=False),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Conv2d(ndf, ndf * 2, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ndf * 2),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Conv2d(ndf * 2, ndf * 4, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ndf * 4),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Conv2d(ndf * 4, ndf * 8, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ndf * 8),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Conv2d(ndf * 8, 1, 4, 1, 0, bias=False)
        )

    def forward(self, x):
        out = self.main(x)
        return out.squeeze()

In [22]:
d = DNet()
g = GNet()

opt_d = optim.Adam(d.parameters(), lr=0.0002, betas=(0.5, 0.999))
opt_g = optim.Adam(g.parameters(), lr=0.0002, betas=(0.5, 0.999))
ones = V(torch.ones(batch_size))
zeros = V(torch.zeros(batch_size))
loss_f = nn.BCEWithLogitsLoss()
fixed_z = V(torch.randn(batch_size, nz, 1, 1))

In [23]:
from statistics import mean


def train_dcgan(g, d, opt_g, opt_d, loader):
    log_loss_g = []
    log_loss_d = []
    for real_img, _ in tqdm(loader):
        batch_len = len(real_img)
        z = torch.randn(batch_len, nz, 1, 1)
        fake_img = g(V(z))
        fake_img_tensor = fake_img.data
        out = d(fake_img)
        loss_g = loss_f(out, ones[:batch_len])
        log_loss_g.append(loss_g.data[0])
        d.zero_grad()
        g.zero_grad()
        loss_g.backward()
        opt_g.step()
        real_out = d(V(real_img))
        loss_d_real = loss_f(real_out, ones[:batch_len])
        fake_img = V(fake_img_tensor)
        fake_out = d(fake_img)
        loss_d_fake = loss_f(fake_out, zeros[:batch_len])
        loss_d = loss_d_real + loss_d_fake
        log_loss_d.append(loss_d.data[0])
        d.zero_grad()
        g.zero_grad()
        loss_d.backward()
        opt_d.step()
    return mean(log_loss_g), mean(log_loss_d)

In [None]:
for epoch in range(100):
    train_dcgan(g, d, opt_g, opt_d, img_loader)
    if epoch % 10 == 0:
        torch.save(
            g.state_dict(),
            './oxford-102-gen/g_{:03d}.prm'.format(epoch),
            pickle_protocol=4
        )
        torch.save(
            d.state_dict(),
            './oxford-102-gen/g_{:03d}.prm'.format(epoch),
            pickle_protocol=4
        )
        generated_img = g(fixed_z).data
        save_image(
            generated_img,
            './oxford-102-gen/{:03d}.jpg'.format(epoch)
        )

100%|██████████| 128/128 [04:07<00:00,  1.93s/it]
100%|██████████| 128/128 [03:53<00:00,  1.82s/it]
100%|██████████| 128/128 [03:43<00:00,  1.75s/it]
100%|██████████| 128/128 [03:51<00:00,  1.81s/it]
100%|██████████| 128/128 [03:34<00:00,  1.67s/it]
100%|██████████| 128/128 [03:30<00:00,  1.65s/it]
100%|██████████| 128/128 [03:31<00:00,  1.65s/it]
100%|██████████| 128/128 [03:29<00:00,  1.64s/it]
100%|██████████| 128/128 [03:29<00:00,  1.64s/it]
100%|██████████| 128/128 [03:29<00:00,  1.64s/it]
100%|██████████| 128/128 [03:29<00:00,  1.64s/it]
100%|██████████| 128/128 [03:29<00:00,  1.63s/it]
100%|██████████| 128/128 [03:30<00:00,  1.65s/it]
100%|██████████| 128/128 [03:33<00:00,  1.67s/it]
100%|██████████| 128/128 [03:22<00:00,  1.58s/it]
100%|██████████| 128/128 [03:22<00:00,  1.59s/it]
100%|██████████| 128/128 [03:23<00:00,  1.59s/it]
100%|██████████| 128/128 [03:24<00:00,  1.60s/it]
100%|██████████| 128/128 [03:24<00:00,  1.59s/it]
100%|██████████| 128/128 [03:25<00:00,  1.61s/it]
