In [1]:
from torchvision.datasets import FashionMNIST
from torchvision import transforms
from torch.utils.data import DataLoader


fashion_mnist_train = FashionMNIST("./FashionMNIST", train=True, download=True, transform=transforms.ToTensor())
fashion_mnist_test = FashionMNIST("./FashionMNIST", train=False, download=True, transform=transforms.ToTensor())
batch_size = 128
train_loader = DataLoader(fashion_mnist_train, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(fashion_mnist_test, batch_size=batch_size, shuffle=True)

In [2]:
import torch
from torch import nn
from torch.autograd import Variable as V


class FlattenLayer(nn.Module):
    def forward(self, x):
        sizes = x.size()
        return x.view(sizes[0], -1)


conv_net = nn.Sequential(
    nn.Conv2d(1, 32, 5),
    nn.MaxPool2d(2),
    nn.ReLU(),
    nn.BatchNorm2d(32),
    nn.Dropout2d(0.25),
    nn.Conv2d(32, 64, 5),
    nn.MaxPool2d(2),
    nn.ReLU(),
    nn.BatchNorm2d(64),
    nn.Dropout2d(0.25),
    FlattenLayer()
)

test_input = V(torch.ones(1, 1, 28, 28))
conv_output_size = conv_net(test_input).size()[-1]

mlp = nn.Sequential(
    nn.Linear(conv_output_size, 200),
    nn.ReLU(),
    nn.BatchNorm1d(200),
    nn.Dropout(0.25),
    nn.Linear(200, 10)
)

net = nn.Sequential(
    conv_net,
    mlp
)

In [3]:
def eval_net(net, data_loader):
    net.eval()
    ys = []
    ypreds = []
    for x, y in data_loader:
        x = V(x, volatile=True)
        y = V(y, volatile=True)
        _, y_pred = net(x).max(1)
        ys.append(y)
        ypreds.append(y_pred)
    ys = torch.cat(ys)
    ypreds = torch.cat(ypreds)
    acc = (ys == ypreds).float().sum() / len(ys)
    return acc.data[0]

In [4]:
from torch import optim
from tqdm import tqdm


def train_net(net, train_loader, test_loader, optimizer_cls=optim.Adam, loss_fn=nn.CrossEntropyLoss(), n_iter=10):
    train_losses = []
    train_acc = []
    val_acc = []
    optimizer = optimizer_cls(net.parameters())
    for epoch in range(n_iter):
        running_loss = 0.0
        net.train()
        n = 0
        n_acc = 0
        for i, (x, y) in tqdm(enumerate(train_loader), total=len(train_loader)):
            xx = V(x)
            yy = V(y)
            h = net(xx)
            loss = loss_fn(h, yy)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            running_loss += loss.data[0]
            n += len(xx)
            _, y_pred = h.max(1)
            n_acc += (yy == y_pred).float().sum().data[0]
        train_losses.append(running_loss / i)
        train_acc.append(n_acc / n)
        val_acc.append(eval_net(net, test_loader))
        print(epoch, train_losses[-1], train_acc[-1], val_acc[-1], flush=True)

In [5]:
train_net(net, train_loader, test_loader, n_iter=20)

100%|██████████| 469/469 [00:42<00:00, 10.99it/s]


0 0.46678688348485875 0.837 0.8758999705314636


100%|██████████| 469/469 [00:43<00:00, 10.67it/s]


1 0.3179559440821664 0.8842833333333333 0.8960999846458435


100%|██████████| 469/469 [00:45<00:00, 10.32it/s]


2 0.2820433929371528 0.8970833333333333 0.902400016784668


100%|██████████| 469/469 [00:42<00:00, 11.15it/s]


3 0.25928136168254745 0.90545 0.9036999940872192


100%|██████████| 469/469 [00:41<00:00, 11.27it/s]


4 0.2467607801191063 0.9092333333333333 0.9057000279426575


100%|██████████| 469/469 [00:44<00:00, 10.49it/s]


5 0.23194421493472198 0.9144 0.9132999777793884


100%|██████████| 469/469 [00:41<00:00, 11.22it/s]


6 0.22141847159299585 0.9178 0.8959000110626221


100%|██████████| 469/469 [00:43<00:00, 10.80it/s]


7 0.21102253363555312 0.9214833333333333 0.9108999967575073


100%|██████████| 469/469 [00:41<00:00, 11.34it/s]


8 0.20307957562498558 0.9253333333333333 0.9179999828338623


100%|██████████| 469/469 [00:41<00:00, 11.23it/s]


9 0.1973772682567947 0.9265 0.9168999791145325


100%|██████████| 469/469 [00:43<00:00, 10.82it/s]


10 0.19072624799023327 0.9279166666666666 0.9154999852180481


100%|██████████| 469/469 [00:45<00:00, 10.24it/s]


11 0.18401505578404817 0.93215 0.9171000123023987


100%|██████████| 469/469 [00:41<00:00, 11.35it/s]


12 0.17770570042168993 0.9333166666666667 0.9186999797821045


100%|██████████| 469/469 [00:42<00:00, 11.04it/s]


13 0.1728153092484189 0.9360666666666667 0.9197999835014343


100%|██████████| 469/469 [00:43<00:00, 10.74it/s]


14 0.16913049841602135 0.9356833333333333 0.9200999736785889


100%|██████████| 469/469 [00:43<00:00, 10.88it/s]


15 0.16296905311795637 0.9390833333333334 0.921500027179718


100%|██████████| 469/469 [00:45<00:00, 10.23it/s]


16 0.16010286600098142 0.9405833333333333 0.9207000136375427


100%|██████████| 469/469 [00:48<00:00,  9.66it/s]


17 0.15833411957177088 0.9390833333333334 0.9187999963760376


100%|██████████| 469/469 [00:41<00:00, 11.18it/s]


18 0.15336055326092446 0.9419 0.917900025844574


100%|██████████| 469/469 [00:45<00:00, 10.23it/s]


19 0.14977516746546468 0.9438166666666666 0.920799970626831


In [6]:
from torchvision.datasets import ImageFolder
from torchvision import transforms


# https://github.com/lucidfrontier45/PyTorch-Book/blob/master/data/taco_and_burrito.tar.gz
# import urllib.request as req
# req.urlretrieve('https://github.com/lucidfrontier45/PyTorch-Book/raw/master/data/taco_and_burrito.tar.gz', 'taco_and_burrito.tar.gz')
# tar -zxvf taco_and_burrito.tar.gz

# ImageFolder関数を使ってDatasetを作成する
train_imgs = ImageFolder(
    'taco_and_burrito/train/', transform=transforms.Compose([transforms.RandomCrop(224), transforms.ToTensor()])
)
test_imgs = ImageFolder(
    'taco_and_burrito/test/', transform=transforms.Compose([transforms.CenterCrop(224), transforms.ToTensor()])
)

# DataLoaderを作成
train_loader = DataLoader(
    train_imgs, batch_size=32, shuffle=True
)
test_loader = DataLoader(
    test_imgs, batch_size=32, shuffle=True
)

In [7]:
# ラベルを確認する
print(train_imgs.classes)
print(train_imgs.class_to_idx)

['burrito', 'taco']
{'burrito': 0, 'taco': 1}


In [8]:
from torchvision import models

net = models.resnet18(pretrained=True)
for p in net.parameters():
    p.requires_grad = False
fc_input_dim = net.fc.in_features
net.fc = nn.Linear(fc_input_dim, 2)

In [9]:
def train_net(net, train_loader, test_loader, only_fc=True, optimizer_cls=optim.Adam, loss_fn=nn.CrossEntropyLoss(), n_iter=10):
    train_losses = []
    train_acc = []
    val_acc = []
    if only_fc:
        optimizer = optimizer_cls(net.fc.parameters())
    else:
        optimizer = optimizer_cls(net.parameters())
    for epoch in range(n_iter):
        running_loss = 0.0
        net.train()
        n = 0
        n_acc = 0
        for i, (x, y) in tqdm(enumerate(train_loader), total=len(train_loader)):
            xx = V(x)
            yy = V(y)
            h = net(xx)
            loss = loss_fn(h, yy)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            running_loss += loss.data[0]
            n += len(xx)
            _, y_pred = h.max(1)
            n_acc += (yy == y_pred).float().sum().data[0]
        train_losses.append(running_loss / i)
        train_acc.append(n_acc / n)
        val_acc.append(eval_net(net, test_loader))
        print(epoch, train_losses[-1], train_acc[-1], val_acc[-1], flush=True)

In [10]:
train_net(net, train_loader, test_loader)

100%|██████████| 23/23 [00:49<00:00,  2.17s/it]


0 0.7299538336016915 0.5926966292134831 0.6333333253860474


100%|██████████| 23/23 [00:55<00:00,  2.42s/it]


1 0.5614853582598947 0.7359550561797753 0.699999988079071


100%|██████████| 23/23 [00:50<00:00,  2.18s/it]


2 0.49056698788296094 0.8019662921348315 0.7666666507720947


100%|██████████| 23/23 [00:57<00:00,  2.49s/it]


3 0.5161477164788679 0.7598314606741573 0.7666666507720947


100%|██████████| 23/23 [01:03<00:00,  2.74s/it]


4 0.4553268958221782 0.797752808988764 0.800000011920929


100%|██████████| 23/23 [00:52<00:00,  2.29s/it]


5 0.4060501408847896 0.8441011235955056 0.8666666746139526


100%|██████████| 23/23 [00:54<00:00,  2.36s/it]


6 0.37448320808735763 0.8693820224719101 0.8666666746139526


100%|██████████| 23/23 [00:52<00:00,  2.28s/it]


7 0.37961139191280713 0.8497191011235955 0.8500000238418579


100%|██████████| 23/23 [00:50<00:00,  2.22s/it]


8 0.3509413938630711 0.875 0.8500000238418579


100%|██████████| 23/23 [00:55<00:00,  2.40s/it]


9 0.3623004846952178 0.8581460674157303 0.8333333134651184


In [11]:
conv_net = nn.Sequential(
    nn.Conv2d(3, 32, 5),
    nn.MaxPool2d(2),
    nn.ReLU(),
    nn.BatchNorm2d(32),
    nn.Conv2d(32, 64, 5),
    nn.MaxPool2d(2),
    nn.ReLU(),
    nn.BatchNorm2d(64),
    nn.Conv2d(64, 128, 5),
    nn.MaxPool2d(2),
    nn.ReLU(),
    nn.BatchNorm2d(128),
    FlattenLayer()
)

test_input = V(torch.ones(1, 3, 224, 224))
conv_output_size = conv_net(test_input).size()[-1]
net = nn.Sequential(
    conv_net,
    nn.Linear(conv_output_size, 2)
)
train_net(net, train_loader, test_loader, n_iter=10, only_fc=False)

100%|██████████| 23/23 [01:16<00:00,  3.33s/it]


0 2.7182205427776682 0.5688202247191011 0.5


100%|██████████| 23/23 [01:36<00:00,  4.19s/it]


1 3.1762400486252527 0.5463483146067416 0.6000000238418579


100%|██████████| 23/23 [01:38<00:00,  4.30s/it]


2 2.2805711301890286 0.6530898876404494 0.4833333194255829


100%|██████████| 23/23 [01:33<00:00,  4.07s/it]


3 2.761759444393895 0.6235955056179775 0.5


100%|██████████| 23/23 [01:26<00:00,  3.76s/it]


4 3.1223117925904016 0.6348314606741573 0.5166666507720947


100%|██████████| 23/23 [01:22<00:00,  3.59s/it]


5 2.7239201448180457 0.6474719101123596 0.5666666626930237


100%|██████████| 23/23 [01:40<00:00,  4.37s/it]


6 2.703263450752605 0.625 0.6666666865348816


100%|██████████| 23/23 [01:39<00:00,  4.33s/it]


7 2.4848244325681166 0.6306179775280899 0.6166666746139526


100%|██████████| 23/23 [01:24<00:00,  3.66s/it]


8 2.7446887980807912 0.6390449438202247 0.6666666865348816


100%|██████████| 23/23 [01:30<00:00,  3.94s/it]


9 2.492222574624148 0.6235955056179775 0.6666666865348816


In [12]:
# http://vis-www.cs.umass.edu/lfw/lfw-deepfunneled.tgz
class DownSizePairImageFolder(ImageFolder):
    def __init__(self, root, transform=None, large_size=128, small_size=32, **kwds):
        super().__init__(root, transform=transform, **kwds)
        self.large_resizer = transforms.Resize(large_size)
        self.small_resizer = transforms.Resize(small_size)
        
    def __getitem__(self, index):
        path, _ = self.imgs[index]
        img = self.loader(path)
        large_img = self.large_resizer(img)
        small_img = self.small_resizer(img)
        if self.transform is not None:
            large_img = self.transform(large_img)
            small_img = self.transform(small_img)
        return small_img, large_img

In [13]:
train_data = DownSizePairImageFolder('./lfw-deepfunneled/train', transform=transforms.ToTensor())
test_data = DownSizePairImageFolder('./lfw-deepfunneled/test', transform=transforms.ToTensor())
batch_size = 32
train_loader = DataLoader(train_data, batch_size, shuffle=True, num_workers=4)
test_loader = DataLoader(test_data, batch_size, shuffle=False, num_workers=4)

In [14]:
net = nn.Sequential(
    nn.Conv2d(3, 256, 4, stride=2, padding=1),
    nn.ReLU(),
    nn.BatchNorm2d(256),
    nn.Conv2d(256, 512, 4, stride=2, padding=1),
    nn.ReLU(),
    nn.BatchNorm2d(512),
    nn.ConvTranspose2d(512, 256, 4, stride=2, padding=1),
    nn.ReLU(),
    nn.BatchNorm2d(256),
    nn.ConvTranspose2d(256, 128, 4, stride=2, padding=1),
    nn.ReLU(),
    nn.BatchNorm2d(128),
    nn.ConvTranspose2d(128, 64, 4, stride=2, padding=1),
    nn.ReLU(),
    nn.BatchNorm2d(64),
    nn.ConvTranspose2d(64, 3, 4, stride=2, padding=1)
)

In [15]:
import math


def psnr(mse, max_v=1.0):
    return 10 * math.log10(max_v / mse)

def eval_net(net, data_loader):
    net.eval()
    ys = []
    ypreds = []
    for x, y in data_loader:
        x = V(x, volatile=True)
        y = V(y, volatile=True)
        y_pred = net(x)
        ys.append(y)
        ypreds.append(y_pred)
    ys = torch.cat(ys)
    ypreds = torch.cat(ypreds)
    score = nn.functional.mse_loss(ypreds, ys).data[0]
    return score

In [16]:
def train_net(net, train_loader, test_loader, optimizer_cls=optim.Adam, loss_fn=nn.MSELoss(), n_iter=10):
    train_losses = []
    train_acc = []
    val_acc = []
    optimizer = optimizer_cls(net.parameters())
    for epoch in range(n_iter):
        running_loss = 0.0
        net.train()
        n = 0
        score = 0
        for i, (x, y) in tqdm(enumerate(train_loader), total=len(train_loader)):
            xx = V(x)
            yy = V(y)
            y_pred = net(xx)
            loss = loss_fn(y_pred, yy)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            running_loss += loss.data[0]
            n += len(xx)
        train_losses.append(running_loss / len(train_loader))
        val_acc.append(eval_net(net, test_loader))
        print(epoch, train_losses[-1], psnr(train_losses[-1]), psnr(val_acc[-1]), flush=True)

In [17]:
train_net(net, train_loader, test_loader)

100%|██████████| 409/409 [12:17<00:00,  1.80s/it]


0 0.016578403832094885 17.80457285624302 24.191130682406925


100%|██████████| 409/409 [12:41<00:00,  1.86s/it]


1 0.003228551738617673 24.909922494885542 26.395216025772466


100%|██████████| 409/409 [12:32<00:00,  1.84s/it]


2 0.0026929591119763028 25.69770240564678 25.948927933873808


100%|██████████| 409/409 [11:42<00:00,  1.72s/it]


3 0.0026202815067373707 25.81652048277519 25.936836673415215


100%|██████████| 409/409 [11:33<00:00,  1.70s/it]


4 0.002566667922321607 25.90630317083332 27.14796109551059


100%|██████████| 409/409 [12:12<00:00,  1.79s/it]


5 0.002307930293499804 26.367773123298043 26.230468280274852


100%|██████████| 409/409 [11:59<00:00,  1.76s/it]


6 0.0022951341208530484 26.39191930499515 27.275115015206346


100%|██████████| 409/409 [12:11<00:00,  1.79s/it]


7 0.0021929114477476676 26.589789052723273 27.154490215111267


100%|██████████| 409/409 [11:31<00:00,  1.69s/it]


8 0.0023198145524824087 26.345467315111048 27.5110749127424


100%|██████████| 409/409 [10:35<00:00,  1.55s/it]


9 0.0020835813977994697 26.811895286002088 26.96941164765856


In [18]:
from torchvision.utils import save_image


random_test_loader = DataLoader(test_data, batch_size=4, shuffle=True)
it = iter(random_test_loader)
x, y = next(it)
bl_recon = torch.nn.functional.upsample(x, 128, mode='bilinear')
yp = net(V(x, volatile=True))
save_image(torch.cat([y, bl_recon.data, yp.data], 0), 'cnn_upscale.jpg', nrow=4)

In [19]:
# http://www.robots.ox.ac.uk/~vgg/data/flowers/102/102flowers.tgz
img_data = ImageFolder('./oxford-102/', transform=transforms.Compose([transforms.Resize(80), transforms.CenterCrop(64), transforms.ToTensor()]))
batch_size = 64
img_loader = DataLoader(img_data, batch_size=batch_size, shuffle=True)

In [20]:
# 
nz = 100
ngf = 32

class GNet(nn.Module):
    def __init__(self):
        super().__init__()
        self.main = nn.Sequential(
            nn.ConvTranspose2d(nz, ngf * 8, 4, 1, 0, bias=False),
            nn.BatchNorm2d(ngf * 8),
            nn.ReLU(inplace=True),
            nn.ConvTranspose2d(ngf * 8, ngf * 4, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ngf * 4),
            nn.ReLU(inplace=True),
            nn.ConvTranspose2d(ngf * 4, ngf * 2, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ngf * 2),
            nn.ReLU(inplace=True),
            nn.ConvTranspose2d(ngf * 2, ngf, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ngf),
            nn.ReLU(inplace=True),
            nn.ConvTranspose2d(ngf, 3, 4, 2, 1, bias=False),
            nn.Tanh()
        )
    
    def forward(self, x):
        out = self.main(x)
        return out

In [21]:
ndf = 32

class DNet(nn.Module):
    def __init__(self):
        super().__init__()
        self.main = nn.Sequential(
            nn.Conv2d(3, ndf, 4, 2, 1, bias=False),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Conv2d(ndf, ndf * 2, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ndf * 2),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Conv2d(ndf * 2, ndf * 4, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ndf * 4),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Conv2d(ndf * 4, ndf * 8, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ndf * 8),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Conv2d(ndf * 8, 1, 4, 1, 0, bias=False)
        )

    def forward(self, x):
        out = self.main(x)
        return out.squeeze()

In [22]:
d = DNet()
g = GNet()

opt_d = optim.Adam(d.parameters(), lr=0.0002, betas=(0.5, 0.999))
opt_g = optim.Adam(g.parameters(), lr=0.0002, betas=(0.5, 0.999))
ones = V(torch.ones(batch_size))
zeros = V(torch.zeros(batch_size))
loss_f = nn.BCEWithLogitsLoss()
fixed_z = V(torch.randn(batch_size, nz, 1, 1))

In [23]:
from statistics import mean


def train_dcgan(g, d, opt_g, opt_d, loader):
    log_loss_g = []
    log_loss_d = []
    for real_img, _ in tqdm(loader):
        batch_len = len(real_img)
        z = torch.randn(batch_len, nz, 1, 1)
        fake_img = g(V(z))
        fake_img_tensor = fake_img.data
        out = d(fake_img)
        loss_g = loss_f(out, ones[:batch_len])
        log_loss_g.append(loss_g.data[0])
        d.zero_grad()
        g.zero_grad()
        loss_g.backward()
        opt_g.step()
        real_out = d(V(real_img))
        loss_d_real = loss_f(real_out, ones[:batch_len])
        fake_img = V(fake_img_tensor)
        fake_out = d(fake_img)
        loss_d_fake = loss_f(fake_out, zeros[:batch_len])
        loss_d = loss_d_real + loss_d_fake
        log_loss_d.append(loss_d.data[0])
        d.zero_grad()
        g.zero_grad()
        loss_d.backward()
        opt_d.step()
    return mean(log_loss_g), mean(log_loss_d)

In [None]:
for epoch in range(100):
    train_dcgan(g, d, opt_g, opt_d, img_loader)
    if epoch % 10 == 0:
        torch.save(
            g.state_dict(),
            './oxford-102-gen/g_{:03d}.prm'.format(epoch),
            pickle_protocol=4
        )
        torch.save(
            d.state_dict(),
            './oxford-102-gen/g_{:03d}.prm'.format(epoch),
            pickle_protocol=4
        )
        generated_img = g(fixed_z).data
        save_image(
            generated_img,
            './oxford-102-gen/{:03d}.jpg'.format(epoch)
        )

100%|██████████| 128/128 [02:38<00:00,  1.24s/it]
100%|██████████| 128/128 [02:33<00:00,  1.20s/it]
100%|██████████| 128/128 [02:35<00:00,  1.22s/it]
100%|██████████| 128/128 [02:33<00:00,  1.20s/it]
100%|██████████| 128/128 [02:35<00:00,  1.21s/it]
100%|██████████| 128/128 [02:33<00:00,  1.20s/it]
100%|██████████| 128/128 [02:33<00:00,  1.20s/it]
100%|██████████| 128/128 [02:35<00:00,  1.22s/it]
100%|██████████| 128/128 [02:36<00:00,  1.22s/it]
100%|██████████| 128/128 [02:37<00:00,  1.23s/it]
100%|██████████| 128/128 [02:37<00:00,  1.23s/it]
100%|██████████| 128/128 [02:36<00:00,  1.23s/it]
100%|██████████| 128/128 [02:35<00:00,  1.22s/it]
100%|██████████| 128/128 [02:36<00:00,  1.22s/it]
100%|██████████| 128/128 [02:37<00:00,  1.23s/it]
100%|██████████| 128/128 [02:41<00:00,  1.26s/it]
100%|██████████| 128/128 [02:54<00:00,  1.36s/it]
100%|██████████| 128/128 [02:48<00:00,  1.31s/it]
100%|██████████| 128/128 [02:57<00:00,  1.38s/it]
100%|██████████| 128/128 [03:23<00:00,  1.59s/it]
