In [1]:
import pandas as pd
from PIL import Image
import torch
import torch.optim as optim
import torch.nn as nn
import torch.utils.data as data
import torchvision  
from torchvision import models, transforms
from tqdm import tqdm

In [2]:
# resize 32*32

In [3]:
class Generator(nn.Module):
    
    def __init__(self, z_dim=20, image_size=64):
        super(Generator, self).__init__()
        
        self.layer1 = nn.Sequential(
            nn.ConvTranspose2d(z_dim, image_size*8, kernel_size=4, stride=1),
            nn.BatchNorm2d(image_size*8),
            nn.ReLU(inplace=True)
        )
        
        self.layer2 = nn.Sequential(
            nn.ConvTranspose2d(image_size*8, image_size*4, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm2d(image_size*4),
            nn.ReLU(inplace=True)
        )
            
        self.layer3 = nn.Sequential(
            nn.ConvTranspose2d(image_size*4, image_size*2, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm2d(image_size*2),
            nn.ReLU(inplace=True)
        )
        
        self.layer4 = nn.Sequential(
            nn.ConvTranspose2d(image_size*2, image_size, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm2d(image_size),
            nn.ReLU(inplace=True)
        )
        
        self.last = nn.Sequential(
            nn.ConvTranspose2d(image_size, 3, kernel_size=4, stride=2, padding=1),
            nn.Tanh()
        )
        
    def forward(self, z):
        out = self.layer1(z)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = self.last(out)
            
        return out

In [4]:
class Discriminator(nn.Module):
    
    def __init__(self, z_dim=20, image_size=64):
            super(Discriminator, self).__init__()
            
            self.layer1 = nn.Sequential(
                nn.Conv2d(3, image_size, kernel_size=4, stride=2, padding=1),
                nn.LeakyReLU(0.1, inplace=True)
            )
            
            self.layer2 = nn.Sequential(
                nn.Conv2d(image_size, image_size*2, kernel_size=4, stride=2, padding=1),
                nn.LeakyReLU(0.1, inplace=True)
            )
            
            self.layer3 = nn.Sequential(
                nn.Conv2d(image_size*2, image_size*4, kernel_size=4, stride=2, padding=1),
                nn.LeakyReLU(0.1, inplace=True)
            )
            
            self.layer4 = nn.Sequential(
                nn.Conv2d(image_size*4, image_size*8, kernel_size=4, stride=2, padding=1),
                nn.LeakyReLU(0.1, inplace=True)
            )
            
            self.last = nn.Sequential(
                nn.Conv2d(image_size*8, 1, kernel_size=4, stride=1),
                nn.Sigmoid()
            )
            
    def forward(self, x):
        out = self.layer1(x)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = self.last(out)
        
        return out.view(-1, 1).squeeze(1)
                

In [5]:
def make_datapath_list():
    
    train_img_list = list()
    
    for img_idx in range(1,3000,1):
        img_path = '/kaggle/input/cat-and-dog/training_set/training_set/cats/cat.'+ str(img_idx)+'.jpg'
        train_img_list.append(img_path)
        
    return train_img_list


class ImageTransform():
    
    def __init__(self, mean, std):
        self.data_trandform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize(mean, std)
        ])
        
    def __call__(self, img):
        return self.data_trandform(img)
    
class GAN_Dataset(data.Dataset):
    
    def __init__(self, file_list, transform):
        self.file_list = file_list
        self.transform = transform
        
    def __len__(self):
        return len(self.file_list)
    
    def __getitem__(self, idx):
        img_path = self.file_list[idx]
        img = Image.open(img_path)
        img_resize = img.resize((64, 64))
        
        img_transformed = self.transform(img_resize)
        
        return img_transformed

In [6]:
train_img_list = make_datapath_list()

mean = (0.5, )
std = (0.5, )
batch_size = 64

train_dataset = GAN_Dataset(train_img_list, ImageTransform(mean, std))
train_dataloader = data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

In [7]:
G = Generator(z_dim=20, image_size=32)
D = Discriminator(z_dim=20, image_size=32)

In [8]:
def weights_init(m):
    classname = m.__class__.__name__
    if classname.find('Conv') != -1:
        nn.init.normal_(m.weight.data, 0.0, 0.02)
        nn.init.constant_(m.bias.data, 0)
    elif classname.find('BatchNorm') != -1:
        nn.init.normal_(m.weight.data, 1.0, 0.02)
        nn.init.constant_(m.bias.data, 0)

G.apply(weights_init)
D.apply(weights_init)

Discriminator(
  (layer1): Sequential(
    (0): Conv2d(3, 32, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))
    (1): LeakyReLU(negative_slope=0.1, inplace=True)
  )
  (layer2): Sequential(
    (0): Conv2d(32, 64, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))
    (1): LeakyReLU(negative_slope=0.1, inplace=True)
  )
  (layer3): Sequential(
    (0): Conv2d(64, 128, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))
    (1): LeakyReLU(negative_slope=0.1, inplace=True)
  )
  (layer4): Sequential(
    (0): Conv2d(128, 256, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))
    (1): LeakyReLU(negative_slope=0.1, inplace=True)
  )
  (last): Sequential(
    (0): Conv2d(256, 1, kernel_size=(4, 4), stride=(1, 1))
    (1): Sigmoid()
  )
)

In [9]:
def train_model(G, D, dataloader, num_epochs):

    # GPUが使えるかを確認
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print("使用デバイス：", device)
    
    # ラベルの定義（本物:1, 偽物:0）
    real_label = 1
    fake_label = 0

    # 最適化手法の設定
    g_lr, d_lr = 0.0001, 0.0004
    beta1, beta2 = 0.0, 0.9
    g_optimizer = torch.optim.Adam(G.parameters(), g_lr, [beta1, beta2])
    d_optimizer = torch.optim.Adam(D.parameters(), d_lr, [beta1, beta2])

    # 誤差関数を定義
    criterion = nn.BCELoss()

    # パラメータをハードコーディング
    z_dim = 20
    mini_batch_size = 64

    # ネットワークをGPUへ
    G.to(device)
    D.to(device)

    G.train()  # モデルを訓練モードに
    D.train()  # モデルを訓練モードに

    # ネットワークがある程度固定であれば、高速化させる
    torch.backends.cudnn.benchmark = True

    # 画像の枚数
    num_train_imgs = len(dataloader.dataset)
    batch_size = dataloader.batch_size

    # イテレーションカウンタをセット
    iteration = 1
    logs = []

    # epochのループ
    for epoch in range(num_epochs):

        epoch_g_loss = 0.0  # epochの損失和
        epoch_d_loss = 0.0  # epochの損失和

        print('-------------')
        print('Epoch {}/{}'.format(epoch, num_epochs))
        print('-------------')
        print('（train）')

        # データローダーからminibatchずつ取り出すループ
        for imgs in dataloader:
            # Discriminatorの学習
            if imgs.size()[0] == 1:
                continue

            imgs = imgs.to(device)

            # 正解ラベルと偽ラベルを作成
            mini_batch_size = imgs.size()[0]
            label_real = torch.full((mini_batch_size,), fill_value=real_label, dtype=torch.float, device=device)
            label_fake = torch.full((mini_batch_size,), fill_value=fake_label, dtype=torch.float, device=device)

            # 真の画像を判定
            d_out_real = D(imgs)

            # 偽の画像を生成して判定
            input_z = torch.randn(mini_batch_size, z_dim).to(device)
            input_z = input_z.view(input_z.size(0), input_z.size(1), 1, 1)
            fake_imags = G(input_z)
            d_out_fake = D(fake_imags)

            # 誤差を計算
            d_loss_real = criterion(d_out_real, label_real)
            d_loss_fake = criterion(d_out_fake, label_fake)
            d_loss = d_loss_real + d_loss_fake

            # バックプロパゲーション
            g_optimizer.zero_grad()
            d_optimizer.zero_grad()

            d_loss.backward()
            d_optimizer.step()

            # Generatorの学習
            # 偽の画像を生成
            input_z = torch.randn(mini_batch_size, z_dim).to(device)
            input_z = input_z.view(input_z.size(0), input_z.size(1), 1, 1)
            fake_images = G(input_z)
            d_out_fake = D(fake_images)

            # 誤差
            g_loss = criterion(d_out_fake, label_real)

            # バックプロパゲーション
            g_optimizer.zero_grad()
            d_optimizer.zero_grad()
            g_loss.backward()
            g_optimizer.step()

            # 記録
            epoch_d_loss += d_loss.item()
            epoch_g_loss += g_loss.item()
            iteration += 1

        # epochのphaseごとのlossと正解率
        print('-------------')
        print('epoch {} || Epoch_D_Loss:{:.4f} ||Epoch_G_Loss:{:.4f}'.format(
            epoch, epoch_d_loss/batch_size, epoch_g_loss/batch_size))

    return G, D

In [None]:
num_epochs = 150
G_update, D_update = train_model(G, D, dataloader=train_dataloader, num_epochs=num_epochs)

使用デバイス： cuda:0
-------------
Epoch 0/150
-------------
（train）
-------------
epoch 0 || Epoch_D_Loss:0.3074 ||Epoch_G_Loss:2.1382
-------------
Epoch 1/150
-------------
（train）
-------------
epoch 1 || Epoch_D_Loss:0.1993 ||Epoch_G_Loss:2.5707
-------------
Epoch 2/150
-------------
（train）
-------------
epoch 2 || Epoch_D_Loss:0.2875 ||Epoch_G_Loss:2.6366
-------------
Epoch 3/150
-------------
（train）
-------------
epoch 3 || Epoch_D_Loss:0.3171 ||Epoch_G_Loss:2.5466
-------------
Epoch 4/150
-------------
（train）
-------------
epoch 4 || Epoch_D_Loss:0.6079 ||Epoch_G_Loss:1.4271
-------------
Epoch 5/150
-------------
（train）
-------------
epoch 5 || Epoch_D_Loss:0.6346 ||Epoch_G_Loss:1.2776
-------------
Epoch 6/150
-------------
（train）
-------------
epoch 6 || Epoch_D_Loss:0.4739 ||Epoch_G_Loss:1.5952
-------------
Epoch 7/150
-------------
（train）
-------------
epoch 7 || Epoch_D_Loss:0.4647 ||Epoch_G_Loss:1.7563
-------------
Epoch 8/150
-------------
（train）
-------------
epo

In [None]:
import matplotlib.pyplot as plt

# 生成画像と訓練データを可視化する
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# 入力の乱数生成
batch_size = 8
dim = 20
z = torch.randn(batch_size, dim)
z = z.view(z.size(0), z.size(1), 1, 1)

# 画像生成
G_update.eval()
fake_imags = G_update(z.to(device))

# 訓練データ
batch_iterator = iter(train_dataloader) 
imgs = next(batch_iterator)  


# 出力
fig = plt.figure(figsize=(15, 6))
for i in range(0, 5):
    # 上段に訓練データを
    plt.subplot(2, 5, i+1)
    plt.imshow(imgs[i][0].cpu().detach().numpy())

    # 下段に生成データを表示する
    plt.subplot(2, 5, 5+i+1)
    plt.imshow(fake_imags[i][0].cpu().detach().numpy())

In [None]:
#