# Vanilla GAN with Korean

### Lab Overview

img

### Prerequisite

In [66]:
import torch 
import os

# mps setting
device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")

In [67]:
# save model
model_dir = os.path.join('result/2/model')
os.makedirs(model_dir, exist_ok=True)

image_dir = os.path.join('result/2/sample')
os.makedirs(image_dir, exist_ok=True)

loss_dir = os.path.join('result/2/loss')
os.makedirs(loss_dir, exist_ok=True)

### Import Libraries

In [25]:
from torchvision import datasets
from torchvision.datasets import DatasetFolder
from torchvision.transforms import ToTensor
from torch.utils.data import Dataset, DataLoader
from torchvision.transforms import ToTensor, Lambda
from torchvision import transforms
import matplotlib.pyplot as plt
import torch.nn as nn
import torch.optim as optim
import numpy as np

### Generate Korean Font Image

In [37]:
import glob, io
from PIL import Image, ImageFont, ImageDraw

TXT_FILE = 'data/characters/50characters.txt'
FONTS_DIR = 'data/source'
IMAGE_DIR = 'data/target/2'

if not os.path.exists(IMAGE_DIR):
    os.makedirs(os.path.join(IMAGE_DIR))

In [38]:
IMAGE_WIDTH = 256
IMAGE_HEIGHT = 256

list_labels = []
with open(TXT_FILE, 'r', encoding='utf-8') as fr:
    for line in fr:
        list_labels.append(line.strip())

with io.open(TXT_FILE, 'r', encoding='utf-8') as f:
    labels = f.read().splitlines()

# Get a list of the fonts.
fonts = sorted(glob.glob(os.path.join(FONTS_DIR, '*.ttf')))
for f in fonts:
    filename = os.path.basename(f)
    filename_without_extension = os.path.splitext(filename)[0]
    print(filename_without_extension)

# Initialize numbers
total_count = 0
prev_count = 0
font_count = 1
char_no = 0
    
# Total number of font files is 
print('total number of fonts are ', len(fonts))

for character in labels:
    char_no += 1
        
    for font in fonts:
        total_count += 1

        image = Image.new('RGB', (IMAGE_WIDTH,IMAGE_HEIGHT), (255, 255, 255))
        w, h = image.size
                
        drawing = ImageDraw.Draw(image)
        font = ImageFont.truetype(font, 170)

        box = None
        new_box = drawing.textbbox((0, 0), character, font)
                
        new_w = new_box[2] - new_box[0]
        new_h = new_box[3] - new_box[1]
                
        box = new_box
        w = new_w
        h = new_h
                
        x = (IMAGE_WIDTH - w)//2 - box[0]
        y = (IMAGE_HEIGHT - h)//2 - box[1]

        drawing.text((x,y), character, fill=(0), font=font) 
        file_string = '{}.png'.format(hex(ord(character))[2:].upper())
        file_path = os.path.join(IMAGE_DIR, file_string)
        image.save(file_path, 'PNG')
        font_count += 1
    font_count = 1
char_no = 0
            
print('Finished generating {} images.'.format(total_count))

GothicA1-Regular
total number of fonts are  1
Finished generating 50 images.


### Datasets

In [53]:
class CustomDataset(Dataset):
    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        self.transform = transform
        self.image_paths = [os.path.join(root_dir, file) for file in os.listdir(root_dir) if file.endswith('.png')]

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        img_path = self.image_paths[idx]
        image = Image.open(img_path).convert("RGB")
        if self.transform:
            image = self.transform(image)
        return image

transform = transforms.Compose([
    transforms.Resize((64, 64)),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

dataset = CustomDataset(root_dir='data/target/2', transform=transform)

batch_size = 64
train_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)


### Hyperparameters

In [42]:
NOISE = 100
INPUT_SIZE = 256 * 256
BATCH_SIZE = 64
EPOCHS = 50

### Generator

In [63]:
class Generator(nn.Module):
    def __init__(self, NOISE):
        super(Generator, self).__init__()
        
        self.model = nn.Sequential(
            nn.Linear(NOISE, 1024),
            nn.ReLU(),
            nn.BatchNorm1d(1024),
            nn.Linear(1024, 8 * 8 * 256),
            nn.ReLU(),
            nn.BatchNorm1d(8 * 8 * 256),
            nn.Unflatten(1, (256, 8, 8)),
            nn.Conv2d(256, 128, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.BatchNorm2d(128),
            nn.ConvTranspose2d(128, 64, kernel_size=4, stride=2, padding=1),
            nn.ReLU(),
            nn.BatchNorm2d(64),
            nn.ConvTranspose2d(64, channels, kernel_size=4, stride=2, padding=1),
            nn.Tanh()
        )
    
    def forward(self, z):
        img = self.model(z)
        return img

### Discriminator

In [64]:
class Discriminator(nn.Module):
    def __init__(self, INPUT_SIZE):
        super(Discriminator, self).__init__()
        
        self.model = nn.Sequential(
            nn.Conv2d(INPUT_SIZE, 64, kernel_size=4, stride=2, padding=1),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Dropout2d(0.3),
            nn.Conv2d(64, 128, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm2d(128),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Dropout2d(0.3),
            nn.Conv2d(128, 256, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm2d(256),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Dropout2d(0.3),
            nn.Conv2d(256, 512, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm2d(512),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Dropout2d(0.3),
            nn.Conv2d(512, 1, kernel_size=4, stride=1, padding=0),
            nn.Sigmoid()
        )

    def forward(self, x):
        x = self.model(x)
        return x

### Configurate Model

In [65]:
generator = Generator(NOISE).to(device)
print(f"Generate is using {device}.")

NameError: name 'channels' is not defined

In [58]:
discriminator = Discriminator(INPUT_SIZE).to(device)
print(f"Discriminator is using {device}")

Discriminator is using mps


### Loss and Optimizer

In [59]:
criterion = nn.BCELoss()
optimizer_discriminator = optim.Adam(discriminator.parameters(), lr=1e-4)
optimizer_generator = optim.Adam(generator.parameters(), lr=1e-4)

In [60]:
for param in discriminator.parameters():
    param.requires_grad = False

In [61]:
gan_input = torch.randn(BATCH_SIZE, NOISE).to(device)
x = generator(gan_input)
output = discriminator(x)

RuntimeError: Given groups=1, weight of size [64, 65536, 4, 4], expected input[64, 3, 32, 32] to have 65536 channels, but got 3 channels instead

### Visualize

In [50]:
def visualize_training(epoch, d_losses, g_losses, output_dir=image_dir, loss_dir=loss_dir):

    plt.figure(figsize=(8, 4))
    plt.plot(d_losses, label='Discriminator Loss')
    plt.plot(g_losses, label='Generatror Loss')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend()
    plt.show()
    plt.close()
    
    print('epoch: {}, Discriminator Loss: {}, Generator Loss: {}'.format(epoch, np.asarray(d_losses).mean(), np.asarray(g_losses).mean()))
    
    #Visualize after creating sample data
    noise = torch.randn(24, NOISE).to(device)
    generated_images = generator(noise).cpu().detach().numpy()
    generated_images = generated_images.reshape(-1, 28, 28)
    
    plt.figure(figsize=(8, 4))
    for i in range(generated_images.shape[0]):
        plt.subplot(4, 6, i+1)
        plt.imshow(generated_images[i], interpolation='nearest', cmap='gray')
        plt.axis('off')
    plt.tight_layout()
    plt.savefig(os.path.join(output_dir, f'generated_images_epoch_{epoch}.png'))
    plt.close()
    plt.show()


### Save

In [51]:
def save_training(epoch, d_losses, g_losses, output_dir=image_dir, loss_dir=loss_dir):
    plt.figure(figsize=(8, 4))
    plt.plot(d_losses, label='Discriminator Loss')
    plt.plot(g_losses, label='Generatror Loss')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend()
    plt.title(f'epoch: {epoch}, Discriminator Loss: {np.asarray(d_losses).mean():.4f}, Generator Loss: {np.asarray(g_losses).mean():.4f}')
    plt.savefig(os.path.join(loss_dir, f'generated_images_epoch_{epoch}.png'))
    plt.close()
        
    #Visualize after creating sample data
    noise = torch.randn(24, NOISE).to(device)
    generated_images = generator(noise).cpu().detach().numpy()
    generated_images = generated_images.reshape(-1, 28, 28)
    
    plt.figure(figsize=(8, 4))
    for i in range(generated_images.shape[0]):
        plt.subplot(4, 6, i+1)
        plt.imshow(generated_images[i], interpolation='nearest', cmap='gray')
        plt.axis('off')
    plt.tight_layout()
    plt.savefig(os.path.join(output_dir, f'generated_images_epoch_{epoch}.png'))
    plt.close()


### Train

In [62]:
d_losses = []
g_losses = []

for epoch in range(1, EPOCHS + 1):
    for i, real_images in enumerate(train_loader):

        batch_size = real_images.size(0)
        real_images = real_images.view(batch_size, -1).to(device)

        # Real and fake labels
        real_labels = torch.ones(batch_size, 1).to(device)
        fake_labels = torch.zeros(batch_size, 1).to(device)

        # Train Discriminator
        for param in discriminator.parameters():
            param.requires_grad = True

        optimizer_discriminator.zero_grad()

        outputs = discriminator(real_images)
        d_loss_real = criterion(outputs, real_labels)
        d_loss_real.backward()

        z = torch.randn(batch_size, NOISE).to(device)

        fake_images = generator(z)
        outputs = discriminator(fake_images.detach())
        d_loss_fake = criterion(outputs, fake_labels)
        d_loss_fake.backward()

        optimizer_discriminator.step()

        # Train Generator
        for param in discriminator.parameters():
            param.requires_grad = False
        
        optimizer_generator.zero_grad()

        outputs = discriminator(fake_images)
        g_loss = criterion(outputs, real_labels)
        g_loss.backward()

        optimizer_generator.step()
    
    d_losses.append(d_loss_real.item() + d_loss_fake.item())
    g_losses.append(g_loss.item())

    save_training(epoch, d_losses, g_losses)

    if epoch == 1 or epoch % 10 == 0:
        visualize_training(epoch, d_losses, g_losses)
        torch.save(generator.state_dict(), os.path.join(model_dir, f'generator.pth'))
        torch.save(discriminator.state_dict(), os.path.join(model_dir, f'discriminator.pth'))

RuntimeError: Expected 3D (unbatched) or 4D (batched) input to conv2d, but got input of size: [50, 12288]