<a href="https://colab.research.google.com/github/HuanAII/GAN/blob/main/GAN_emoji.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import torch
from torch import nn, Tensor
import numpy as np
from torchvision.utils import save_image

# Seed every global RNG available in this notebook. numpy keeps its own
# generator, separate from torch's, so seeding torch alone does not make
# numpy-based sampling repeatable.
SEED = 0
torch.manual_seed(SEED)
np.random.seed(SEED)

# Use the first CUDA device when one is available, otherwise fall back to CPU.
device = "cuda:0" if torch.cuda.is_available() else "cpu"
device

'cuda:0'

In [3]:
! pip install datasets

Collecting datasets
  Downloading datasets-3.5.0-py3-none-any.whl.metadata (19 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py311-none-any.whl.metadata (7.2 kB)
Collecting fsspec<=2024.12.0,>=2023.1.0 (from fsspec[http]<=2024.12.0,>=2023.1.0->datasets)
  Downloading fsspec-2024.12.0-py3-none-any.whl.metadata (11 kB)
Downloading datasets-3.5.0-py3-none-any.whl (491 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m491.2/491.2 kB[0m [31m30.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading dill-0.3.8-py3-none-any.whl (116 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m11.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading fsspec-2024.12.0-py3-none-any.

# 1. Dataset

In [4]:
from torch.utils.data import Dataset
import torchvision
from datasets import load_dataset

img_size = 28

# Preprocessing applied to every emoji image:
#   1. resize to img_size x img_size,
#   2. convert to a tensor,
#   3. normalize each of the 3 colour channels with mean 0.5 and std 0.5.
_tf = torchvision.transforms
transform = _tf.Compose([
    _tf.Resize((img_size, img_size)),
    _tf.ToTensor(),
    _tf.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
])

# Fetch the emoji dataset from the Hugging Face Hub.
hf_emoji_dataset = load_dataset("valhalla/emoji-dataset")

class EmojiDataset(Dataset):
    """Map-style dataset exposing a Hugging Face split as ``(image, text)`` pairs.

    Args:
        hf_dataset: indexable collection whose items are mappings with
            ``"image"`` and ``"text"`` keys.
        transform: optional callable applied to each image before it is returned.
    """

    def __init__(self, hf_dataset, transform=None):
        self.transform = transform
        self.dataset = hf_dataset

    def __len__(self):
        """Return the number of examples in the wrapped dataset."""
        return len(self.dataset)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for the example at ``idx``.

        The image goes through ``self.transform`` when one was provided;
        the label is the example's ``"text"`` field, returned untouched.
        """
        record = self.dataset[idx]
        picture, caption = record["image"], record["text"]

        # No transform configured: hand the raw image back as-is.
        if not self.transform:
            return picture, caption

        return self.transform(picture), caption


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


dataset_infos.json:   0%|          | 0.00/720 [00:00<?, ?B/s]

(…)-00000-of-00001-38cc4fa96c139e86.parquet:   0%|          | 0.00/139M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/2749 [00:00<?, ? examples/s]

In [5]:
BATCH_SIZE = 128

# Wrap the "train" split with the preprocessing transform, then serve it in
# shuffled mini-batches of BATCH_SIZE images.
train_dataset = EmojiDataset(hf_emoji_dataset["train"], transform=transform)
dataloader = torch.utils.data.DataLoader(
    dataset=train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
)

# 2. Model

In [6]:
# Shared model configuration.
latent_dim = 100  # length of the noise vector sampled for the generator
channels = 3      # colour channels per image
img_shape = (channels, img_size, img_size)  # (C, H, W) of one image

In [8]:
# Generator Model
class Generator(nn.Module):
    """Fully connected generator mapping latent vectors to images.

    Three Linear -> BatchNorm1d -> ReLU blocks widen the input
    (z_dim -> 256 -> 512 -> 1024); a final Linear + Tanh emits one value in
    [-1, 1] per pixel, which ``forward`` reshapes to the image shape.

    Args:
        z_dim: length of the latent noise vector.
        img_shape: shape of one generated image, e.g. ``(C, H, W)``. When
            omitted, the notebook-level ``(channels, img_size, img_size)``
            is used, which matches the previous behaviour.
    """
    def __init__(self, z_dim=100, img_shape=None):
        super().__init__()
        if img_shape is None:
            # Backward-compatible default: fall back to the notebook globals.
            img_shape = (channels, img_size, img_size)
        # Remember the shape so forward() does not depend on globals that
        # may be rebound after the model was built.
        self.img_shape = tuple(img_shape)
        n_outputs = 1
        for dim in self.img_shape:
            n_outputs *= dim

        self.block1 = nn.Sequential(
            nn.Linear(z_dim, 256),
            nn.BatchNorm1d(256),
            nn.ReLU(),
        )
        self.block2 = nn.Sequential(
            nn.Linear(256, 512),
            nn.BatchNorm1d(512),
            nn.ReLU(),
        )
        self.block3 = nn.Sequential(
            nn.Linear(512, 1024),
            nn.BatchNorm1d(1024),
            nn.ReLU(),
        )
        self.block4 = nn.Sequential(
            nn.Linear(1024, n_outputs),
            nn.Tanh()
        )

    def forward(self, noise):
        """Map a ``(batch, z_dim)`` noise tensor to ``(batch, *img_shape)`` images."""
        x = self.block1(noise)
        x = self.block2(x)
        x = self.block3(x)
        x = self.block4(x)
        # Reshape the flat output back to (batch_size, *img_shape).
        return x.view(-1, *self.img_shape)


In [9]:
class Discriminator(nn.Module):
    """Fully connected discriminator scoring images as real or generated.

    The image is flattened and passed through three Linear -> LeakyReLU(0.2)
    blocks (1024 -> 512 -> 256) followed by Linear + Sigmoid, giving one
    value in [0, 1] per image.

    Args:
        img_shape: shape of one input image, e.g. ``(C, H, W)``. When
            omitted, the notebook-level ``(channels, img_size, img_size)``
            is used, which matches the previous behaviour.
    """
    def __init__(self, img_shape=None):
        super().__init__()
        if img_shape is None:
            # Backward-compatible default: fall back to the notebook globals.
            img_shape = (channels, img_size, img_size)
        self.img_shape = tuple(img_shape)
        n_inputs = 1
        for dim in self.img_shape:
            n_inputs *= dim

        self.block1 = nn.Sequential(
            nn.Linear(n_inputs, 1024),
            nn.LeakyReLU(0.2),
        )
        self.block2 = nn.Sequential(
            nn.Linear(1024, 512),
            nn.LeakyReLU(0.2),
        )
        self.block3 = nn.Sequential(
            nn.Linear(512, 256),
            nn.LeakyReLU(0.2),
        )
        self.block4 = nn.Sequential(
            nn.Linear(256, 1),
            nn.Sigmoid()
        )

    def forward(self, image):
        """Return a ``(batch, 1)`` tensor of scores for a batch of images."""
        # flatten() keeps the batch dimension and, unlike view(), also
        # accepts non-contiguous inputs (e.g. permuted tensors).
        x = image.flatten(start_dim=1)
        x = self.block1(x)
        x = self.block2(x)
        x = self.block3(x)
        x = self.block4(x)
        return x

In [10]:
# Build both networks. The generator's input width is tied to latent_dim,
# the size of the noise vectors sampled during training, instead of relying
# on the constructor default happening to match it.
generator = Generator(z_dim=latent_dim)
discriminator = Discriminator()

In [11]:
# Move the generator onto the selected device; as the cell's last expression
# the module is also displayed.
generator.to(device)

Generator(
  (block1): Sequential(
    (0): Linear(in_features=100, out_features=256, bias=True)
    (1): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
  )
  (block2): Sequential(
    (0): Linear(in_features=256, out_features=512, bias=True)
    (1): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
  )
  (block3): Sequential(
    (0): Linear(in_features=512, out_features=1024, bias=True)
    (1): BatchNorm1d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
  )
  (block4): Sequential(
    (0): Linear(in_features=1024, out_features=2352, bias=True)
    (1): Tanh()
  )
)

In [12]:
# Move the discriminator onto the selected device; as the cell's last
# expression the module is also displayed.
discriminator.to(device)

Discriminator(
  (block1): Sequential(
    (0): Linear(in_features=2352, out_features=1024, bias=True)
    (1): LeakyReLU(negative_slope=0.2)
  )
  (block2): Sequential(
    (0): Linear(in_features=1024, out_features=512, bias=True)
    (1): LeakyReLU(negative_slope=0.2)
  )
  (block3): Sequential(
    (0): Linear(in_features=512, out_features=256, bias=True)
    (1): LeakyReLU(negative_slope=0.2)
  )
  (block4): Sequential(
    (0): Linear(in_features=256, out_features=1, bias=True)
    (1): Sigmoid()
  )
)

# 3. Training

In [14]:
import os
OUTPUT = "gan_emoji_output"
# Start from a clean image folder: create it if needed, then delete what a
# previous run left behind. Only files (and symlinks) are removed, because
# os.remove raises on directories such as a Jupyter `.ipynb_checkpoints` folder.
os.makedirs(OUTPUT, exist_ok=True)
for f in os.listdir(OUTPUT):
    stale_path = os.path.join(OUTPUT, f)
    if os.path.isfile(stale_path) or os.path.islink(stale_path):
        os.remove(stale_path)

# NOTE(review): presumably the number of epochs between saved sample images -- confirm where it is used.
save_interval = 10

In [None]:
EPOCHS = 100

# Binary cross-entropy between the discriminator's sigmoid output and the
# real (1) / fake (0) targets; one Adam optimizer per network.
criterion = nn.BCELoss()
optimizer_G = torch.optim.Adam(generator.parameters(), lr=0.0002, betas=(0.5, 0.999))
optimizer_D = torch.optim.Adam(discriminator.parameters(), lr=0.0002, betas=(0.5, 0.999))


# Mean generator / discriminator loss, one entry per epoch.
hist = {
        "train_G_loss": [],
        "train_D_loss": [],
    }

for epoch in range(EPOCHS):
    running_G_loss = 0.0
    running_D_loss = 0.0

    for imgs, _ in dataloader:
        n_samples = imgs.shape[0]  # the last batch may be smaller than BATCH_SIZE

        real_imgs = imgs.to(device)
        # Targets are created directly on the device (no CPU allocation + copy).
        valid = torch.ones(n_samples, 1, device=device)
        fake = torch.zeros(n_samples, 1, device=device)

        # --- Train Generator ---
        optimizer_G.zero_grad()
        # Noise input for Generator. Sampled with torch so that it is governed
        # by torch.manual_seed; numpy's global RNG is a separate generator.
        z = torch.randn(n_samples, latent_dim, device=device)

        gen_imgs = generator(z)
        # The generator is rewarded when the discriminator labels its output as real.
        G_loss = criterion(discriminator(gen_imgs), valid)
        running_G_loss += G_loss.item()

        G_loss.backward()
        optimizer_G.step()

        # --- Train Discriminator ---
        # zero_grad also discards the discriminator gradients produced by G_loss.backward().
        optimizer_D.zero_grad()
        real_loss = criterion(discriminator(real_imgs), valid)
        # detach(): the discriminator update must not backpropagate into the generator.
        fake_loss = criterion(discriminator(gen_imgs.detach()), fake)
        D_loss = (real_loss + fake_loss) / 2
        running_D_loss += D_loss.item()

        D_loss.backward()
        optimizer_D.step()

    epoch_G_loss = running_G_loss / len(dataloader)
    epoch_D_loss = running_D_loss / len(dataloader)

    print(f"Epoch [{epoch + 1}/{EPOCHS}], Train G Loss: {epoch_G_loss:.4f}, Train D Loss: {epoch_D_loss:.4f}")

    hist["train_G_loss"].append(epoch_G_loss)
    hist["train_D_loss"].append(epoch_D_loss)

    # Save up to 25 samples from the epoch's last batch as a 5-column grid.
    save_image(gen_imgs.detach()[:25], f"{OUTPUT}/epoch_{epoch}.png", nrow=5, normalize=True)


Epoch [1/100], Train G Loss: 0.7607, Train D Loss: 0.5196
Epoch [2/100], Train G Loss: 2.1450, Train D Loss: 0.1926
Epoch [3/100], Train G Loss: 2.8922, Train D Loss: 0.3803
Epoch [4/100], Train G Loss: 1.3593, Train D Loss: 0.5659


In [None]:
# Persist the trained weights (state_dict only) of both networks.
for ckpt_path, net in (
    ("emoji_generator.pth", generator),
    ("emoji_discriminator.pth", discriminator),
):
    torch.save(net.state_dict(), ckpt_path)

In [None]:
import os
import glob
from PIL import Image

# Get all epoch images sorted by number
image_files = sorted(glob.glob(os.path.join(OUTPUT, "epoch_*.png")),
                     key=lambda x: int(os.path.basename(x).split("_")[1].split(".")[0]))

# Load every 5th snapshot. Copying the pixels lets the `with` block close the
# underlying file instead of leaving one handle open per image.
images = []
for snapshot_path in image_files[::5]:
    with Image.open(snapshot_path) as snapshot:
        images.append(snapshot.copy())

if not images:
    raise FileNotFoundError(
        f"no epoch_*.png images found in {OUTPUT!r}; run the training cell first"
    )

# Determine grid size
num_images = len(images)
cols = 5
# Ceiling division: a partially filled last row still needs canvas space
# (floor division dropped it, and produced 0 rows for fewer than `cols` images).
rows = (num_images + cols - 1) // cols

# Image size (assumes all images are the same size)
img_width, img_height = images[0].size
padding = 10  # Space between images

# Calculate total canvas size
grid_width = cols * img_width + (cols - 1) * padding
grid_height = rows * img_height + (rows - 1) * padding

# Create a blank canvas
grid_img = Image.new("RGB", (grid_width, grid_height), "white")

# Paste images into the grid with padding, filling row by row
for i, img in enumerate(images):
    x = (i % cols) * (img_width + padding)
    y = (i // cols) * (img_height + padding)
    grid_img.paste(img, (x, y))

# Save and show the final grid image
grid_img.save("gan_emoji_grid.png")
grid_img.show()



