In [1]:
import os
# Set in the very first cell so the variable is already present in the
# process environment when torch is imported in the next cell.
# NOTE(review): presumably a workaround for the "duplicate OpenMP runtime"
# abort seen on some installs — confirm it is still required.
os.environ.update({"KMP_DUPLICATE_LIB_OK": "TRUE"})

In [2]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
from torchvision.utils import save_image


In [3]:
# Run on the GPU when one is visible to torch, otherwise fall back to CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Output folders: "models" receives the checkpoints, "samples" the per-epoch
# image grids. exist_ok=True makes the cell safe to re-run.
for _out_dir in ("models", "samples"):
    os.makedirs(_out_dir, exist_ok=True)


In [4]:
# Hyperparameters — everything a reader might want to tune lives here.
latent_dim = 100    # length of the noise vector z fed to the generator
num_classes = 10    # number of digit labels (0-9)
batch_size = 128    # mini-batch size handed to the DataLoader
epochs = 50         # number of full passes over the training set
lr = 0.0002         # learning rate shared by both Adam optimizers

# Seed torch's RNG so that weight initialisation, DataLoader shuffling and the
# sampled noise are repeatable across Restart & Run All.
seed = 42
torch.manual_seed(seed)


In [5]:
# Preprocessing: ToTensor yields floats in [0, 1]; Normalize with mean 0.5 and
# std 0.5 then rescales them to [-1, 1], the same range as the generator's
# final Tanh.
transform = transforms.Compose(
    [transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))]
)

# MNIST training split, downloaded into ./data on first use.
dataset = datasets.MNIST(root="data", train=True, download=True, transform=transform)

# Reshuffled mini-batches of `batch_size` (image, label) pairs.
dataloader = DataLoader(dataset=dataset, batch_size=batch_size, shuffle=True)


100%|██████████| 9.91M/9.91M [06:45<00:00, 24.5kB/s]
100%|██████████| 28.9k/28.9k [00:00<00:00, 114kB/s]
100%|██████████| 1.65M/1.65M [00:11<00:00, 138kB/s] 
100%|██████████| 4.54k/4.54k [00:01<00:00, 2.69kB/s]


In [6]:
class LabelEmbedding(nn.Module):
    """Thin wrapper around ``nn.Embedding`` that turns class ids into vectors.

    Not referenced by the other cells visible in this notebook: ``Generator``
    and ``Discriminator`` each build their own ``nn.Embedding``.

    Args:
        num_classes: Number of distinct labels (rows of the embedding table).
        embed_dim: Length of the vector produced for each label.
    """

    def __init__(self, num_classes, embed_dim):
        super().__init__()
        self.embed = nn.Embedding(num_embeddings=num_classes, embedding_dim=embed_dim)

    def forward(self, labels):
        """Look up one embedding row per entry of the integer tensor ``labels``."""
        embedded = self.embed(labels)
        return embedded


In [7]:
class Generator(nn.Module):
    """Conditional MLP generator: (noise, class label) -> 1x28x28 image.

    Args:
        z_dim: Length of the noise vector. ``None`` (the default) falls back to
            the notebook-level ``latent_dim``, so ``Generator()`` behaves
            exactly as before.
        n_classes: Number of distinct labels. ``None`` (the default) falls back
            to the notebook-level ``num_classes``.

    The submodule names (``label_emb``, ``model``) and the layer order determine
    the ``state_dict`` keys; keep them stable so saved checkpoints still load.
    """

    def __init__(self, z_dim=None, n_classes=None):
        super().__init__()
        # Resolve sizes once, at construction time.
        z_dim = latent_dim if z_dim is None else z_dim
        n_classes = num_classes if n_classes is None else n_classes

        # Each label id is mapped to a learned vector of length n_classes.
        self.label_emb = nn.Embedding(n_classes, n_classes)

        # MLP over the concatenated [noise | label embedding] vector.
        # The closing Tanh bounds every output pixel to [-1, 1].
        self.model = nn.Sequential(
            nn.Linear(z_dim + n_classes, 256),
            nn.ReLU(),
            nn.Linear(256, 512),
            nn.ReLU(),
            nn.Linear(512, 1024),
            nn.ReLU(),
            nn.Linear(1024, 28 * 28),
            nn.Tanh()
        )

    def forward(self, z, labels):
        """Generate one image per row of ``z``.

        Args:
            z: Noise tensor of shape (batch, z_dim).
            labels: Integer class ids of shape (batch,); must have the same
                batch size as ``z`` or the concatenation below fails.

        Returns:
            Tensor of shape (batch, 1, 28, 28).
        """
        label_vec = self.label_emb(labels)
        x = torch.cat([z, label_vec], dim=1)
        img = self.model(x)
        return img.view(z.size(0), 1, 28, 28)


In [9]:
class Discriminator(nn.Module):
    """Conditional MLP discriminator: (image, class label) -> probability.

    Args:
        n_classes: Number of distinct labels. ``None`` (the default) falls back
            to the notebook-level ``num_classes``, so ``Discriminator()``
            behaves exactly as before.

    The submodule names (``label_emb``, ``model``) and the layer order determine
    the ``state_dict`` keys; keep them stable so saved checkpoints still load.
    """

    def __init__(self, n_classes=None):
        super().__init__()
        n_classes = num_classes if n_classes is None else n_classes

        # Each label id is mapped to a learned vector of length n_classes.
        self.label_emb = nn.Embedding(n_classes, n_classes)

        # MLP over the concatenated [flattened image | label embedding] vector.
        # The closing Sigmoid yields a value in [0, 1].
        self.model = nn.Sequential(
            nn.Linear(28 * 28 + n_classes, 512),
            nn.LeakyReLU(0.2),
            nn.Linear(512, 256),
            nn.LeakyReLU(0.2),
            nn.Linear(256, 1),
            nn.Sigmoid()
        )

    def forward(self, img, labels):
        """Score each (image, label) pair.

        Args:
            img: Image batch whose per-sample size is 28*28 values, e.g.
                (batch, 1, 28, 28); it is flattened to (batch, 784) here.
            labels: Integer class ids of shape (batch,).

        Returns:
            Tensor of shape (batch, 1) with values in [0, 1].
        """
        img = img.view(img.size(0), -1)
        label_vec = self.label_emb(labels)
        x = torch.cat([img, label_vec], dim=1)
        return self.model(x)


In [10]:
# Build both networks (generator first, then discriminator) on the chosen device.
G, D = Generator().to(device), Discriminator().to(device)

# The discriminator ends in a Sigmoid, so binary cross-entropy is applied
# directly to its probability output.
criterion = nn.BCELoss()

# One Adam optimizer per network, both using the shared learning rate `lr`.
optimizer_G = optim.Adam(params=G.parameters(), lr=lr)
optimizer_D = optim.Adam(params=D.parameters(), lr=lr)


In [11]:
# Adversarial training. Each mini-batch performs one discriminator update
# followed by one generator update, both conditioned on the real labels of
# that batch.
for epoch in range(epochs):
    for real_imgs, labels in dataloader:

        real_imgs = real_imgs.to(device)
        labels = labels.to(device)

        # Size of *this* batch; the final batch of an epoch can be smaller than
        # the configured one. It gets its own name so the `batch_size`
        # hyperparameter is not silently overwritten by the loop.
        n_batch = real_imgs.size(0)

        # BCE targets: 1 = "real", 0 = "fake". Created directly on the device.
        real_targets = torch.ones(n_batch, 1, device=device)
        fake_targets = torch.zeros(n_batch, 1, device=device)

        # ---------------------
        # Train Discriminator
        # ---------------------
        optimizer_D.zero_grad()

        real_loss = criterion(D(real_imgs, labels), real_targets)

        z = torch.randn(n_batch, latent_dim, device=device)
        # detach(): the discriminator step must not backpropagate into G.
        fake_imgs = G(z, labels).detach()
        fake_loss = criterion(D(fake_imgs, labels), fake_targets)

        d_loss = real_loss + fake_loss
        d_loss.backward()
        optimizer_D.step()

        # ---------------------
        # Train Generator
        # ---------------------
        optimizer_G.zero_grad()

        # Fresh noise; the loss is low when D scores the fakes as "real".
        z = torch.randn(n_batch, latent_dim, device=device)
        gen_imgs = G(z, labels)
        g_loss = criterion(D(gen_imgs, labels), real_targets)

        g_loss.backward()
        optimizer_G.step()

    # The values reported here are the losses of the last mini-batch of the epoch.
    print(f"Epoch [{epoch+1}/{epochs}] | D Loss: {d_loss.item():.4f} | G Loss: {g_loss.item():.4f}")

    # Save a 5x5 grid of the first 25 images generated in the last mini-batch.
    save_image(gen_imgs[:25], f"samples/epoch_{epoch+1}.png", nrow=5, normalize=True)


Epoch [1/50] | D Loss: 0.0364 | G Loss: 6.6943
Epoch [2/50] | D Loss: 0.0104 | G Loss: 5.7105
Epoch [3/50] | D Loss: 0.1913 | G Loss: 6.1253
Epoch [4/50] | D Loss: 0.3707 | G Loss: 6.5659
Epoch [5/50] | D Loss: 0.3656 | G Loss: 4.3880
Epoch [6/50] | D Loss: 1.3375 | G Loss: 2.8992
Epoch [7/50] | D Loss: 0.2784 | G Loss: 3.3498
Epoch [8/50] | D Loss: 0.8120 | G Loss: 2.7592
Epoch [9/50] | D Loss: 0.8987 | G Loss: 3.6008
Epoch [10/50] | D Loss: 0.4701 | G Loss: 2.6793
Epoch [11/50] | D Loss: 0.2739 | G Loss: 3.4367
Epoch [12/50] | D Loss: 0.2861 | G Loss: 4.2174
Epoch [13/50] | D Loss: 0.5788 | G Loss: 3.3956
Epoch [14/50] | D Loss: 0.5113 | G Loss: 3.7837
Epoch [15/50] | D Loss: 0.5762 | G Loss: 3.5801
Epoch [16/50] | D Loss: 0.1626 | G Loss: 5.6875
Epoch [17/50] | D Loss: 0.0205 | G Loss: 15.3533
Epoch [18/50] | D Loss: 0.0211 | G Loss: 4.5492
Epoch [19/50] | D Loss: 0.5022 | G Loss: 5.2427
Epoch [20/50] | D Loss: 0.2660 | G Loss: 11.8210
Epoch [21/50] | D Loss: 0.2032 | G Loss: 6.6112

In [12]:
# Persist only the learned weights (state_dict) of each network, generator
# first, into the "models" folder.
for _net, _ckpt_path in ((G, "models/generator.pth"), (D, "models/discriminator.pth")):
    torch.save(_net.state_dict(), _ckpt_path)


In [13]:
import torch
import torch.nn as nn
from torchvision.utils import save_image
import os


In [14]:
# Inference-time settings. latent_dim and num_classes size the Generator built
# below, so they have to equal the values the saved checkpoint was trained with.
latent_dim, num_classes = 100, 10

# Run on the GPU when one is visible to torch, otherwise on the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Folder that receives the generated image grid; safe to re-run.
os.makedirs("generated", exist_ok=True)


In [15]:
class Generator(nn.Module):
    """Conditional MLP generator used for inference: (noise, label) -> 1x28x28 image.

    The layer layout and the submodule names (``label_emb``, ``model``) must
    match the network whose weights are stored in ``models/generator.pth``,
    otherwise ``load_state_dict`` will reject the checkpoint.

    Args:
        z_dim: Length of the noise vector. ``None`` (the default) falls back to
            the notebook-level ``latent_dim``, so ``Generator()`` behaves
            exactly as before.
        n_classes: Number of distinct labels. ``None`` (the default) falls back
            to the notebook-level ``num_classes``.
    """

    def __init__(self, z_dim=None, n_classes=None):
        super().__init__()
        # Resolve sizes once, at construction time.
        z_dim = latent_dim if z_dim is None else z_dim
        n_classes = num_classes if n_classes is None else n_classes

        # Each label id is mapped to a learned vector of length n_classes.
        self.label_emb = nn.Embedding(n_classes, n_classes)

        # MLP over the concatenated [noise | label embedding] vector.
        # The closing Tanh bounds every output pixel to [-1, 1].
        self.model = nn.Sequential(
            nn.Linear(z_dim + n_classes, 256),
            nn.ReLU(),
            nn.Linear(256, 512),
            nn.ReLU(),
            nn.Linear(512, 1024),
            nn.ReLU(),
            nn.Linear(1024, 28 * 28),
            nn.Tanh()
        )

    def forward(self, z, labels):
        """Generate one image per row of ``z``.

        Args:
            z: Noise tensor of shape (batch, z_dim).
            labels: Integer class ids of shape (batch,); must have the same
                batch size as ``z`` or the concatenation below fails.

        Returns:
            Tensor of shape (batch, 1, 28, 28).
        """
        label_vec = self.label_emb(labels)
        x = torch.cat([z, label_vec], dim=1)
        img = self.model(x)
        return img.view(z.size(0), 1, 28, 28)


In [16]:
# Rebuild the generator and restore the trained weights. map_location lets a
# checkpoint saved on one device be loaded onto whichever device is in use now.
G = Generator().to(device)
_generator_state = torch.load("models/generator.pth", map_location=device)
G.load_state_dict(_generator_state)

# Switch to evaluation mode; as the last expression of the cell, the returned
# module is also displayed.
G.eval()


Generator(
  (label_emb): Embedding(10, 10)
  (model): Sequential(
    (0): Linear(in_features=110, out_features=256, bias=True)
    (1): ReLU()
    (2): Linear(in_features=256, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=1024, bias=True)
    (5): ReLU()
    (6): Linear(in_features=1024, out_features=784, bias=True)
    (7): Tanh()
  )
)

In [None]:
# Generate one image per digit class and save them as a single grid.
with torch.no_grad():
    # One label per class, in order 0 .. num_classes-1.
    labels = torch.arange(num_classes, device=device)

    # Draw exactly one noise vector per label. Sizing z from `labels` (rather
    # than from a separately hard-coded count) means the two can never disagree
    # on batch size, which would make torch.cat inside Generator.forward fail.
    n = labels.size(0)
    z = torch.randn(n, latent_dim, device=device)

    images = G(z, labels)
    save_image(images, "generated/conditional_digits.png", nrow=5, normalize=True)


RuntimeError: Sizes of tensors must match except in dimension 1. Expected size 10 but got size 5 for tensor number 1 in the list.