In [1]:
#%matplotlib inline
import argparse
import os
import random
import torch
import torch.nn as nn
import torch.nn.parallel
import torch.optim as optim
import torch.utils.data
import torchvision.datasets as dset
import torchvision.transforms as transforms
import torchvision.utils as vutils
from torcheval.metrics import FrechetInceptionDistance
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.animation as animation
from IPython.display import HTML

# Set random seed for reproducibility
manualSeed = 999
#manualSeed = random.randint(1, 10000) # use if you want new results
print("Random Seed: ", manualSeed)
random.seed(manualSeed)
torch.manual_seed(manualSeed)
torch.use_deterministic_algorithms(True) # Needed for reproducible results

Random Seed:  999


In [9]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as transforms
import torchvision.datasets as dset
import torchvision.utils as vutils
import matplotlib.pyplot as plt
import numpy as np
import os

# Parametry
dataroot = "data"  # katalog z podfolderami obrazów kotów
image_size = 64
batch_size = 128
num_epochs = 30
lr = 2e-4
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
transform = transforms.Compose([
    transforms.Resize(image_size),
    transforms.CenterCrop(image_size),
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))  # RGB → (0.5, 0.5, 0.5) jeśli kolor
])

dataset = dset.ImageFolder(root=dataroot, transform=transform)
dataloader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=True)


In [None]:
# We can use an image folder dataset the way we have it setup.
# Create the dataset
dataset = dset.ImageFolder(root=dataroot,
                           transform=transforms.Compose([
                               transforms.Resize(image_size),
                               transforms.CenterCrop(image_size),
                               transforms.ToTensor(),
                               transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
                           ]))
# Create the dataloader
dataloader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=True, num_workers=2)

# Decide which device we want to run on
device = torch.device("cuda:0" if (torch.cuda.is_available() and ngpu > 0) else "cpu")

# Plot some training images
real_batch = next(iter(dataloader))
plt.figure(figsize=(8,8))
plt.axis("off")
plt.title("Training Images")
plt.imshow(np.transpose(vutils.make_grid(real_batch[0].to(device)[:64], padding=2, normalize=True).cpu(),(1,2,0)))
plt.show()

NameError: name 'workers' is not defined

In [10]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as transforms
import torchvision.datasets as dset
import torchvision.utils as vutils
import matplotlib.pyplot as plt
import numpy as np
import os

# Parametry
dataroot = "data/cats"  # katalog z podfolderami obrazów kotów
image_size = 64
batch_size = 128
num_epochs = 30
lr = 2e-4
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
torch.use_deterministic_algorithms(True)


In [11]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class Encoder(nn.Module):
    def __init__(self, in_channels=3, hidden_channels=128, latent_dim=64):
        super().__init__()
        self.net = nn.Sequential(
            nn.Conv2d(in_channels, hidden_channels, 4, 2, 1),
            nn.ReLU(),
            nn.Conv2d(hidden_channels, hidden_channels, 4, 2, 1),
            nn.ReLU(),
            nn.Conv2d(hidden_channels, latent_dim, 1)
        )

    def forward(self, x):
        return self.net(x)

class Decoder(nn.Module):
    def __init__(self, latent_dim=64, hidden_channels=128, out_channels=3):
        super().__init__()
        self.net = nn.Sequential(
            nn.ConvTranspose2d(latent_dim, hidden_channels, 4, 2, 1),
            nn.ReLU(),
            nn.ConvTranspose2d(hidden_channels, hidden_channels, 4, 2, 1),
            nn.ReLU(),
            nn.Conv2d(hidden_channels, out_channels, 1),
            nn.Tanh()
        )

    def forward(self, x):
        return self.net(x)

class VectorQuantizer(nn.Module):
    def __init__(self, num_embeddings=512, embedding_dim=64, commitment_cost=0.25):
        super().__init__()
        self.embedding_dim = embedding_dim
        self.num_embeddings = num_embeddings
        self.commitment_cost = commitment_cost

        self.embedding = nn.Embedding(num_embeddings, embedding_dim)
        self.embedding.weight.data.uniform_(-1/self.num_embeddings, 1/self.num_embeddings)

    def forward(self, z):
        z_flattened = z.permute(0, 2, 3, 1).contiguous().view(-1, self.embedding_dim)
        distances = (z_flattened.pow(2).sum(1, keepdim=True)
                     - 2 * z_flattened @ self.embedding.weight.t()
                     + self.embedding.weight.pow(2).sum(1))

        encoding_indices = torch.argmin(distances, dim=1)
        encodings = F.one_hot(encoding_indices, self.num_embeddings).float()

        quantized = encodings @ self.embedding.weight
        quantized = quantized.view(z.shape).permute(0, 3, 1, 2).contiguous()

        e_latent_loss = F.mse_loss(quantized.detach(), z)
        q_latent_loss = F.mse_loss(quantized, z.detach())
        loss = q_latent_loss + self.commitment_cost * e_latent_loss

        quantized = z + (quantized - z).detach()

        return quantized, loss

class VQVAE(nn.Module):
    def __init__(self):
        super().__init__()
        self.encoder = Encoder()
        self.quantizer = VectorQuantizer()
        self.decoder = Decoder()

    def forward(self, x):
        z = self.encoder(x)
        quantized, vq_loss = self.quantizer(z)
        x_recon = self.decoder(quantized)
        return x_recon, vq_loss


In [12]:
model = VQVAE().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=lr)

for epoch in range(num_epochs):
    for data in dataloader:
        x, _ = data
        x = x.to(device)

        x_recon, vq_loss = model(x)
        recon_loss = F.mse_loss(x_recon, x)
        loss = recon_loss + vq_loss

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    print(f"Epoch [{epoch+1}/{num_epochs}] Loss: {loss.item():.4f}")


RuntimeError: Deterministic behavior was enabled with either `torch.use_deterministic_algorithms(True)` or `at::Context::setDeterministicAlgorithms(true)`, but this operation is not deterministic because it uses CuBLAS and you have CUDA >= 10.2. To enable deterministic behavior in this case, you must set an environment variable before running your PyTorch application: CUBLAS_WORKSPACE_CONFIG=:4096:8 or CUBLAS_WORKSPACE_CONFIG=:16:8. For more information, go to https://docs.nvidia.com/cuda/cublas/index.html#results-reproducibility

In [None]:
# Załaduj wytrenowany model
# model = VQVAE().to(device)
# model.load_state_dict(torch.load("vqvae_cat.pth"))
model.eval()

# Generuj losowe kody z codebooku
def generate_images(model, num_images=16):
    embedding_dim = model.quantizer.embedding_dim
    num_embeddings = model.quantizer.num_embeddings
    spatial_dim = (16, 16)  # zależnie od z→ downsampling 64x64 → 16x16

    with torch.no_grad():
        # Wylosuj indeksy
        random_indices = torch.randint(0, num_embeddings, (num_images, *spatial_dim)).to(device)
        quantized = model.quantizer.embedding(random_indices)
        quantized = quantized.permute(0, 3, 1, 2)  # (B, C, H, W)
        recon = model.decoder(quantized)

        grid = vutils.make_grid(recon, nrow=4, normalize=True, scale_each=True)
        plt.figure(figsize=(8,8))
        plt.axis('off')
        plt.title('Wygenerowane koty')
        plt.imshow(np.transpose(grid.cpu(), (1,2,0)))
        plt.show()

generate_images(model)


Starting Training Loop...
[0/30][0/234]	Loss_D: 1.8398	Loss_G: 1.9405	D(x): 0.2928	D(G(z)): 0.2715 / 0.1754
[0/30][50/234]	Loss_D: 0.0545	Loss_G: 4.7530	D(x): 0.9772	D(G(z)): 0.0166 / 0.0135
[0/30][100/234]	Loss_D: 0.4649	Loss_G: 4.4646	D(x): 0.8428	D(G(z)): 0.2098 / 0.0171
[0/30][150/234]	Loss_D: 0.5293	Loss_G: 2.7877	D(x): 0.8531	D(G(z)): 0.2588 / 0.0883
[0/30][200/234]	Loss_D: 0.2516	Loss_G: 6.4612	D(x): 0.9113	D(G(z)): 0.1287 / 0.0024
[1/30][0/234]	Loss_D: 0.5019	Loss_G: 9.0162	D(x): 0.9631	D(G(z)): 0.3260 / 0.0003
[1/30][50/234]	Loss_D: 1.1810	Loss_G: 5.1976	D(x): 0.4514	D(G(z)): 0.0009 / 0.0186
[1/30][100/234]	Loss_D: 0.6239	Loss_G: 3.6995	D(x): 0.7843	D(G(z)): 0.2212 / 0.0491
[1/30][150/234]	Loss_D: 0.5459	Loss_G: 5.2121	D(x): 0.6600	D(G(z)): 0.0243 / 0.0138
[1/30][200/234]	Loss_D: 0.4852	Loss_G: 5.6047	D(x): 0.8836	D(G(z)): 0.2530 / 0.0082
[2/30][0/234]	Loss_D: 0.8208	Loss_G: 7.4122	D(x): 0.8923	D(G(z)): 0.4202 / 0.0031
[2/30][50/234]	Loss_D: 1.0545	Loss_G: 3.0395	D(x): 0.4880	