In [19]:
%%writefile generator.py
import torch
import torch.nn as nn

class Generator(nn.Module):
    """
    A standard Generator model for a Generative Adversarial Network (GAN).

    The generator's job is to create realistic images from a random noise
    vector (latent space). Its architecture uses transposed convolutions to
    upsample the noise vector into an image-sized tensor.

    Args:
        noise_dim (int): The dimension of the input latent/noise vector.
        image_channels (int): The number of channels for the output image.
                              1 for grayscale, 3 for RGB.
        hidden_dim (int): The feature dimension size for the intermediate layers.
    """
    def __init__(self, noise_dim, image_channels, hidden_dim=64):
        super(Generator, self).__init__()
        self.noise_dim = noise_dim
        self.image_channels = image_channels
        self.hidden_dim = hidden_dim

        # The neural network architecture is defined here.
        self.model = nn.Sequential(
            # Input: (batch_size, noise_dim, 1, 1)
            # This block transforms the noise vector into a 4x4 feature map.
            nn.ConvTranspose2d(self.noise_dim, self.hidden_dim * 8, kernel_size=4, stride=1, padding=0, bias=False),
            nn.BatchNorm2d(self.hidden_dim * 8),
            nn.ReLU(True),
            # Output: (batch_size, hidden_dim * 8, 4, 4)

            # This block upsamples to an 8x8 feature map.
            nn.ConvTranspose2d(self.hidden_dim * 8, self.hidden_dim * 4, kernel_size=4, stride=2, padding=1, bias=False),
            nn.BatchNorm2d(self.hidden_dim * 4),
            nn.ReLU(True),
            # Output: (batch_size, hidden_dim * 4, 8, 8)

            # This block upsamples to a 16x16 feature map.
            nn.ConvTranspose2d(self.hidden_dim * 4, self.hidden_dim * 2, kernel_size=4, stride=2, padding=1, bias=False),
            nn.BatchNorm2d(self.hidden_dim * 2),
            nn.ReLU(True),
            # Output: (batch_size, hidden_dim * 2, 16, 16)

            # This block upsamples to a 32x32 feature map.
            nn.ConvTranspose2d(self.hidden_dim * 2, self.hidden_dim, kernel_size=4, stride=2, padding=1, bias=False),
            nn.BatchNorm2d(self.hidden_dim),
            nn.ReLU(True),
            # Output: (batch_size, hidden_dim, 32, 32)

            # This final block produces the 64x64 image.
            nn.ConvTranspose2d(self.hidden_dim, self.image_channels, kernel_size=4, stride=2, padding=1, bias=False),
            # The Tanh activation function is crucial here. It squashes the output
            # pixel values to be in the range [-1, 1]. This matches the normalization
            # we applied to the real images in the DataLoader.
            nn.Tanh()
            # Output: (batch_size, image_channels, 64, 64)
        )

    def forward(self, x):
        """
        Defines the forward pass of the generator.

        Args:
            x (torch.Tensor): The input noise tensor.

        Returns:
            torch.Tensor: A tensor representing the generated image.
        """
        return self.model(x)

if __name__ == '__main__':
    # --- Example Usage ---
    NOISE_DIM = 100
    IMG_CHANNELS = 3
    IMG_SIZE = 64
    BATCH_SIZE = 128

    # Instantiate the generator
    generator = Generator(noise_dim=NOISE_DIM, image_channels=IMG_CHANNELS)
    print("--- Generator Architecture ---")
    print(generator)

    # Create a batch of random noise vectors.
    # The noise is typically sampled from a standard normal distribution.
    # The shape needs to be (batch_size, noise_dim, 1, 1) for the first
    # ConvTranspose2d layer.
    noise = torch.randn(BATCH_SIZE, NOISE_DIM, 1, 1)

    # Pass the noise through the generator to create fake images
    try:
        fake_images = generator(noise)
        print("\n--- Model Forward Pass Successful ---")
        print(f"Input noise shape: {noise.shape}")
        print(f"Output image shape: {fake_images.shape}") # Should be (BATCH_SIZE, IMG_CHANNELS, IMG_SIZE, IMG_SIZE)
        print(f"Output value range: Min={fake_images.min():.2f}, Max={fake_images.max():.2f}")
    except RuntimeError as e:
        print(f"\n--- An error occurred ---")
        print(e)


Overwriting generator.py


In [20]:

import torch
import torch.nn as nn

class Discriminator(nn.Module):
    """
    A standard Discriminator model for a Generative Adversarial Network (GAN).

    The discriminator's job is to distinguish between real images from a dataset
    and fake images generated by the Generator. It's essentially a binary
    classifier.

    Args:
        image_channels (int): The number of channels in the input image.
                              1 for grayscale, 3 for RGB.
        hidden_dim (int): The feature dimension size for the intermediate layers.
    """
    def __init__(self, image_channels, hidden_dim=64):
        super(Discriminator, self).__init__()
        self.image_channels = image_channels
        self.hidden_dim = hidden_dim

        # The neural network architecture is defined here.
        self.model = nn.Sequential(
            # Input: (batch_size, image_channels, 64, 64)
            nn.Conv2d(self.image_channels, self.hidden_dim, kernel_size=4, stride=2, padding=1, bias=False),
            nn.LeakyReLU(0.2, inplace=True),
            # Output: (batch_size, hidden_dim, 32, 32)

            nn.Conv2d(self.hidden_dim, self.hidden_dim * 2, kernel_size=4, stride=2, padding=1, bias=False),
            nn.BatchNorm2d(self.hidden_dim * 2), # Correctly matches hidden_dim * 2 (128)
            nn.LeakyReLU(0.2, inplace=True),
            # Output: (batch_size, hidden_dim * 2, 16, 16)

            nn.Conv2d(self.hidden_dim * 2, self.hidden_dim * 4, kernel_size=4, stride=2, padding=1, bias=False),
            nn.BatchNorm2d(self.hidden_dim * 4), # Correctly matches hidden_dim * 4 (256)
            nn.LeakyReLU(0.2, inplace=True),
            # Output: (batch_size, hidden_dim * 4, 8, 8)

            nn.Conv2d(self.hidden_dim * 4, self.hidden_dim * 8, kernel_size=4, stride=2, padding=1, bias=False),
            nn.BatchNorm2d(self.hidden_dim * 8), # Correctly matches hidden_dim * 8 (512)
            nn.LeakyReLU(0.2, inplace=True),
            # Output: (batch_size, hidden_dim * 8, 4, 4)

            # Final classification layer to output a single probability score.
            nn.Conv2d(self.hidden_dim * 8, 1, kernel_size=4, stride=1, padding=0, bias=False),
            nn.Sigmoid()
            # Output: (batch_size, 1, 1, 1)
        )

    def forward(self, x):
        """
        Defines the forward pass of the discriminator.

        Args:
            x (torch.Tensor): The input tensor (a batch of images).

        Returns:
            torch.Tensor: A tensor containing the probability score for each image.
        """
        return self.model(x)

if __name__ == '__main__':
    # --- Example Usage ---
    IMG_CHANNELS = 3
    IMG_SIZE = 64
    BATCH_SIZE = 128

    discriminator = Discriminator(image_channels=IMG_CHANNELS)
    print("--- Discriminator Architecture ---")
    print(discriminator)

    fake_images = torch.randn(BATCH_SIZE, IMG_CHANNELS, IMG_SIZE, IMG_SIZE)

    # This line should now work without error
    try:
        output = discriminator(fake_images)
        print("\n--- Model Forward Pass Successful ---")
        print(f"Input shape: {fake_images.shape}")
        print(f"Output shape: {output.shape}")
    except RuntimeError as e:
        print(f"\n--- An error occurred ---")
        print(e)



--- Discriminator Architecture ---
Discriminator(
  (model): Sequential(
    (0): Conv2d(3, 64, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
    (1): LeakyReLU(negative_slope=0.2, inplace=True)
    (2): Conv2d(64, 128, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
    (3): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (4): LeakyReLU(negative_slope=0.2, inplace=True)
    (5): Conv2d(128, 256, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
    (6): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (7): LeakyReLU(negative_slope=0.2, inplace=True)
    (8): Conv2d(256, 512, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
    (9): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (10): LeakyReLU(negative_slope=0.2, inplace=True)
    (11): Conv2d(512, 1, kernel_size=(4, 4), stride=(1, 1), bias=False)
    (12): Sigmoid()


In [21]:
import os
import numpy as np
import torch
from torch.utils.data import Dataset
from torchvision import transforms
from PIL import Image

# --- Image Transformations for Grayscale NPY Data ---
# The Quick, Draw! data is 28x28 grayscale. Our GAN expects 64x64 3-channel images.
# This transform pipeline will handle the conversion.
image_transforms = transforms.Compose([
    # We first need to convert the NumPy array to a PIL Image, which the transforms expect.
    # This is done inside the Dataset class.

    transforms.Resize(64),          # Upsample the image from 28x28 to 64x64
    transforms.CenterCrop(64),      # Ensure it's exactly 64x64

    # This is a key step: The original data is grayscale (1 channel).
    # Our GAN discriminator expects 3 channels (RGB).
    # This transform converts the grayscale image to a 3-channel image by duplicating the channel.
    transforms.Grayscale(num_output_channels=3),

    transforms.ToTensor(),          # Convert image to a PyTorch Tensor (values 0-1)

    # Normalize the tensor to a range of [-1, 1] to match the generator's output
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
])


class NpyBitmapDataset(Dataset):
    """
    Custom PyTorch Dataset for loading images from a "Quick, Draw!" .npy file.

    Args:
        npy_path (str): The path to the .npy file.
        transform (callable, optional): A function/transform to apply to each image.
    """
    def __init__(self, npy_path, transform=None):
        self.transform = transform

        try:
            # Load the entire dataset from the .npy file into memory.
            # The data is expected to be uint8 (0-255).
            self.data = np.load(npy_path)
            print(f"Successfully loaded {npy_path}.")
            print(f"Dataset shape: {self.data.shape}") # Should be (num_images, 784)
            print(f"Data type: {self.data.dtype}")

            # Add a check for the expected data size based on the potential reshape
            expected_size = self.data.shape[0] * 28 * 28
            print(f"Expected size for reshape (num_images * 28 * 28): {expected_size}")

        except FileNotFoundError:
            print(f"Error: The file was not found at {npy_path}")
            print("Please make sure the file exists and the path is correct.")
            self.data = np.array([]) # Create an empty array to avoid crashing
        except Exception as e:
            print(f"An error occurred while loading the file: {e}")
            self.data = np.array([])

    def __len__(self):
        """Returns the total number of images in the dataset."""
        return self.data.shape[0]

    def __getitem__(self, idx):
        """
        Retrieves an image by its index, reshapes it, and applies transformations.
        """
        # Get the flattened 784-pixel data for the given index
        flat_image = self.data[idx]

        # Reshape the data into a 28x28 pixel grayscale image
        image_2d = flat_image.reshape(28, 28)

        # Convert the NumPy array to a PIL Image.
        # Transforms like Resize work best with PIL Images.
        #
        image = Image.fromarray(image_2d, mode='L') # 'L' mode is for grayscale

        # Apply the transformations if they are defined
        if self.transform:
            image = self.transform(image)

        return image

if __name__ == '__main__':
    # --- Example Usage ---
    # This block demonstrates how to use the NpyBitmapDataset class.

    # IMPORTANT: Update this path to where you have stored the .npy file.
    NPY_FILE_PATH = r'/content/full_numpy_bitmap_camel.npy'

    print(f"--- Testing NpyBitmapDataset with file: {NPY_FILE_PATH} ---")

    if os.path.exists(NPY_FILE_PATH):
        # Create an instance of the dataset
        dataset = NpyBitmapDataset(npy_path=NPY_FILE_PATH, transform=image_transforms)

        if len(dataset) > 0:
            # Retrieve the first item from the dataset to test it
            first_image = dataset[0]

            print("\n--- Dataset Test Successful ---")
            print(f"Total number of images found: {len(dataset)}")
            print(f"Shape of a single transformed image tensor: {first_image.shape}") # Should be (3, 64, 64)
            print(f"Data type of the tensor: {first_image.dtype}")
            print(f"Min value in the tensor: {first_image.min():.2f}") # Should be ~ -1.0
            print(f"Max value in the tensor: {first_image.max():.2f}") # Should be ~ 1.0
        else:
            print("\nDataset was loaded but contains no data. Please check the .npy file.")
    else:
        print("\n--- Test Failed ---")
        print(f"The file was not found. Please update the NPY_FILE_PATH variable in the script.")

--- Testing NpyBitmapDataset with file: /content/full_numpy_bitmap_camel.npy ---
An error occurred while loading the file: cannot reshape array of size 91226032 into shape (121399,784)

Dataset was loaded but contains no data. Please check the .npy file.


In [24]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.utils as vutils
from torch.utils.data import DataLoader
import os

# Import the model and data loader classes we've created
from generator import Generator
from discriminator import Discriminator
from npy_data_loader import NpyBitmapDataset, image_transforms


# --- Hyperparameters ---
# It's good practice to keep all hyperparameters in one place.
LEARNING_RATE = 0.0002
BETA1 = 0.5  # Recommended for Adam optimizer in DCGAN paper
BATCH_SIZE = 128
IMAGE_SIZE = 64
IMAGE_CHANNELS = 3
NOISE_DIM = 100
NUM_EPOCHS = 25 # Increase this for better results
HIDDEN_DIM = 64

# --- Setup for Data and Directories ---
print("--- Setting up environment ---")

# Create a dummy image directory for demonstration
# In a real scenario, you would point this to your actual dataset folder.
NPY_FILE = r'/content/full_numpy_bitmap_camel.npy'
# Extract the directory path from the file path
DATA_DIR = os.path.dirname(NPY_FILE)

# Check if the data directory exists, and create it if it doesn't
if not os.path.exists(DATA_DIR):
    os.makedirs(DATA_DIR)
    print(f"Created data directory at '{DATA_DIR}'. Please place your .npy file here.")

# Create directories to save generated images and model checkpoints
os.makedirs('results/real', exist_ok=True)
os.makedirs('results/fake', exist_ok=True)
os.makedirs('models', exist_ok=True)

# Set the device (use GPU if available, otherwise CPU)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")


# --- Data Loading ---
# Only attempt to load the dataset if the NPY file exists
if os.path.exists(NPY_FILE):
    dataset = NpyBitmapDataset(npy_path=NPY_FILE, transform=image_transforms)
    # Only create DataLoader if the dataset is not empty
    if len(dataset) > 0:
        dataloader = DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True)
        print("Data loader created.")
    else:
        dataloader = None # Set dataloader to None if dataset is empty
        print("Dataset is empty. Data loader was not created.")
else:
    dataset = None # Set dataset to None if the file doesn't exist
    dataloader = None # Set dataloader to None if the file doesn't exist
    print(f"Error: The file was not found at {NPY_FILE}. Data loader was not created.")


# --- Model Initialization ---
# Create instances of the Generator and Discriminator
generator = Generator(NOISE_DIM, IMAGE_CHANNELS, HIDDEN_DIM).to(device)
discriminator = Discriminator(IMAGE_CHANNELS, HIDDEN_DIM).to(device)

# Custom weight initialization as suggested in the DCGAN paper
def weights_init(m):
    classname = m.__class__.__name__
    if classname.find('Conv') != -1:
        nn.init.normal_(m.weight.data, 0.0, 0.02)
    elif classname.find('BatchNorm') != -1:
        nn.init.normal_(m.weight.data, 1.0, 0.02)
        nn.init.constant_(m.bias.data, 0)

generator.apply(weights_init)
discriminator.apply(weights_init)
print("Models created and weights initialized.")

# --- Optimizers and Loss Function ---
# Binary Cross-Entropy loss is standard for GANs
criterion = nn.BCELoss()

# We need separate optimizers for the generator and discriminator
d_optimizer = optim.Adam(discriminator.parameters(), lr=LEARNING_RATE, betas=(BETA1, 0.999))
g_optimizer = optim.Adam(generator.parameters(), lr=LEARNING_RATE, betas=(BETA1, 0.999))
print("Optimizers and loss function defined.")

# Create a fixed noise vector to see how the generator improves over time
fixed_noise = torch.randn(64, NOISE_DIM, 1, 1, device=device)

# --- Training Loop ---
print("\n--- Starting Training Loop ---")
# Only start training if the dataloader was successfully created
if dataloader is not None:
    for epoch in range(NUM_EPOCHS):
        for i, real_images in enumerate(dataloader, 0):
            # real_images is a batch of images from our dataset

            ############################
            # (1) Update Discriminator #
            ############################

            # --- Train with real images ---
            discriminator.zero_grad()
            real_images = real_images.to(device)
            batch_size = real_images.size(0)

            # Create labels for real images (all 1s)
            real_labels = torch.ones(batch_size, device=device)

            # Forward pass real batch through Discriminator
            d_output_real = discriminator(real_images).view(-1)
            # Calculate loss on all-real batch
            d_loss_real = criterion(d_output_real, real_labels)
            d_loss_real.backward()

            # --- Train with fake images ---
            # Generate a batch of noise vectors
            noise = torch.randn(batch_size, NOISE_DIM, 1, 1, device=device)
            # Generate fake images with the generator
            fake_images = generator(noise)

            # Create labels for fake images (all 0s)
            fake_labels = torch.zeros(batch_size, device=device)

            # Classify fake images with Discriminator.
            # Use .detach() to avoid backpropagating through the Generator
            d_output_fake = discriminator(fake_images.detach()).view(-1)
            d_loss_fake = criterion(d_output_fake, fake_labels)
            d_loss_fake.backward()

            # Update Discriminator
            d_optimizer.step()

            # Total discriminator loss
            d_loss = d_loss_real + d_loss_fake

            ############################
            # (2) Update Generator     #
            ############################
            generator.zero_grad()

            # We need to re-classify the fake images with the updated discriminator
            d_output_for_g = discriminator(fake_images).view(-1)

            # Generator's goal is to make the discriminator think its images are real.
            # So, we calculate its loss using real_labels (all 1s).
            g_loss = criterion(d_output_for_g, real_labels)

            # Calculate gradients for generator
            g_loss.backward()

            # Update Generator
            g_optimizer.step()

            # --- Logging and Visualization ---
            if i % 50 == 0:
                print(
                    f'Epoch [{epoch+1}/{NUM_EPOCHS}] | Batch [{i}/{len(dataloader)}] | '
                    f'D_loss: {d_loss.item():.4f} | G_loss: {g_loss.item():.4f}'
                )

        # After each epoch, save the generated images from the fixed_noise vector
        with torch.no_grad():
            fake_samples = generator(fixed_noise).detach().cpu()

        # Save a grid of real images from the last batch
        vutils.save_image(real_images, f"results/real/epoch_{epoch+1}.png", normalize=True)
        # Save a grid of the generated fake images
        vutils.save_image(fake_samples, f"results/fake/epoch_{epoch+1}.png", normalize=True)

        # Save model checkpoints periodically
        if (epoch + 1) % 5 == 0:
            torch.save(generator.state_dict(), f'models/generator_epoch_{epoch+1}.pth')
            torch.save(discriminator.state_dict(), f'models/discriminator_epoch_{epoch+1}.pth')
            print(f"Saved models at epoch {epoch+1}")
else:
    print("\nSkipping training as the data loader was not created.")

print("\n--- Training Finished ---")

--- Setting up environment ---
Using device: cuda
Successfully loaded /content/full_numpy_bitmap_camel.npy.
Dataset shape: (121399, 784)
Data type: uint8
Data loader created.
Models created and weights initialized.
Optimizers and loss function defined.

--- Starting Training Loop ---
Epoch [1/25] | Batch [0/949] | D_loss: 1.5534 | G_loss: 4.7227
Epoch [1/25] | Batch [50/949] | D_loss: 0.0007 | G_loss: 15.4580
Epoch [1/25] | Batch [100/949] | D_loss: 0.1310 | G_loss: 26.0599
Epoch [1/25] | Batch [150/949] | D_loss: 0.1184 | G_loss: 15.1793
Epoch [1/25] | Batch [200/949] | D_loss: 0.1708 | G_loss: 5.1316
Epoch [1/25] | Batch [250/949] | D_loss: 0.0694 | G_loss: 10.6772
Epoch [1/25] | Batch [300/949] | D_loss: 0.4936 | G_loss: 6.8403
Epoch [1/25] | Batch [350/949] | D_loss: 0.3041 | G_loss: 2.8113
Epoch [1/25] | Batch [400/949] | D_loss: 0.2260 | G_loss: 4.3807
Epoch [1/25] | Batch [450/949] | D_loss: 0.1266 | G_loss: 4.2746
Epoch [1/25] | Batch [500/949] | D_loss: 0.1067 | G_loss: 4.4455

In [23]:
import numpy as np

npy_file_path = "/content/full_numpy_bitmap_camel.npy"

try:
    # Load the data from the .npy file
    data = np.load(npy_file_path)

    print(f"Successfully loaded the file: {npy_file_path}")
    print(f"Data Type: {data.dtype}")
    print(f"Shape: {data.shape}")

    # You can add more analysis here if needed, e.g., display the first few rows:
    # print("\nFirst 5 rows of the data:")
    # print(data[:5])

except FileNotFoundError:
    print(f"Error: The file was not found at {npy_file_path}")
    print("Please make sure the file exists at the specified path.")
except Exception as e:
    print(f"An error occurred while loading or analyzing the file: {e}")

Successfully loaded the file: /content/full_numpy_bitmap_camel.npy
Data Type: uint8
Shape: (121399, 784)
