In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class Encoder(nn.Module):
    def __init__(self, latent_dim):
        super(Encoder, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=4, stride=2, padding=1)  # Input: RGB
        self.conv2 = nn.Conv2d(32, 64, kernel_size=4, stride=2, padding=1)
        self.conv3 = nn.Conv2d(64, 128, kernel_size=4, stride=2, padding=1)
        self.conv4 = nn.Conv2d(128, 256, kernel_size=4, stride=2, padding=1)
        self.fc_mu = nn.Linear(256 * 4 * 4, latent_dim)
        self.fc_logvar = nn.Linear(256 * 4 * 4, latent_dim)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = F.relu(self.conv3(x))
        x = F.relu(self.conv4(x))
        x = x.view(x.size(0), -1)  # Flatten for linear layers
        mu = self.fc_mu(x)
        logvar = self.fc_logvar(x)
        return mu, logvar


class Decoder(nn.Module):
    def __init__(self, latent_dim):
        super(Decoder, self).__init__()
        self.fc = nn.Linear(latent_dim, 256 * 4 * 4)
        self.deconv1 = nn.ConvTranspose2d(256, 128, kernel_size=4, stride=2, padding=1)
        self.deconv2 = nn.ConvTranspose2d(128, 64, kernel_size=4, stride=2, padding=1)
        self.deconv3 = nn.ConvTranspose2d(64, 32, kernel_size=4, stride=2, padding=1)
        self.deconv4 = nn.ConvTranspose2d(32, 3, kernel_size=4, stride=2, padding=1)  # Output: RGB

    def forward(self, z):
        z = self.fc(z)
        z = z.view(z.size(0), 256, 4, 4)  # Reshape for deconvolutions
        z = F.relu(self.deconv1(z))
        z = F.relu(self.deconv2(z))
        z = F.relu(self.deconv3(z))
        recon_x = torch.sigmoid(self.deconv4(z))  # Scale to [0, 1]
        return recon_x


class VAE(nn.Module):
    def __init__(self, latent_dim):
        super(VAE, self).__init__()
        self.encoder = Encoder(latent_dim)
        self.decoder = Decoder(latent_dim)

    def forward(self, x):
        mu, logvar = self.encoder(x)
        std = torch.exp(0.5 * logvar)
        z = mu + std * torch.randn_like(std)  # Reparameterization trick
        recon_x = self.decoder(z)
        return recon_x, mu, logvar


In [2]:
# Loss function combining reconstruction loss and KL divergence
def loss_function(recon_x, x, mu, logvar):
    # Reconstruction loss (BCE or MSE depending on dataset)
    recon_loss = F.mse_loss(recon_x, x, reduction='sum')  # Use MSE for continuous images
    # KL divergence loss
    kl_loss = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())
    return recon_loss + kl_loss


In [3]:

# Training loop
def train_vae(vae, dataloader, epochs, latent_dim, device):
    optimizer = torch.optim.Adam(vae.parameters(), lr=1e-3)
    vae = vae.to(device)
    vae.train()

    for epoch in range(epochs):
        total_loss = 0
        for images, _ in dataloader:
            images = images.to(device)
            optimizer.zero_grad()

            recon_images, mu, logvar = vae(images)
            loss = loss_function(recon_images, images, mu, logvar)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()

        print(f"Epoch {epoch + 1}/{epochs}, Loss: {total_loss / len(dataloader.dataset):.4f}")

    return vae

In [8]:
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
# Updated device setting for CPU
device = torch.device("cpu")  # Set to 'cpu'

transform = transforms.Compose([
    transforms.Resize((64, 64)),
    transforms.RandomHorizontalFlip(),  # Augmentation: Horizontal flipping
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])  # Normalize to [-1, 1]
])

dataset = datasets.ImageFolder(root="photos_classes", transform=transform)
dataloader = DataLoader(dataset, batch_size=64, shuffle=True)

In [9]:
# Generate images for evaluation
def generate_and_save_images(vae, num_samples, latent_dim, device):
    vae.eval()
    with torch.no_grad():
        z = torch.randn(num_samples, latent_dim).to(device)
        generated_images = vae.decoder(z).cpu().detach()
    return generated_images


In [10]:
# Display generated images
def display_last_generated_images(images, num_last=5):
    if images.max() > 1:
        images = images / 255.0  # Normalize for display
    images = images.cpu().detach()

    if images.shape[1] == 1:  # If grayscale, repeat to create RGB
        images = images.repeat(1, 3, 1, 1)

    images = images[-num_last:]  # Get the last N images
    plt.figure(figsize=(15, 5))
    for i, image in enumerate(images):
        plt.subplot(1, num_last, i + 1)
        plt.imshow(image.permute(1, 2, 0))
        plt.axis("off")
    plt.tight_layout()
    plt.show()

In [11]:
# Main training and evaluation
if __name__ == "__main__":
    latent_dim = 64
    epochs = 200
    num_samples = 500

    # Initialize and train VAE
    vae = VAE(latent_dim=latent_dim)
    vae = train_vae(vae, dataloader, epochs=epochs, latent_dim=latent_dim, device=device)

    # Generate and display the last 5 images
    generated_images = generate_and_save_images(vae, num_samples=num_samples, latent_dim=latent_dim, device=device)

Epoch 1/50, Loss: 8716.5284
Epoch 2/50, Loss: 7814.0405
Epoch 3/50, Loss: 7439.6109
Epoch 4/50, Loss: 7194.4680
Epoch 5/50, Loss: 7029.1695
Epoch 6/50, Loss: 6922.0646
Epoch 7/50, Loss: 6823.6672
Epoch 8/50, Loss: 6764.7547
Epoch 9/50, Loss: 6713.7690
Epoch 10/50, Loss: 6682.7555
Epoch 11/50, Loss: 6652.4398
Epoch 12/50, Loss: 6629.7009
Epoch 13/50, Loss: 6608.1597
Epoch 14/50, Loss: 6594.4859
Epoch 15/50, Loss: 6580.2053
Epoch 16/50, Loss: 6570.9692
Epoch 17/50, Loss: 6562.7112
Epoch 18/50, Loss: 6554.2113
Epoch 19/50, Loss: 6543.2068
Epoch 20/50, Loss: 6537.0615
Epoch 21/50, Loss: 6530.1085
Epoch 22/50, Loss: 6523.2698
Epoch 23/50, Loss: 6518.3813
Epoch 24/50, Loss: 6512.0023
Epoch 25/50, Loss: 6507.5854
Epoch 26/50, Loss: 6501.2923
Epoch 27/50, Loss: 6497.5254
Epoch 28/50, Loss: 6489.4555
Epoch 29/50, Loss: 6486.8823
Epoch 30/50, Loss: 6482.3759
Epoch 31/50, Loss: 6475.8285
Epoch 32/50, Loss: 6472.8637
Epoch 33/50, Loss: 6467.2269
Epoch 34/50, Loss: 6461.4004
Epoch 35/50, Loss: 6459

NameError: name 'plt' is not defined

In [None]:
display_last_generated_images(generated_images, num_last=5)

In [12]:
import torch
from torchvision.models import inception_v3
from torchvision.transforms import Resize, Normalize
from scipy.linalg import sqrtm
import numpy as np
import torch.nn.functional as F


# Load Pretrained InceptionV3
def load_inception_model():
    model = inception_v3(pretrained=True, transform_input=False)
    model.fc = torch.nn.Identity()  # Remove the classification layer
    model.eval()
    return model


# Compute Inception Score (IS)
def compute_inception_score(images, inception, device, splits=10):
    """
    Compute the Inception Score for generated images.

    Args:
        images (torch.Tensor): Tensor of generated images, shape [N, 3, H, W].
        inception (torch.nn.Module): Pretrained InceptionV3 model.
        device (torch.device): Device to run the model.
        splits (int): Number of splits for score calculation.

    Returns:
        (float, float): Mean and standard deviation of Inception Score.
    """
    images = images.to(device)
    with torch.no_grad():
        preds = F.softmax(inception(images), dim=1).cpu().numpy()  # Predict class probabilities
    split_scores = []
    for k in range(splits):
        part = preds[k * (len(preds) // splits): (k + 1) * (len(preds) // splits)]
        kl_div = part * (np.log(part) - np.log(np.mean(part, axis=0, keepdims=True)))
        split_scores.append(np.exp(np.mean(np.sum(kl_div, axis=1))))
    return np.mean(split_scores), np.std(split_scores)


# Compute Frechet Inception Distance (FID)
def compute_fid(real_activations, fake_activations):
    """
    Compute the Frechet Inception Distance (FID) between real and generated images.

    Args:
        real_activations (np.ndarray): Activations of real images.
        fake_activations (np.ndarray): Activations of generated images.

    Returns:
        float: FID score.
    """
    mu1, sigma1 = np.mean(real_activations, axis=0), np.cov(real_activations, rowvar=False)
    mu2, sigma2 = np.mean(fake_activations, axis=0), np.cov(fake_activations, rowvar=False)
    diff = mu1 - mu2
    covmean = sqrtm(sigma1.dot(sigma2))
    if np.iscomplexobj(covmean):
        covmean = covmean.real
    fid = np.sum(diff**2) + np.trace(sigma1 + sigma2 - 2 * covmean)
    return fid


In [None]:
from torchvision.transforms import Compose, ToTensor

# Preprocess images for InceptionV3
def preprocess_images_for_inception(images, resize=299):
    transform = Compose([
        Resize((resize, resize)),  # Resize to 299x299 for InceptionV3
        Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # Inception normalization
    ])
    images = images.float() if images.dtype == torch.uint8 else images
    images = images / 255.0 if images.max() > 1 else images  # Scale to [0, 1] if needed
    return torch.stack([transform(image) for image in images])


In [None]:
def evaluate_metrics(vae, dataloader, num_generated, latent_dim, device):
    """
    Evaluate Inception Score (IS) and FID for a trained VAE.

    Args:
        vae (torch.nn.Module): Trained VAE model.
        dataloader (torch.utils.data.DataLoader): DataLoader for real images.
        num_generated (int): Number of generated images for evaluation.
        latent_dim (int): Dimension of the latent space.
        device (torch.device): Device to run evaluation.

    Returns:
        (float, float, float): Inception Score (mean, std) and FID score.
    """
    # Load pretrained InceptionV3
    inception = load_inception_model().to(device)

    # Generate images
    vae.eval()
    with torch.no_grad():
        z = torch.randn(num_generated, latent_dim).to(device)
        generated_images = vae.decoder(z).cpu().detach()

    # Preprocess generated images
    generated_images = preprocess_images_for_inception(generated_images).to(device)

    # Compute activations for generated images
    with torch.no_grad():
        fake_activations = inception(generated_images).cpu().numpy()

    # Compute activations for real images
    real_activations = []
    for real_images, _ in dataloader:
        real_images = preprocess_images_for_inception(real_images).to(device)
        with torch.no_grad():
            real_activations.append(inception(real_images).cpu().numpy())
    real_activations = np.concatenate(real_activations, axis=0)

    # Compute Inception Score
    inception_score, inception_std = compute_inception_score(generated_images, inception, device)

    # Compute FID
    fid = compute_fid(real_activations, fake_activations)

    print(f"Inception Score: {inception_score} ± {inception_std}")
    print(f"Frechet Inception Distance: {fid}")
    return inception_score, inception_std, fid


In [None]:
# Evaluate metrics
latent_dim = 64
num_generated = 500

# Ensure the DataLoader has the real dataset
dataloader = DataLoader(dataset, batch_size=64, shuffle=False)

# Evaluate Inception Score and FID
is_mean, is_std, fid = evaluate_metrics(vae, dataloader, num_generated, latent_dim, device=device)


In [13]:
image_folder = 'D:\Loyalist Subjects\Semester 3\2024F-T3 AISC2007 - Deep Learning 01\Deep Learning\yelp_photos\photos'
json_file = 'D:\Loyalist Subjects\Semester 3\2024F-T3 AISC2007 - Deep Learning 01\Deep Learning\yelp_photos\photos.json'
label_filter = 'drink'

NameError: name 'image_size' is not defined