In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data import DataLoader

In [3]:
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image

In [None]:
# Define the Variational Autoencoder (VAE)
class VAE(nn.Module):
    def __init__(self, input_dim=784, latent_dim=20):
        super(VAE, self).__init__()
        
        # Encoder
        self.encoder = nn.Sequential(
            nn.Linear(input_dim, 400),
            nn.ReLU(),
            nn.Linear(400, 200),
            nn.ReLU(),
            nn.Linear(200, latent_dim * 2)  # Mean & Log Variance
        )

        # Decoder
        self.decoder = nn.Sequential(
            nn.Linear(latent_dim, 200),
            nn.ReLU(),
            nn.Linear(200, 400),
            nn.ReLU(),
            nn.Linear(400, input_dim),
            nn.Sigmoid()  # Outputs between 0-1
        )

    def reparameterize(self, mu, log_var):
        std = torch.exp(0.5 * log_var)
        eps = torch.randn_like(std)
        return mu + eps * std

    def forward(self, x):
        x = x.view(x.size(0), -1)  # Flatten input
        z_params = self.encoder(x)
        mu, log_var = z_params.chunk(2, dim=1)  # Split mean & log variance
        z = self.reparameterize(mu, log_var)
        x_recon = self.decoder(z)
        return x_recon, mu, log_var

# Load MNIST dataset (for handwritten digits)
transform = transforms.Compose([transforms.ToTensor()])
dataset = datasets.MNIST(root="./data", train=True, transform=transform, download=True)
dataloader = DataLoader(dataset, batch_size=64, shuffle=True)

# Initialize model, optimizer & loss function
vae = VAE()
optimizer = optim.Adam(vae.parameters(), lr=0.001)
loss_function = nn.MSELoss()

# Train the VAE
epochs = 10
vae.train()
for epoch in range(epochs):
    for images, _ in dataloader:
        images = images.view(images.size(0), -1)
        optimizer.zero_grad()
        recon_images, mu, log_var = vae(images)
        
        # Compute loss
        recon_loss = loss_function(recon_images, images)
        kl_div = -0.5 * torch.sum(1 + log_var - mu.pow(2) - log_var.exp())
        loss = recon_loss + kl_div
        
        loss.backward()
        optimizer.step()
    print(f"Epoch [{epoch+1}/{epochs}], Loss: {loss.item():.4f}")

# Function to reconstruct an input image and predict
def predict_image(image_path):
    image = Image.open(image_path).convert("L").resize((28, 28))  # Convert to grayscale & resize
    image = transforms.ToTensor()(image).view(1, -1)

    vae.eval()
    with torch.no_grad():
        reconstructed, _, _ = vae(image)
    
    # Display original & reconstructed image
    fig, axs = plt.subplots(1, 2)
    axs[0].imshow(image.view(28, 28), cmap="gray")
    axs[0].set_title("Original Image")
    axs[0].axis("off")

    axs[1].imshow(reconstructed.view(28, 28), cmap="gray")
    axs[1].set_title("Reconstructed Image")
    axs[1].axis("off")

    plt.show()

# Test with a sample image
image_path = r"C:\Users\Student\Pictures\Screenshots\num.png"
predict_image(image_path)

Epoch [1/10], Loss: 0.0697
Epoch [2/10], Loss: 0.0690
Epoch [3/10], Loss: 0.0655
Epoch [4/10], Loss: 0.0728


In [5]:
# Define the Variational Autoencoder (VAE)
class VAE(nn.Module):
    def __init__(self, input_dim=784, latent_dim=20):
        super(VAE, self).__init__()
        
        # Encoder
        self.encoder = nn.Sequential(
            nn.Linear(input_dim, 400),
            nn.ReLU(),
            nn.Linear(400, 200),
            nn.ReLU(),
            nn.Linear(200, latent_dim * 2)  # Mean & Log Variance
        )

        # Decoder
        self.decoder = nn.Sequential(
            nn.Linear(latent_dim, 200),
            nn.ReLU(),
            nn.Linear(200, 400),
            nn.ReLU(),
            nn.Linear(400, input_dim),
            nn.Sigmoid()  # Outputs between 0-1
        )

    def reparameterize(self, mu, log_var):
        std = torch.exp(0.5 * log_var)
        eps = torch.randn_like(std)
        return mu + eps * std

    def forward(self, x):
        x = x.view(x.size(0), -1)  # Flatten input
        z_params = self.encoder(x)
        mu, log_var = z_params.chunk(2, dim=1)  # Split mean & log variance
        z = self.reparameterize(mu, log_var)
        x_recon = self.decoder(z)
        return x_recon, mu, log_var

# Load MNIST dataset (for handwritten digits)
transform = transforms.Compose([transforms.ToTensor()])
dataset = datasets.MNIST(root="./data", train=True, transform=transform, download=True)
dataloader = DataLoader(dataset, batch_size=64, shuffle=True)

# Initialize model, optimizer & loss function
vae = VAE()
optimizer = optim.Adam(vae.parameters(), lr=0.001)
loss_function = nn.MSELoss()

# Train the VAE
epochs = 10
vae.train()
for epoch in range(epochs):
    for images, _ in dataloader:
        images = images.view(images.size(0), -1)
        optimizer.zero_grad()
        recon_images, mu, log_var = vae(images)
        
        # Compute loss
        recon_loss = loss_function(recon_images, images)
        kl_div = -0.5 * torch.sum(1 + log_var - mu.pow(2) - log_var.exp())
        loss = recon_loss + kl_div
        
        loss.backward()
        optimizer.step()
    print(f"Epoch [{epoch+1}/{epochs}], Loss: {loss.item():.4f}")

# Function to reconstruct an input image and predict
def predict_image(image_path):
    image = Image.open(image_path).convert("L").resize((28, 28))  # Convert to grayscale & resize
    image = transforms.ToTensor()(image).view(1, -1)

    vae.eval()
    with torch.no_grad():
        reconstructed, _, _ = vae(image)
    
    # Display original & reconstructed image
    fig, axs = plt.subplots(1, 2)
    axs[0].imshow(image.view(28, 28), cmap="gray")
    axs[0].set_title("Original Image")
    axs[0].axis("off")

    axs[1].imshow(reconstructed.view(28, 28), cmap="red")
    axs[1].set_title("Reconstructed Image")
    axs[1].axis("off")

    plt.show()

# Test with a sample image
image_path = r"C:\Users\Student\Downloads\WhatsApp Image 2025-04-03 at 2.25.52 PM_11zon.png"
predict_image(image_path)

Epoch [1/10], Loss: 0.0687
Epoch [2/10], Loss: 0.0632
Epoch [3/10], Loss: 0.0684
Epoch [4/10], Loss: 0.0680
Epoch [5/10], Loss: 0.0674
Epoch [6/10], Loss: 0.0646
Epoch [7/10], Loss: 0.0658
Epoch [8/10], Loss: 0.0673
Epoch [9/10], Loss: 0.0709
Epoch [10/10], Loss: 0.0658


FileNotFoundError: [Errno 2] No such file or directory: 'C:\\Users\\Student\\Downloads\\WhatsApp Image 2025-04-03 at 2.25.52 PM_11zon.jpeg'