<a href="https://colab.research.google.com/github/Muhammad224172/Drough_index/blob/main/Autoencoders%2CVAE%2CVQ_VAE.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [7]:
!pip install celluloid

Collecting celluloid
  Downloading celluloid-0.2.0-py3-none-any.whl.metadata (4.8 kB)
Downloading celluloid-0.2.0-py3-none-any.whl (5.4 kB)
Installing collected packages: celluloid
Successfully installed celluloid-0.2.0


Inroduction to Autoencoders

In [8]:
import os
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import transforms
from torchvision.datasets import MNIST
from tqdm.auto import tqdm
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE


### Stuff to Visualize the Latent Space ###
from celluloid import Camera
from IPython.display import HTML

### Seed Everything ###
torch.manual_seed(0)
torch.cuda.manual_seed(0)
np.random.seed(0)
random.seed(0)

### GENERATE ANIMATIONS ###
generate_anim = False

# Define transform pipeline
transform = transforms.Compose([
    transforms.ToTensor(),  # Converts image to PyTorch tensor
    transforms.Normalize((0.1307,), (0.3081,))  # Normalize with MNIST mean and std
])

# Load training dataset
train_set = MNIST(root='../../data/mnist', train=True, download=True, transform=transform)
test_set = MNIST("../../../data/mnist/", train=False, transform=transform)


In [9]:
for image , label in train_set:
  print(image.min())
  print(image.max())
  break

tensor(-0.4242)
tensor(2.8215)


In [48]:
class Vanilla(nn.Module):
    def __init__(self, input_dim=784, bottleneck_size=2):
        super().__init__()
        self.encoder = nn.Sequential(
            nn.Linear(input_dim, 128),
            nn.ReLU(),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, bottleneck_size)
        )

        self.decoder = nn.Sequential(
            nn.Linear(bottleneck_size, 32),
            nn.ReLU(),
            nn.Linear(32, 64),
            nn.ReLU(),
            nn.Linear(64, 128),
            nn.ReLU(),
            nn.Linear(128, input_dim),
            nn.Tanh()
        )

    def forward(self, x):
        # Store original dimensions
        original_shape = x.shape

        # Flatten the input
        x_flat = x.reshape(original_shape[0], -1)

        # Encode and decode
        encoded = self.encoder(x_flat)
        decoded_flat = self.decoder(encoded)

        # Reshape to original dimensions
        decoded = decoded_flat.reshape(original_shape)

        return encoded, decoded

"Flattening is not ideal for images as spatial information is lost.

In [47]:
  model = Vanilla()
  rand = torch.randn(2,1,32,32)
  model(rand)

RuntimeError: mat1 and mat2 shapes cannot be multiplied (2x1024 and 784x128)

Train the model

In [45]:
def train(model ,
          train_set,
          test_set,
          batch_size,
          training_iterations ,
          evaluation_iterations , verbose = True):

# print("Trainig Model")
# print(model)
  device = "cuda" if torch.cuda.is_available() else "cpu"
  model = model.to(device)
  trainloader = DataLoader(train_set, batch_size = batch_size)
  testloader = DataLoader(test_set, batch_size = batch_size)
  optimizer = optim.Adam(model.parameters(), lr = 0.0005)

  train_loss = []
  evaluation = []
  train_losses = []
  evaluation_loss = []
  encoded_data_per_eval = []
  pbar = tqdm(range(training_iterations))

  train = True
  step_counter = 0
  while train:

    for images, labels in trainloader:

            images = images.to(device)
            encoded, reconstruction = model(images)

            ### Simple MSE Loss ###
            loss = torch.mean((images - reconstruction)**2)
            train_loss.append(loss.item())

            loss.backward()
            optimizer.step()
            optimizer.zero_grad()

    if step_counter % evaluation_iterations == 0:

                model.eval()
                encoded_evaluations = []

                for images, labels in testloader:

                    images = images.to(device)
                    encoded, reconstruction = model(images)
                    loss = torch.mean((images - reconstruction)**2)
                    evaluation_loss.append(loss.item())

                    ### Store the Encoded Image with their Labels ###
                    encoded, labels = encoded.cpu().flatten(1), labels.reshape(-1,1)
                    encoded_evaluations.append(torch.cat([encoded, labels], dim=-1))
                ### Store All Testing Encoded Images ###
                encoded_data_per_eval.append(torch.concatenate(encoded_evaluations).detach())

                train_loss = np.mean(train_loss)
                evaluation_loss = np.mean(evaluation_loss)

                train_losses.append(train_loss)
                evaluation_losses.append(evaluation_loss)

                if verbose:
                    print("Training Loss", train_loss)
                    print("Evaluation Loss", evaluation_loss)

                ### Reset For Next Evaluation ###
                train_loss = []
                evaluation_loss = []

                model.train()


    step_counter += 1
    pbar.update(1)

    if step_counter >= training_iterations:
                print("Completed Training!")
                train = False
                break

    ### Store All Encoded Data as Numpy Arrays for each Eval Iteration ###
    encoded_data_per_eval = [np.array(i) for i in encoded_data_per_eval]

    print("Final Training Loss", train_losses[-1])
    print("Final Evaluation Loss", evaluation_losses[-1])

    return model, train_losses, evaluation_losses, encoded_data_per_eval

vanilla_model = Vanilla(bottleneck_size=2)
vanilla_model, train_losses, evaluation_losses, vanilla_encoded_data = train(vanilla_model,
                                                                             train_set,
                                                                             test_set,
                                                                             batch_size=64,
                                                                             training_iterations=25000,
                                                                             evaluation_iterations=250)

  0%|          | 0/25000 [00:00<?, ?it/s]

RuntimeError: mat1 and mat2 shapes cannot be multiplied (64x784 and 1024x128)

  0%|          | 0/25000 [00:00<?, ?it/s]

RuntimeError: mat1 and mat2 shapes cannot be multiplied (64x784 and 1024x128)