# Pre GPU CNN Autoencoder

After researching autoencoders and convolutional neural networks, we realized that it may be more effective to build the autoencoder from CNN layers rather than MLP layers. This code also takes a very long time to run.

## NOTE: This code was generated using ChatGPT

In [None]:
import os
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, random_split
from torchvision import transforms
from PIL import Image
import matplotlib.pyplot as plt

# Select the compute device: CUDA when it is available, otherwise the CPU
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(f"Using device: {device}")

# Hyperparameters
batch_size = 32           # samples per mini-batch
learning_rate = 1e-3      # step size handed to the optimizer
num_epochs = 20           # full passes over the training split
image_size = (128, 128)   # every image is resized to 128x128
latent_dim = 128          # width of the autoencoder bottleneck

# Custom dataset class
class EyeDataset(Dataset):
    """Image dataset indexed by a CSV of file names and eye-color labels.

    Column 0 of the CSV is read as the image file name (joined onto
    ``img_dir``) and column 1 as the label.
    """

    def __init__(self, csv_file, img_dir, transform=None):
        """
        Args:
            csv_file (str): Path to the CSV file with image names and labels.
            img_dir (str): Directory with all the images.
            transform (callable, optional): Transform to be applied on an image.
        """
        self.data = pd.read_csv(csv_file)
        self.img_dir = img_dir
        self.transform = transform

    def __len__(self):
        """Return the number of rows in the CSV index."""
        return len(self.data)

    def __getitem__(self, idx):
        """Load the sample at position ``idx``.

        Args:
            idx (int): Row position in the CSV index.

        Returns:
            tuple: ``(image, label)`` where ``image`` is the RGB image after
            ``transform`` (if one was given) and ``label`` is the value in
            column 1 of the row (not used by the autoencoder).
        """
        img_name = os.path.join(self.img_dir, self.data.iloc[idx, 0])  # Image file path
        label = self.data.iloc[idx, 1]  # Eye color label (not used in autoencoder)

        # Load the image exactly once. ``convert`` returns an independent
        # in-memory copy, so the file handle can be released immediately.
        with Image.open(img_name) as raw:
            image = raw.convert("RGB")

        if self.transform:
            image = self.transform(image)
        return image, label  # Return image and label

# Dataset locations
# Replace with the path to your .csv file
csv_file = "/content/drive/MyDrive/ML final project/datasets/iris_labels_part1.csv"
# Replace with the path to the folder containing images
img_dir = "/content/drive/MyDrive/ML final project/datasets/CLASSES_400_300_Part1"

# Preprocessing applied to every image: resize, convert to a tensor,
# then shift/scale with mean 0.5 and std 0.5 (i.e. to the range [-1, 1])
transform = transforms.Compose(
    [
        transforms.Resize(image_size),
        transforms.ToTensor(),
        transforms.Normalize((0.5,), (0.5,)),
    ]
)

# Build the full dataset from the CSV index and the image folder
dataset = EyeDataset(csv_file=csv_file, img_dir=img_dir, transform=transform)

# 80/20 train/validation split; the validation set takes whatever remains
# after truncating the training size to an integer
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
train_dataset, val_dataset = random_split(dataset, [train_size, val_size])

# Batch iterators: training batches are shuffled, validation batches are not
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

# Define the Convolutional Autoencoder
class ConvAutoencoder(nn.Module):
    """Convolutional autoencoder with a fully connected bottleneck.

    The encoder applies three stride-2 convolutions and a linear projection
    to ``latent_dim``; the decoder mirrors it with a linear layer followed by
    three stride-2 transposed convolutions and a ``Tanh``. The linear layers
    are sized for a 256 x 16 x 16 feature map, which is what a 128 x 128
    input produces after the three stride-2 convolutions.
    """

    def __init__(self, latent_dim):
        """
        Args:
            latent_dim (int): Number of features in the bottleneck vector.
        """
        super().__init__()

        feature_shape = (256, 16, 16)   # encoder output before flattening
        flat_features = 256 * 16 * 16

        # Shared settings: each layer halves (conv) or doubles (transposed
        # conv) the spatial resolution.
        down = dict(kernel_size=3, stride=2, padding=1)
        up = dict(kernel_size=3, stride=2, padding=1, output_padding=1)

        # Encoder: image -> latent vector
        encoder_layers = [
            nn.Conv2d(3, 64, **down),      # (64, 64, 64)
            nn.ReLU(),
            nn.Conv2d(64, 128, **down),    # (128, 32, 32)
            nn.ReLU(),
            nn.Conv2d(128, 256, **down),   # (256, 16, 16)
            nn.ReLU(),
            nn.Flatten(),
            nn.Linear(flat_features, latent_dim),  # Bottleneck
        ]
        self.encoder = nn.Sequential(*encoder_layers)

        # Decoder: latent vector -> image
        decoder_layers = [
            nn.Linear(latent_dim, flat_features),
            nn.Unflatten(1, feature_shape),
            nn.ConvTranspose2d(256, 128, **up),  # (128, 32, 32)
            nn.ReLU(),
            nn.ConvTranspose2d(128, 64, **up),   # (64, 64, 64)
            nn.ReLU(),
            nn.ConvTranspose2d(64, 3, **up),     # (3, 128, 128)
            nn.Tanh(),  # Output range [-1, 1]
        ]
        self.decoder = nn.Sequential(*decoder_layers)

    def forward(self, x):
        """Encode ``x`` to the latent vector and decode it back to an image."""
        latent = self.encoder(x)
        return self.decoder(latent)

# Instantiate the model
model = ConvAutoencoder(latent_dim=latent_dim).to(device)

# Loss function and optimizer
criterion = nn.MSELoss()  # Reconstruction loss
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training function
def train(model, train_loader, criterion, optimizer, num_epochs):
    """Train ``model`` to reconstruct its own inputs.

    Args:
        model (nn.Module): Network to optimise; left in training mode.
        train_loader: Sized iterable yielding ``(images, labels)`` batches.
            Labels are ignored since training is unsupervised.
        criterion (callable): Loss called as ``criterion(outputs, images)``.
        optimizer (torch.optim.Optimizer): Optimizer over ``model``'s parameters.
        num_epochs (int): Number of passes over ``train_loader``.

    Returns:
        list[float]: Mean per-batch loss of each epoch, in epoch order.

    Raises:
        ZeroDivisionError: If ``train_loader`` has length zero.
    """
    model.train()

    # Send batches to wherever the model's weights live instead of relying on
    # a module-level ``device`` that may not match the model passed in.
    first_param = next(model.parameters(), None)
    batch_device = first_param.device if first_param is not None else None

    epoch_losses = []
    for epoch in range(num_epochs):
        train_loss = 0.0
        for images, _ in train_loader:  # Ignore labels since this is unsupervised
            if batch_device is not None:
                images = images.to(batch_device)

            # Forward pass: the target is the input itself
            outputs = model(images)
            loss = criterion(outputs, images)

            # Backward pass
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            train_loss += loss.item()

        avg_loss = train_loss / len(train_loader)
        epoch_losses.append(avg_loss)
        print(f"Epoch [{epoch + 1}/{num_epochs}], Loss: {avg_loss:.4f}")

    return epoch_losses

# Validation function
def validate(model, val_loader, criterion):
    """Report the mean reconstruction loss of ``model`` over ``val_loader``.

    Args:
        model (nn.Module): Network to evaluate; left in eval mode.
        val_loader: Sized iterable yielding ``(images, labels)`` batches.
            Labels are ignored.
        criterion (callable): Loss called as ``criterion(outputs, images)``.

    Returns:
        float: Mean per-batch loss, the same value that is printed.

    Raises:
        ZeroDivisionError: If ``val_loader`` has length zero.
    """
    model.eval()

    # Send batches to wherever the model's weights live instead of relying on
    # a module-level ``device`` that may not match the model passed in.
    first_param = next(model.parameters(), None)
    batch_device = first_param.device if first_param is not None else None

    val_loss = 0.0
    with torch.no_grad():  # no gradients needed for evaluation
        for images, _ in val_loader:
            if batch_device is not None:
                images = images.to(batch_device)

            # Forward pass
            outputs = model(images)
            loss = criterion(outputs, images)

            val_loss += loss.item()

    avg_loss = val_loss / len(val_loader)
    print(f"Validation Loss: {avg_loss:.4f}")
    return avg_loss

# Visualize some reconstructed images
def visualize_reconstructions(model, data_loader, num_images=6):
    """Plot originals (top row) against their reconstructions (bottom row).

    Only the first batch of ``data_loader`` is used. Pixel values are mapped
    back with ``x * 0.5 + 0.5``, which assumes the inputs were normalised with
    mean 0.5 and std 0.5.

    Args:
        model (nn.Module): Autoencoder to run; left in eval mode.
        data_loader: Iterable yielding ``(images, labels)`` batches.
        num_images (int): Maximum number of image pairs to show. Fewer are
            shown when the first batch is smaller.
    """
    model.eval()
    images, _ = next(iter(data_loader))

    # Send the batch to wherever the model's weights live instead of relying
    # on a module-level ``device`` that may not match the model passed in.
    first_param = next(model.parameters(), None)
    if first_param is not None:
        images = images.to(first_param.device)

    with torch.no_grad():
        reconstructed = model(images)

    # Unnormalize, then clamp so rounding error cannot push values outside
    # the [0, 1] range that imshow expects for float RGB data.
    images = (images.cpu() * 0.5 + 0.5).clamp(0.0, 1.0)
    reconstructed = (reconstructed.cpu() * 0.5 + 0.5).clamp(0.0, 1.0)

    # Never index past the end of a short batch.
    count = min(num_images, images.size(0))
    if count < 1:
        return

    # squeeze=False keeps ``axes`` two-dimensional even when count == 1.
    fig, axes = plt.subplots(2, count, figsize=(2 * count, 4), squeeze=False)

    for i in range(count):
        # Original images (CHW -> HWC for matplotlib)
        axes[0, i].imshow(images[i].permute(1, 2, 0).numpy())
        axes[0, i].axis("off")
        axes[0, i].set_title("Original")

        # Reconstructed images
        axes[1, i].imshow(reconstructed[i].permute(1, 2, 0).numpy())
        axes[1, i].axis("off")
        axes[1, i].set_title("Reconstructed")

    plt.tight_layout()
    plt.show()

# Main script
if __name__ == "__main__":
    # Stage 1: fit the autoencoder on the training split.
    print("Training autoencoder...")
    train(
        model=model,
        train_loader=train_loader,
        criterion=criterion,
        optimizer=optimizer,
        num_epochs=num_epochs,
    )

    # Stage 2: report the reconstruction loss on the held-out split.
    print("Validating autoencoder...")
    validate(model=model, val_loader=val_loader, criterion=criterion)

    # Stage 3: show originals next to reconstructions from the validation data.
    print("Visualizing reconstructions...")
    visualize_reconstructions(model=model, data_loader=val_loader)

Using device: cuda
Training autoencoder...
