# **Suryakanta Karan (M22AIE207) m22aie207@iitj.ac.in**

In [1]:
import os
from google.colab import drive
# Mount Google Drive inside the Colab VM so the notebook can read/write files under /content/drive.
drive.mount('/content/drive', force_remount=True)
# Make the assignment folder the current working directory.
# NOTE(review): this is the Fractal-1_Assignment-1 folder, while every other path in
# this notebook points at Fractal-2_Assignment-2 — confirm this is the intended directory.
os.chdir('/content/drive/MyDrive/surya/DL_Assignment/Fractal-1_Assignment-1')

Mounted at /content/drive


In [9]:
# Import libraries
import numpy as np
import matplotlib.pyplot as plt
from keras.layers import Dense, Conv2D, MaxPooling2D, UpSampling2D, Flatten, Reshape
from keras.models import Model
from keras.datasets import mnist
from keras.preprocessing.image import ImageDataGenerator


In [2]:
import os
import urllib.request
import tarfile

# Specify the URL and file name of the PASCAL VOC 2007 train/val archive
url = "http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtrainval_06-Nov-2007.tar"
file_name = "VOCtrainval_06-Nov-2007.tar"

# Folder (on the mounted Drive) in which the archive is stored
file_path = "/content/drive/MyDrive/surya/DL_Assignment/Fractal-2_Assignment-2/"

# Directory the archive is extracted into
output_directory = os.path.join(file_path, "VOC2007")

# Only download/extract when the extracted directory is not there yet
if not os.path.exists(output_directory):
    archive_path = os.path.join(file_path, file_name)

    # urlretrieve does not create missing parent folders
    os.makedirs(file_path, exist_ok=True)

    if not os.path.exists(archive_path):
        # Download to a temporary name and rename only on success, so that an
        # interrupted download is never mistaken for a complete archive on the
        # next run.
        partial_path = archive_path + ".part"
        urllib.request.urlretrieve(url, partial_path)
        os.replace(partial_path, archive_path)

    # Open the TAR archive and extract everything into output_directory
    with tarfile.open(archive_path, 'r') as tar_ref:
        if hasattr(tarfile, 'data_filter'):
            # The archive comes over plain HTTP: use the 'data' extraction
            # filter (when this Python version provides it) so members cannot
            # escape output_directory via absolute paths or '..' components.
            tar_ref.extractall(output_directory, filter='data')
        else:
            tar_ref.extractall(output_directory)
else:
    print(f"The directory '{output_directory}' already exists. No need to download and extract.")


In [2]:
import os
import shutil
import random
from sklearn.model_selection import train_test_split
import cv2

# Define the paths to your dataset and where you want to store the splits.
# dataset_path is the VOC2007 root whose JPEGImages/ and Annotations/ sub-folders are
# read further down in this cell; output_path receives the train/val/test sub-folders.
# NOTE(review): output_path is located inside dataset_path.
dataset_path = '/content/drive/MyDrive/surya/DL_Assignment/Fractal-2_Assignment-2/VOC2007/VOCdevkit/VOC2007/'
output_path = '/content/drive/MyDrive/surya/DL_Assignment/Fractal-2_Assignment-2/VOC2007/VOCdevkit/VOC2007/Output'

# Define the desired image size and pixel normalization values.
# image_size is handed to cv2.resize; pixel_mean / pixel_std are per-channel values
# applied to pixels after they have been divided by 255.
# NOTE(review): these look like the usual ImageNet statistics in RGB order, whereas
# cv2.imread returns channels in BGR order — confirm the intended channel order.
image_size = (224, 224)
pixel_mean = (0.485, 0.456, 0.406)
pixel_std = (0.229, 0.224, 0.225)

# Function to resize and normalize an image
def preprocess_image(image_path):
    """Load an image, resize it to ``image_size`` and standardise each channel.

    Args:
        image_path: Path of the image file to read with OpenCV.

    Returns:
        A float array in OpenCV's BGR channel order holding
        ``(pixel / 255 - mean) / std`` per channel.

    Raises:
        ValueError: If OpenCV cannot read ``image_path`` (``cv2.imread``
            signals this by returning ``None``).
    """
    img = cv2.imread(image_path)
    if img is None:
        # Fail with the offending path instead of an opaque error inside cv2.resize
        raise ValueError(f"Could not read image: {image_path}")
    img = cv2.resize(img, image_size)
    # pixel_mean / pixel_std are listed in RGB order but cv2.imread yields BGR,
    # so reverse them to line each statistic up with its own channel.
    img = (img / 255.0 - pixel_mean[::-1]) / pixel_std[::-1]
    return img

# Create output directories
os.makedirs(output_path, exist_ok=True)
for split in ('train', 'val', 'test'):
    os.makedirs(os.path.join(output_path, split), exist_ok=True)

# List of image filenames: keep only JPEG files (anything else in the folder
# cannot be decoded) and sort them so the seeded split below is reproducible
# regardless of the order os.listdir happens to return.
image_files = sorted(
    name for name in os.listdir(os.path.join(dataset_path, 'JPEGImages'))
    if name.lower().endswith('.jpg')
)

# Split the dataset into train, validation, and test sets
if True:  # Change to False for 70-10-20 split
    train_size, val_size, test_size = 0.8, 0.1, 0.1
else:
    train_size, val_size, test_size = 0.7, 0.1, 0.2

train_files, holdout_files = train_test_split(image_files, train_size=train_size, random_state=42)
# Divide the hold-out part according to the requested val/test proportions
# (0.5 for 80-10-10, 2/3 for 70-10-20) instead of always halving it.
val_files, test_files = train_test_split(
    holdout_files,
    test_size=test_size / (val_size + test_size),
    random_state=42,
)

# Resize and store the images, together with their XML annotations
for split, files in [('train', train_files), ('val', val_files), ('test', test_files)]:
    split_dir = os.path.join(output_path, split)
    for file in files:
        img = cv2.imread(os.path.join(dataset_path, 'JPEGImages', file))
        if img is None:
            print(f"Skipping unreadable image: {file}")
            continue

        # Store the resized 8-bit image. Mean/std-normalised floats must not be
        # written with cv2.imwrite: they are saturated to 8-bit integers, which
        # leaves an almost black JPEG. Scaling is done when the images are loaded.
        cv2.imwrite(os.path.join(split_dir, file), cv2.resize(img, image_size))

        # Copy the matching annotation directly; this avoids re-listing the
        # split folder once per annotation file.
        # NOTE(review): the XML is copied unchanged — if it stores pixel
        # coordinates they refer to the original, un-resized image.
        annotation = os.path.splitext(file)[0] + '.xml'
        annotation_src = os.path.join(dataset_path, 'Annotations', annotation)
        if os.path.exists(annotation_src):
            shutil.copy(annotation_src, os.path.join(split_dir, annotation))

print("Dataset preparation and split completed.")


Dataset preparation and split completed.


In [None]:
import os
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras.layers import Input, Dense, GaussianNoise, Masking
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import mean_squared_error, mean_absolute_error
import cv2
from tensorflow.keras.layers import Reshape

#decoder = Reshape(input_shape[1:])(decoder)

# Function to create and train autoencoder
def create_autoencoder(bottleneck_dim):
    """Build and compile a fully-connected autoencoder for 224x224x3 images.

    The image is flattened, compressed to ``bottleneck_dim`` units (ReLU) and
    expanded back to the full pixel count (sigmoid) before being reshaped to
    the input shape.

    Args:
        bottleneck_dim: Number of units in the bottleneck (encoder) layer.

    Returns:
        A compiled Keras ``Model`` (Adam, learning rate 0.001, MSE loss) that
        maps ``(224, 224, 3)`` inputs to ``(224, 224, 3)`` outputs.
    """
    # Local import: Flatten is not part of this cell's tensorflow.keras imports.
    from tensorflow.keras.layers import Flatten

    input_shape = (224, 224, 3)
    flat_dim = int(np.prod(input_shape))

    # Encoder
    input_layer = Input(shape=input_shape)
    # Without flattening, Dense only acts on the last (channel) axis, so the
    # decoder would emit (224, 224, flat_dim) values and the final Reshape to
    # input_shape could never match.
    flattened = Flatten()(input_layer)
    encoder = Dense(bottleneck_dim, activation='relu')(flattened)

    # Decoder
    decoder = Dense(flat_dim, activation='sigmoid')(encoder)
    # NOTE(review): GaussianNoise and Masking are applied to the decoder
    # *output*; kept as in the original design, but confirm they were not
    # meant to corrupt the encoder input instead.
    decoder = GaussianNoise(0.1)(decoder)
    decoder = Masking(mask_value=0.0)(decoder)
    decoder = Reshape(input_shape)(decoder)

    # Autoencoder
    autoencoder = Model(inputs=input_layer, outputs=decoder)
    autoencoder.compile(optimizer=Adam(learning_rate=0.001), loss='mean_squared_error')

    return autoencoder

# Function to apply masking strategy to images
def apply_mask(images, mask_percentage):
    """Zero out a random subset of the entries of ``images``.

    Every entry is independently multiplied by 0 with probability
    ``mask_percentage`` and by 1 otherwise.

    Args:
        images: Array of image data (any shape).
        mask_percentage: Probability of dropping an individual entry.

    Returns:
        The element-wise product of ``images`` and the random 0/1 mask.
    """
    keep_probability = 1 - mask_percentage
    keep_flags = np.random.choice(
        [0, 1],
        size=images.shape,
        p=[mask_percentage, keep_probability],
    )
    return images * keep_flags

# Function to load and preprocess images
def load_and_preprocess_image(image_path):
    """Read an image with OpenCV and divide its pixel values by 255.

    Args:
        image_path: Path of the file to read.

    Returns:
        The scaled image, or ``None`` when ``cv2.imread`` could not read the
        file.
    """
    raw = cv2.imread(image_path)
    if raw is None:
        return None
    # Normalize the pixel values
    return raw / 255.0

# Function to load the dataset
def load_dataset(split_path):
    """Load the train/val/test image folders found under ``split_path``.

    Files are visited in sorted order so that sample indices are the same on
    every run. The ``.xml`` annotation files that the preparation cell copies
    next to the images are skipped up front; any other file OpenCV cannot
    decode is dropped as well.

    Args:
        split_path: Directory containing ``train``, ``val`` and ``test``
            sub-folders.

    Returns:
        A list ``[train, val, test]`` of NumPy arrays, each stacking the
        images of one split as returned by ``load_and_preprocess_image``.
    """
    dataset = []
    for split in ['train', 'val', 'test']:
        split_dir = os.path.join(split_path, split)
        split_images = []
        for file in sorted(os.listdir(split_dir)):
            if file.lower().endswith('.xml'):
                # Annotation, not an image: do not hand it to the decoder
                continue
            img = load_and_preprocess_image(os.path.join(split_dir, file))
            if img is not None:
                split_images.append(img)
        dataset.append(np.array(split_images))
    return dataset

# Train and evaluate autoencoder
def train_and_evaluate_autoencoder(bottleneck_dim, mask_percentage, dataset):
    """Train a fresh autoencoder on masked images and score it on the test split.

    The model receives randomly masked images as input and the unmasked images
    as reconstruction target.

    Args:
        bottleneck_dim: Bottleneck size passed to ``create_autoencoder``.
        mask_percentage: Probability of zeroing an entry, passed to
            ``apply_mask`` for the train, validation and test inputs.
        dataset: Sequence ``[train, val, test]`` of image arrays.

    Returns:
        Tuple ``(mse, mae)`` between the test images and their reconstructions.
    """
    train_images, val_images, test_images = dataset[0], dataset[1], dataset[2]
    autoencoder = create_autoencoder(bottleneck_dim)

    # Train autoencoder
    autoencoder.fit(
        apply_mask(train_images, mask_percentage),
        train_images,
        epochs=10,
        batch_size=32,
        validation_data=(apply_mask(val_images, mask_percentage), val_images),
        verbose=2,
    )

    # Evaluate autoencoder
    reconstructed_images = autoencoder.predict(apply_mask(test_images, mask_percentage))

    # The scikit-learn regression metrics reject arrays with more than two
    # dimensions, so compare the images as flat vectors. Averaging over all
    # entries equals the uniform average over samples and pixels.
    y_true = np.ravel(test_images)
    y_pred = np.ravel(reconstructed_images)
    mse = mean_squared_error(y_true, y_pred)
    mae = mean_absolute_error(y_true, y_pred)

    return mse, mae

# Load the dataset
dataset_path = '/content/drive/MyDrive/surya/DL_Assignment/Fractal-2_Assignment-2/VOC2007/VOCdevkit/VOC2007/Output'
dataset = load_dataset(dataset_path)

# Experiment 3: Varying bottleneck dimensions (no masking)
bottleneck_dimensions = [256, 128, 64, 32, 16]
reconstruction_errors = []

for bottleneck_dim in bottleneck_dimensions:
    mse, _ = train_and_evaluate_autoencoder(bottleneck_dim, 0, dataset)
    reconstruction_errors.append(mse)

# Bottleneck size with the lowest test MSE
best_bottleneck_dim = bottleneck_dimensions[int(np.argmin(reconstruction_errors))]

# Experiment 4: Masking strategy (using the best bottleneck size)
mask_percentages = [0.2, 0.4, 0.6, 0.8]
mse_results = []

for mask_percentage in mask_percentages:
    mse, _ = train_and_evaluate_autoencoder(best_bottleneck_dim, mask_percentage, dataset)
    mse_results.append(mse)

# Experiment 5: Plot reconstruction errors.
# Bottleneck sizes (16..256) and mask fractions (0.2..0.8) live on very
# different scales; on a shared x-axis the masking curve collapses into a
# vertical sliver near zero, so each series gets its own axes.
fig, (ax_bottleneck, ax_mask) = plt.subplots(1, 2, figsize=(12, 5))

ax_bottleneck.plot(bottleneck_dimensions, reconstruction_errors, marker='o', label='Bottleneck Dimensions')
ax_bottleneck.set_xlabel('Bottleneck Dimension')
ax_bottleneck.set_ylabel('Mean Squared Error')
ax_bottleneck.legend()

ax_mask.plot(mask_percentages, mse_results, marker='o', label='Masking Strategy')
ax_mask.set_xlabel('Mask Percentage')
ax_mask.set_ylabel('Mean Squared Error')
ax_mask.legend()

fig.suptitle('Reconstruction Error for Autoencoder Models')
plt.show()

# Experiment 6: Evaluation
print(f"Best Bottleneck Dimension: {best_bottleneck_dim}")

# Train the autoencoder with the best bottleneck dimension and evaluate
final_mse, final_mae = train_and_evaluate_autoencoder(best_bottleneck_dim, 0.4, dataset)
print(f"Final MSE: {final_mse}")
print(f"Final MAE: {final_mae}")


In [None]:
import matplotlib.pyplot as plt
from skimage.metrics import structural_similarity as ssim

# Function to visualize and compare images
def visualize_images(original, masked, reconstructed, title):
    """Show the original, masked and reconstructed image side by side.

    Args:
        original: Image drawn in the left panel.
        masked: Image drawn in the middle panel.
        reconstructed: Image drawn in the right panel.
        title: Super-title placed above the three panels.
    """
    panels = (
        (original, "Original Image"),
        (masked, "Masked Image"),
        (reconstructed, "Reconstructed Image"),
    )

    plt.figure(figsize=(15, 5))

    # One row, three columns; subplot positions are 1-based
    for position, (image, caption) in enumerate(panels, start=1):
        plt.subplot(1, 3, position)
        plt.imshow(image)
        plt.title(caption)

    plt.suptitle(title)
    plt.show()

# Function to calculate Structural Similarity Index (SSI)
def calculate_ssim(original, reconstructed, data_range=1.0):
    """Structural similarity between two colour images (channels last).

    Args:
        original: Reference image, channels on the last axis.
        reconstructed: Image compared against ``original``; same shape.
        data_range: Span between the smallest and largest possible pixel
            value. Defaults to 1.0 because the images in this notebook are
            divided by 255 on load and reconstructed through a sigmoid.
            scikit-image needs this to be explicit for floating-point input.

    Returns:
        The mean structural similarity index as returned by scikit-image.
    """
    try:
        # `channel_axis` replaces the deprecated `multichannel` flag
        return ssim(original, reconstructed, channel_axis=-1, data_range=data_range)
    except TypeError:
        # Older scikit-image releases only know `multichannel`
        return ssim(original, reconstructed, multichannel=True, data_range=data_range)

# Load the best split and best masking strategy
best_split = '80-10-10'  # Choose the best split (e.g., '80-10-10')
best_bottleneck_dim = 256  # Choose the best bottleneck dimension
best_mask_percentage = 0.4  # Choose the best masking strategy (e.g., 0.4)

# Load the dataset for the best split
best_dataset_path = '/content/drive/MyDrive/surya/DL_Assignment/Fractal-2_Assignment-2/VOC2007/VOCdevkit/VOC2007/Output/'
best_dataset = load_dataset(best_dataset_path)
best_train, best_val, best_test = best_dataset

# Train the model in this cell and keep a handle on it.
# train_and_evaluate_autoencoder only returns the error values, so the model
# needed for the sample reconstruction below has to be built here; relying on
# an `autoencoder` variable from elsewhere fails on a fresh kernel.
autoencoder = create_autoencoder(best_bottleneck_dim)
autoencoder.fit(
    apply_mask(best_train, best_mask_percentage),
    best_train,
    epochs=10,
    batch_size=32,
    validation_data=(apply_mask(best_val, best_mask_percentage), best_val),
    verbose=2,
)

# Test-set reconstruction errors for the best split and masking strategy
best_test_reconstructed = autoencoder.predict(apply_mask(best_test, best_mask_percentage))
best_mse = float(np.mean((best_test - best_test_reconstructed) ** 2))
best_mae = float(np.mean(np.abs(best_test - best_test_reconstructed)))

# Visualize and compare images for a sample
sample_index = 0  # Choose an index from the dataset
original_image = best_dataset[2][sample_index]  # Test set image
masked_image = apply_mask(original_image, best_mask_percentage)
# predict expects a batch, so add a leading batch axis; [0] removes it again
reconstructed_image = autoencoder.predict(masked_image[None, ...])
reconstructed_sample = reconstructed_image[0].astype(original_image.dtype)

# The images were read with cv2.imread (BGR channel order) while matplotlib
# interprets arrays as RGB, so reverse the channel axis for display only.
# masked_image is a single image here: pass it whole, not its first pixel row.
visualize_images(
    original_image[..., ::-1],
    masked_image[..., ::-1],
    reconstructed_sample[..., ::-1],
    "Image Comparison",
)

# Calculate SSI for image quality
ssi = calculate_ssim(original_image, reconstructed_sample)
print(f"SSI: {ssi}")


# Task 2

In [None]:
# Import libraries
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from sklearn.metrics import confusion_matrix, roc_auc_score

# Download and preprocess the STL-10 dataset
transform = transforms.Compose(
    [transforms.Resize((28, 28)), # Resize the images to 28x28
     transforms.ToTensor()]) # Convert the images to tensors with values in [0, 1]
# No Normalize step: the autoencoder in this notebook ends with a Sigmoid, so its
# output is confined to [0, 1]; reconstruction targets shifted to [-1, 1] could
# never be matched.

# Dedicated download folder for STL-10. Downloading into the VOC JPEGImages
# directory would mix the STL-10 archive with the VOC images that the dataset
# preparation cell enumerates with os.listdir.
STL10_ROOT = '/content/drive/MyDrive/surya/DL_Assignment/Fractal-2_Assignment-2/STL10'

trainset = torchvision.datasets.STL10(root=STL10_ROOT, split='train', download=True, transform=transform) # Load the train set
testset = torchvision.datasets.STL10(root=STL10_ROOT, split='test', download=True, transform=transform) # Load the test set
unlabeledset = torchvision.datasets.STL10(root=STL10_ROOT, split='unlabeled', download=True, transform=transform) # Load the unlabeled set

trainloader = torch.utils.data.DataLoader(trainset, batch_size=32, shuffle=True, num_workers=2) # Create a data loader for the train set
testloader = torch.utils.data.DataLoader(testset, batch_size=32, shuffle=False, num_workers=2) # Create a data loader for the test set
unlabeledloader = torch.utils.data.DataLoader(unlabeledset, batch_size=32, shuffle=True, num_workers=2) # Create a data loader for the unlabeled set

# Define an autoencoder with three hidden layers and a bottleneck dimension of 256
class Autoencoder(nn.Module):
    """Convolutional autoencoder for 3-channel 28x28 images.

    The encoder halves the spatial size three times (28 -> 14 -> 7 -> 4) and
    ends with an 8-channel 4x4 code; the decoder mirrors it back to a
    3-channel 28x28 image with values in [0, 1] (Sigmoid).
    """

    def __init__(self):
        super(Autoencoder, self).__init__()
        # Encoder
        self.encoder = nn.Sequential(
            nn.Conv2d(3, 16, 3, padding=1), # First convolutional layer with 16 filters (28x28)
            nn.ReLU(), # First activation function
            nn.MaxPool2d(2), # First pooling layer: 28x28 -> 14x14
            nn.Conv2d(16, 8, 3, padding=1), # Second convolutional layer with 8 filters
            nn.ReLU(), # Second activation function
            nn.MaxPool2d(2), # Second pooling layer: 14x14 -> 7x7
            nn.Conv2d(8, 8, 3, padding=1), # Third convolutional layer with 8 filters
            nn.ReLU(), # Third activation function
            # ceil_mode keeps the odd last row/column: 7x7 -> 4x4 (floor mode
            # would give 3x3), producing the encoded representation (8, 4, 4)
            nn.MaxPool2d(2, ceil_mode=True)
        )
        # Decoder
        # With stride 1 and kernel 3 a transposed convolution outputs
        # H + 2 - 2 * padding, so padding=1 keeps the size and padding=2
        # shrinks it by two. The original unpadded layers grew the map to
        # 52x52, which cannot be compared with the 28x28 input.
        self.decoder = nn.Sequential(
            nn.ConvTranspose2d(8, 8, 3, padding=1), # First deconvolutional layer with 8 filters (4x4)
            nn.ReLU(), # First activation function
            nn.Upsample(scale_factor=2), # First upsampling layer: 4x4 -> 8x8
            nn.ConvTranspose2d(8, 8, 3, padding=1), # Second deconvolutional layer with 8 filters (8x8)
            nn.ReLU(), # Second activation function
            nn.Upsample(scale_factor=2), # Second upsampling layer: 8x8 -> 16x16
            nn.ConvTranspose2d(8, 16, 3, padding=2), # Third deconvolutional layer with 16 filters: 16x16 -> 14x14
            nn.ReLU(), # Third activation function
            nn.Upsample(scale_factor=2), # Third upsampling layer: 14x14 -> 28x28
            nn.ConvTranspose2d(16, 3, 3, padding=1), # Final layer to produce the decoded image of size (3, 28, 28)
            nn.Sigmoid() # Final activation function, output in [0, 1]
        )

    def forward(self, x):
        """Encode ``x`` and decode it again; the result has the shape of ``x``
        for 28x28 inputs."""
        x = self.encoder(x) # Encode the input image
        x = self.decoder(x) # Decode the encoded representation
        return x

# Instantiate the autoencoder model
# NOTE(review): this rebinds the module-level name `autoencoder`, which the Keras
# visualisation cell earlier in the notebook also refers to — running cells out of
# order will make that cell pick up this PyTorch module.
autoencoder = Autoencoder()

# Define the loss function and the optimizer
criterion = nn.MSELoss() # Use mean squared error as the loss function
optimizer = optim.Adam(autoencoder.parameters()) # Use Adam as the optimizer (no hyper-parameters passed, so library defaults apply)

# Train the autoencoder on the unlabeled data for 50 epochs
num_epochs = 50
for epoch in range(num_epochs):
    running_loss = 0.0
    for i, data in enumerate(unlabeledloader, 0):
        inputs, _ = data # Get the inputs and ignore the labels
        optimizer.zero_grad() # Zero the parameter gradients
        outputs = autoencoder(inputs) # Forward pass
        loss = criterion(outputs, inputs) # Compute the loss
        loss.backward() # Backward pass
        optimizer.step() # Update