# Importing necessary modules

In [None]:
!pip install -r requirements.txt

In [None]:
#Importing necessary modules
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import os
from sklearn.metrics import confusion_matrix
from PIL import Image
import pandas as pd
import zipfile
import shutil

# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Model Instantiation and Training

In [None]:
# Defining the ResNeXt architecture
class Block(nn.Module):
    """
    Bottleneck residual block with a grouped 3x3 convolution (ResNeXt style).

    Args:
        in_planes (int): Number of input channels.
        cardinality (int): Number of groups in the grouped convolution (Default: 32).
        bottleneck_width (int): Channels per group inside the bottleneck (Default: 4).
        stride (int): Stride of the grouped 3x3 convolution and of the projection
            shortcut, when one is created (Default: 1).

    Layers:
        - conv1: 1x1 convolution to `cardinality * bottleneck_width` channels.
        - bn1: Batch normalization applied after conv1 only.
        - conv2: 3x3 grouped convolution (carries the stride).
        - conv3: 1x1 convolution to `expansion * cardinality * bottleneck_width` channels.
        - shortcut: Identity, or a strided 1x1 convolution when the input shape
          does not match the block's output shape.

    Forward pass:
        relu(bn1(conv1(x))) -> relu(conv2) -> conv3, then the shortcut is added
        and a final ReLU is applied.
    """
    # Ratio of the block's output channels to its bottleneck (group) width.
    expansion = 2

    def __init__(self, in_planes, cardinality=32, bottleneck_width=4, stride=1):
        super().__init__()
        group_width = cardinality * bottleneck_width
        out_planes = self.expansion * group_width

        # Attribute names and creation order are kept stable: they determine the
        # state_dict keys and the order in which parameters are initialised.
        self.conv1 = nn.Conv2d(in_planes, group_width, kernel_size=1, bias=True)
        self.bn1 = nn.BatchNorm2d(group_width)
        self.conv2 = nn.Conv2d(
            group_width,
            group_width,
            kernel_size=3,
            stride=stride,
            padding=1,
            groups=cardinality,
            bias=True,
        )
        self.conv3 = nn.Conv2d(group_width, out_planes, kernel_size=1, bias=True)

        # The residual can only be added directly when neither the spatial size
        # nor the channel count changes; otherwise project x with a 1x1 conv.
        shape_preserved = stride == 1 and in_planes == out_planes
        if shape_preserved:
            self.shortcut = nn.Sequential()
        else:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=True),
            )

    def forward(self, x):
        hidden = F.relu(self.bn1(self.conv1(x)))
        hidden = F.relu(self.conv2(hidden))
        hidden = self.conv3(hidden)
        hidden += self.shortcut(x)
        return F.relu(hidden)


class ResNeXt(nn.Module):
    """
    Three-stage ResNeXt classifier built from `Block` modules, followed by a
    two-layer fully connected head.

    Args:
        num_blocks (list): Number of blocks in each of the three stages.
        cardinality (int): Number of groups in the grouped convolutions.
        bottleneck_width (int): Per-group width used by the first stage; it is
            doubled after every stage.
        num_classes (int): Number of output classes (Default: 2, for binary classification).

    Layers:
        - conv1: Initial 3x3 convolution (3 -> 16 channels, padding 1).
        - layer1, layer2, layer3: Stacks of `Block`; the first block of each
          stage uses stride 1, 2 and 2 respectively.
        - linear1: Fully connected layer with 512 units.
        - linear2: Output layer with `num_classes` units.

    Forward pass:
        relu(conv1) -> layer1 -> layer2 -> layer3 -> flatten -> relu(linear1) -> linear2.
        The result is the raw (un-normalised) class scores.

    Note:
        `self.bottleneck_width` is mutated while the stages are built, so after
        construction it holds 8x the value passed in.
    """

    def __init__(self, num_blocks, cardinality, bottleneck_width, num_classes=2):
        super().__init__()
        self.cardinality = cardinality
        self.bottleneck_width = bottleneck_width
        self.in_planes = 16

        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, bias=True, padding=1)
        self.layer1 = self._make_layer(num_blocks[0], 1)
        self.layer2 = self._make_layer(num_blocks[1], 2)
        self.layer3 = self._make_layer(num_blocks[2], 2)

        # The head is sized for 32x32 inputs: the two stride-2 stages leave an
        # 8x8 map with Block.expansion * cardinality * (4 * bottleneck_width)
        # channels, i.e. cardinality * bottleneck_width * 512 flattened values.
        flattened_features = cardinality * bottleneck_width * 512
        self.linear1 = nn.Linear(flattened_features, 512)
        self.linear2 = nn.Linear(512, num_classes)  # Binary classification output (real/fake)

    def _make_layer(self, num_blocks, stride):
        # Only the first block of a stage carries the requested stride.
        block_strides = [stride, *([1] * (num_blocks - 1))]
        stage_out_planes = Block.expansion * self.cardinality * self.bottleneck_width

        stage = []
        for block_stride in block_strides:
            stage.append(
                Block(self.in_planes, self.cardinality, self.bottleneck_width, block_stride)
            )
            # Every later block consumes the previous block's output channels.
            self.in_planes = stage_out_planes

        # The next stage is built with twice the bottleneck width.
        self.bottleneck_width *= 2
        return nn.Sequential(*stage)

    def forward(self, x):
        features = F.relu(self.conv1(x))
        for stage in (self.layer1, self.layer2, self.layer3):
            features = stage(features)
        flat = torch.flatten(features, 1)
        hidden = F.relu(self.linear1(flat))
        return self.linear2(hidden)


# Factory for the ResNeXt configuration used in this notebook
def ResNeXt29_2x64d():
    """Return a new ResNeXt with three stages of three blocks, cardinality 2 and bottleneck width 64."""
    blocks_per_stage = [3, 3, 3]
    return ResNeXt(blocks_per_stage, cardinality=2, bottleneck_width=64)

In [None]:
# Unzip both archives into the working directory:
#   custom_dataset.zip -> custom_dataset/
#   test_dataset.zip   -> test_dataset/
for archive_name, target_dir in (
    ("custom_dataset.zip", 'custom_dataset'),
    ("test_dataset.zip", 'test_dataset'),
):
    with zipfile.ZipFile(archive_name, 'r') as zip_ref:
        zip_ref.extractall(target_dir)

In [None]:
# Function to remove .ipynb_checkpoints folder from the dataset
def remove_ipynb_checkpoints(dataset_dir):
    """Delete every `.ipynb_checkpoints` folder found anywhere under `dataset_dir`.

    The tree is walked bottom-up. Directories without a checkpoint folder are
    left untouched, and a non-existent `dataset_dir` is silently ignored
    because `os.walk` yields nothing for it.
    """
    checkpoint_name = '.ipynb_checkpoints'
    for parent, subdirs, _files in os.walk(dataset_dir, topdown=False):
        if checkpoint_name not in subdirs:
            continue
        # Remove the checkpoint folder together with everything inside it.
        shutil.rmtree(os.path.join(parent, checkpoint_name))

# Locations of the train and test splits inside the extracted dataset
train_dir = 'custom_dataset/train'
test_dir = 'custom_dataset/test'

# Strip notebook checkpoint folders from every image directory used below
# (presumably so they are not picked up as class folders or image files).
for image_dir in (
    train_dir,
    test_dir,
    'test_dataset/test-interiit/perturbed_images_32',
):
    remove_ipynb_checkpoints(image_dir)


In [None]:
# Load the dataset, train the model, report train/test accuracy and save the weights.
train_dir = 'custom_dataset/train'
test_dir = 'custom_dataset/test'  # Path to your test data

# Both pipelines resize to 32x32 and apply the same per-channel normalization;
# only the training pipeline adds a random horizontal flip for augmentation.
normalize = transforms.Normalize(
    mean=[0.4914, 0.4822, 0.4465],
    std=[0.2023, 0.1994, 0.2010],
)
train_transform = transforms.Compose([
    transforms.Resize((32, 32)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    normalize,
])
test_transform = transforms.Compose([
    transforms.Resize((32, 32)),
    transforms.ToTensor(),
    normalize,
])

train_dataset = datasets.ImageFolder(root=train_dir, transform=train_transform)
test_dataset = datasets.ImageFolder(root=test_dir, transform=test_transform)

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True, num_workers=4)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False, num_workers=4)

# Setup for binary classification: the two-logit output is trained with cross-entropy.
model = ResNeXt29_2x64d().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop
num_epochs = 20
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    hits = 0
    seen = 0
    for batch_images, batch_labels in train_loader:
        batch_images = batch_images.to(device)
        batch_labels = batch_labels.to(device)

        optimizer.zero_grad()
        logits = model(batch_images)
        batch_loss = criterion(logits, batch_labels)
        batch_loss.backward()
        optimizer.step()

        running_loss += batch_loss.item()

        # Predicted class = index of the largest logit in each row.
        batch_preds = torch.max(logits.data, 1)[1]
        seen += batch_labels.size(0)
        hits += (batch_preds == batch_labels).sum().item()

    train_accuracy = 100 * hits / seen
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(train_loader):.4f}, Accuracy: {train_accuracy:.2f}%")

# Test accuracy after training (eval mode, no gradient tracking)
model.eval()
hits = 0
seen = 0
with torch.no_grad():
    for batch_images, batch_labels in test_loader:
        batch_images = batch_images.to(device)
        batch_labels = batch_labels.to(device)
        batch_preds = torch.max(model(batch_images).data, 1)[1]
        seen += batch_labels.size(0)
        hits += (batch_preds == batch_labels).sum().item()

test_accuracy = 100 * hits / seen
print(f"Test Accuracy: {test_accuracy:.2f}%")

# Save model weights
torch.save(model.state_dict(), 'resnext_model.pth')


# Model Inference and Prediction

In [None]:
# Rebuild the network, load the trained weights and print the confusion matrix
# for the held-out test split.
model = ResNeXt29_2x64d().to(device)
# map_location remaps tensors that were saved from a CUDA run, so the checkpoint
# also loads on a CPU-only machine instead of raising a deserialization error.
model.load_state_dict(torch.load('resnext_model.pth', map_location=device))
model.eval()

# Load the test dataset with the same preprocessing that was used for evaluation
# during training (no augmentation).
test_dir = 'custom_dataset/test'
test_transform = transforms.Compose([
    transforms.Resize((32, 32)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.4914, 0.4822, 0.4465], std=[0.2023, 0.1994, 0.2010])
])

test_dataset = datasets.ImageFolder(root=test_dir, transform=test_transform)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False, num_workers=4)

# Collect ground-truth and predicted class indices over the whole test set
all_labels = []
all_preds = []

with torch.no_grad():
    for images, labels in test_loader:
        # Only the images need to be on the model's device; the labels are just recorded.
        outputs = model(images.to(device))
        predicted = outputs.argmax(dim=1)

        all_labels.extend(labels.tolist())
        all_preds.extend(predicted.cpu().tolist())

# Compute confusion matrix (rows: true class, columns: predicted class)
cm = confusion_matrix(all_labels, all_preds)
print(f"Confusion Matrix:\n{cm}")


In [None]:
# Predict a label for every image in the perturbed test set and write the
# results to final_predictions.csv. Relies on `model` and `device` defined by
# the earlier cells.
perturbed_test_dir = 'test_dataset/test-interiit/perturbed_images_32'
perturbed_transform = transforms.Compose([
    transforms.Resize((32, 32)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.4914, 0.4822, 0.4465], std=[0.2023, 0.1994, 0.2010])
])


def _numeric_stem(filename):
    """Return the integer before the first '.' of `filename`, or None if it is not an integer."""
    try:
        return int(filename.split('.')[0])
    except ValueError:
        return None


# Sort the filenames in ascending numeric order. Entries whose name does not
# start with an integer (e.g. ".DS_Store" or a leftover checkpoint folder) are
# skipped instead of aborting the sort with a ValueError.
sorted_filenames = sorted(
    (name for name in os.listdir(perturbed_test_dir) if _numeric_stem(name) is not None),
    key=_numeric_stem,
)

# Prepare for storing predictions
predictions = []
filenames = []

# Make sure BatchNorm uses its running statistics even if this cell is run
# without the evaluation cell having put the model in eval mode first.
model.eval()

# Process each image one at a time, without tracking gradients
with torch.no_grad():
    for filename in sorted_filenames:
        image_path = os.path.join(perturbed_test_dir, filename)

        # Close the underlying file as soon as the pixels are decoded; convert()
        # returns an independent in-memory copy.
        with Image.open(image_path) as raw_image:
            image = raw_image.convert("RGB")
        image = perturbed_transform(image).unsqueeze(0).to(device)  # Add batch dimension

        # Predicted class (0 or 1) = index of the largest logit
        predicted = model(image).argmax(dim=1)
        filenames.append(filename)
        predictions.append(predicted.item())

# Create a DataFrame with filenames and their corresponding predictions
df = pd.DataFrame({
    "filename": filenames,
    "prediction": predictions
})

# Save the DataFrame to a CSV file
df.to_csv('final_predictions.csv', index=False)