In [6]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data import DataLoader
from PIL import Image, UnidentifiedImageError

In [7]:
# Custom collate function to filter out corrupted images
def collate_fn(batch):
    batch = list(filter(lambda x: x is not None, batch))
    return torch.utils.data.dataloader.default_collate(batch)

# Custom loader to handle corrupted images
def safe_loader(path):
    try:
        with open(path, "rb") as f:
            img = Image.open(f)
            return img.convert("RGB")
    except UnidentifiedImageError:
        print(f"Skipping corrupted image: {path}")
        return None


In [8]:
#defining the paths to img folders
train_dir = '/home/arnav/Documents/research_project/assignment1/images/train'
test_dir = '/home/arnav/Documents/research_project/assignment1/images/test'
val_dir = '/home/arnav/Documents/research_project/assignment1/images/val'



1.(b) Use the image transforms to resize the images to size 64 × 64, followed by the normalization
of every image using mean: [0.485, 0.456, 0.406] and standard deviation:
[0.229,0.224,0.225].



In [9]:
#Ive used .Compose to group transforms together
transform = transforms.Compose([transforms.Resize((64,64)),
                                transforms.ToTensor(), #Convert a PIL Image or ndarray to tensor and scale the values accordingly, this transformation scales the pixel values from the range [0, 255] to the range [0, 1].
                               transforms.Normalize(mean = [0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) ]) #Normalize a tensor image with mean and standard deviation. Mean normalization: Subtracts the mean value from each channel (R, G, B) of the image tensor.Standard deviation normalization: Divides each channel by its standard deviation.



1.(c)Read the training, validation and test data from their respective folders using “Imagefolder”
and “DataLoader” packages defined in torchvision and torch after fixing the batch size as 64.

In [10]:
train_data = datasets.ImageFolder(root= train_dir, transform=transform, loader= safe_loader)
test_data = datasets.ImageFolder(root= test_dir, transform=transform, loader = safe_loader)
val_data = datasets.ImageFolder(root= val_dir, transform=transform, loader = safe_loader)

batch_size = 64
train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True, collate_fn=collate_fn)
test_loader = DataLoader(test_data, batch_size=batch_size, shuffle=False, collate_fn=collate_fn)
val_loader = DataLoader(train_data, batch_size=batch_size, shuffle=False, collate_fn=collate_fn)


    The shuffle parameter controls whether the data should be shuffled at the beginning of each epoch:

    Training Data (shuffle=True): Shuffling the training data ensures that the model does not learn the order of the data, which could lead to overfitting. 
    It helps in providing the model with a varied distribution of samples in each batch, leading to better generalization.
    
    Validation and Test Data (shuffle=False): For validation and test data, shuffling is typically not necessary.
    The purpose of these datasets is to evaluate the model's performance on unseen data.
    Shuffling the validation and test data does not affect the evaluation metrics but keeping the order can sometimes help in debugging and reproducibility.

1.(d) Define the simple network architecture using the three linear layers: L1 ; L2 and L3 using
arguments (12288, 84); (84,50) and (50,2) respectively.

In [11]:
class SimpleNN(nn.Module):
    def __init__(self):
        super(SimpleNN, self).__init__()
        self.l1 = nn.Linear(64*64*3, 84) #first layer
        self.l2 = nn.Linear(84, 50)
        self.l3 = nn.Linear(50,2)
        self.relu = nn.ReLU() #Rectified Linear Units: activation function. simply returns max(0,x)

    def forward(self, x):
        x= x.view(x.size(0), -1) # reshapes the input tensor to have the shape (batch_size, num_features), where num_features is the product of the dimensions of the image.
        x = self.relu(self.l1(x)) #pass the flattened input through first layer
        x = self.relu(self.l2(x))
        x = self.l3(x) # passes the output of the second layer through the third fully connected layer, This layer does not have an activation function applied, as it directly outputs the raw scores (logits) for the two classes (cat and fish)

        return x  
        

1.(e) Use the Adam optimizer.

In [12]:
from torchvision.models import alexnet

class CustomAlexNet(nn.Module):
    def __init__(self):
        super(CustomAlexNet, self).__init__()
        self.alexnet = alexnet(pretrained=False)
        self.alexnet.classifier[6] = nn.Linear(4096, 2)#replaced the 6th and final layer of the alexnet classifier with a layer that outputs 2 classes (cat and fish)
        
    def forward(self, x):
        return self.alexnet(x)

1.(f) Copy the model to GPU and complete the training function below:


In [13]:
"""
def train(model, optimizer, loss_fn, train_loader, val_loader, epochs=25, device=”cpu”):
...
...
for epoch in range(1, epochs + 1):
....
model.train()
for batch in train_loader:
...
...
...
model.eval()
...
...
for batch in val_loader:
...
...
...
g)print(epoch, training_loss, val_loss, accuracy)
print(average_accuracy)"""

'\ndef train(model, optimizer, loss_fn, train_loader, val_loader, epochs=25, device=”cpu”):\n...\n...\nfor epoch in range(1, epochs + 1):\n....\nmodel.train()\nfor batch in train_loader:\n...\n...\n...\nmodel.eval()\n...\n...\nfor batch in val_loader:\n...\n...\n...\ng)print(epoch, training_loss, val_loss, accuracy)\nprint(average_accuracy)'

In [14]:
def train(model, optimizer, loss_fn, train_loader, val_loader, epochs=25, device="cpu"):
    model.to(device)
    train_accuracies = []
    val_accuracies = []

    for epoch in range(1, epochs + 1):
        
        model.train()  # Setting the model to train mode
        training_loss = 0
        correct = 0
        total = 0
        for batch in train_loader:
            inputs, targets = batch
            inputs, targets = inputs.to(device), targets.to(device)

            optimizer.zero_grad()  # Set gradient to zero
            outputs = model(inputs)  # Forward pass
            loss = loss_fn(outputs, targets)
            loss.backward()  # Backward pass
            optimizer.step()
            training_loss += loss.item()  # Loss.item() converts the loss tensor to a standard Python number (float)

            # Calculate training accuracy
            _, predicted = torch.max(outputs, 1)
            total += targets.size(0)
            correct += (predicted == targets).sum().item()
        
        train_accuracy = correct / total
        train_accuracies.append(train_accuracy)

        model.eval()  # Setting the model to validation mode
        val_loss = 0
        correct = 0
        total = 0

        with torch.no_grad():  # Disable gradient computation during validation, which saves memory and computation time
            for batch in val_loader:
                inputs, targets = batch
                inputs, targets = inputs.to(device), targets.to(device)
                outputs = model(inputs)
                loss = loss_fn(outputs, targets)
                val_loss += loss.item()
                _, predicted = torch.max(outputs, 1)
                total += targets.size(0)
                correct += (predicted == targets).sum().item()
                
        val_accuracy = correct / total
        val_accuracies.append(val_accuracy)

        print(f"Epoch {epoch}: Train Loss: {training_loss / len(train_loader)}, Train Accuracy: {train_accuracy}, Val Loss: {val_loss / len(val_loader)}, Val Accuracy: {val_accuracy}")

    avg_train_accuracy = sum(train_accuracies) / len(train_accuracies)
    avg_val_accuracy = sum(val_accuracies) / len(val_accuracies)
    print(f"Average Training Accuracy over {epochs} epochs: {avg_train_accuracy}")
    print(f"Average Validation Accuracy over {epochs} epochs: {avg_val_accuracy}")


In [15]:
def test(model, test_loader, device="cpu"):
    model.to(device)
    model.eval()
    correct = 0
    total = 0
    test_loss = 0
    with torch.no_grad():
        for batch in test_loader:
            inputs, targets = batch
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = model(inputs)
            loss = loss_fn(outputs, targets)
            test_loss += loss.item()
            _, predicted = torch.max(outputs, 1)
            total += targets.size(0)
            correct += (predicted == targets).sum().item()
    
    accuracy = correct / total
    print(f"Test Loss: {test_loss / len(test_loader)}, Test Accuracy: {accuracy}")


1.(h) Tweak the following parameters for both the simple network and AlexNet:
1) Learning rate: 0.1, 0.01, 0.001, 0.0001
2) Batch size: 8, 16, 32, 64, 128
3) Activation functions: ReLU, GeLU, SeLU , SiLU, Sigmoid
4) Epochs: 25, 50, 100
5) Number of input/output features only for simple network

In [18]:
batch_size = 64
NN1 = SimpleNN()
optimizer = optim.Adam(NN1.parameters(), lr=0.0001)
loss_fn = nn.CrossEntropyLoss()
device = "cuda" if torch.cuda.is_available() else "cpu"

train(NN1, optimizer, loss_fn, train_loader, val_loader, epochs=25, device=device)
test(NN1, test_loader, device=device)

alex_net = CustomAlexNet()
optimizer = optim.Adam(alex_net.parameters(), lr=0.0001)
loss_fn = nn.CrossEntropyLoss()

train(alex_net, optimizer, loss_fn, train_loader, val_loader, epochs=25, device=device)
test(alex_net, test_loader, device=device)


Epoch 1: Train Loss: 0.6165532523935492, Train Accuracy: 0.6838905775075987, Val Loss: 0.48925653100013733, Val Accuracy: 0.776595744680851
Epoch 2: Train Loss: 0.49354498223824933, Train Accuracy: 0.7781155015197568, Val Loss: 0.4168281067501415, Val Accuracy: 0.817629179331307
Epoch 3: Train Loss: 0.422606267712333, Train Accuracy: 0.8161094224924013, Val Loss: 0.36442464048212225, Val Accuracy: 0.8465045592705167
Epoch 4: Train Loss: 0.3689033199440349, Train Accuracy: 0.8358662613981763, Val Loss: 0.33533755757591943, Val Accuracy: 0.8662613981762918
Epoch 5: Train Loss: 0.36865936626087537, Train Accuracy: 0.8586626139817629, Val Loss: 0.30072188648310577, Val Accuracy: 0.89209726443769
Epoch 6: Train Loss: 0.31133536858992145, Train Accuracy: 0.89209726443769, Val Loss: 0.2669908052141016, Val Accuracy: 0.9057750759878419
Epoch 7: Train Loss: 0.2697857049378482, Train Accuracy: 0.9072948328267477, Val Loss: 0.23848506130955435, Val Accuracy: 0.9270516717325228
Epoch 8: Train Loss

  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


Epoch 1: Train Loss: 0.6757534308867021, Train Accuracy: 0.5866261398176292, Val Loss: 0.6412490389563821, Val Accuracy: 0.5866261398176292
Epoch 2: Train Loss: 0.6274080926721747, Train Accuracy: 0.5866261398176292, Val Loss: 0.5573236617175016, Val Accuracy: 0.5866261398176292
Epoch 3: Train Loss: 0.5539684078910134, Train Accuracy: 0.7112462006079028, Val Loss: 0.5054145089604638, Val Accuracy: 0.7325227963525835
Epoch 4: Train Loss: 0.4713967632163655, Train Accuracy: 0.7887537993920972, Val Loss: 0.5278292122212324, Val Accuracy: 0.7264437689969605
Epoch 5: Train Loss: 0.45718805356459186, Train Accuracy: 0.7993920972644377, Val Loss: 0.4619624465703964, Val Accuracy: 0.8085106382978723
Epoch 6: Train Loss: 0.3902643160386519, Train Accuracy: 0.8282674772036475, Val Loss: 0.32616633447733795, Val Accuracy: 0.8647416413373861
Epoch 7: Train Loss: 0.33013753592967987, Train Accuracy: 0.8495440729483282, Val Loss: 0.2643240121277896, Val Accuracy: 0.8814589665653495
Epoch 8: Train Lo

In [17]:
# batch_size = 32
# NN1 = SimpleNN()
# optimizer = optim.Adam(NN1.parameters(), lr=0.0001)
# loss_fn = nn.CrossEntropyLoss()
# device = "cuda" if torch.cuda.is_available() else "cpu"

# train(NN1, optimizer, loss_fn, train_loader, val_loader, epochs=25, device=device)
# test(NN1, test_loader, device=device)

# alex_net = CustomAlexNet()
# optimizer = optim.Adam(alex_net.parameters(), lr=0.0001)
# loss_fn = nn.CrossEntropyLoss()

# train(alex_net, optimizer, loss_fn, train_loader, val_loader, epochs=25, device=device)
# test(alex_net, test_loader, device=device)
