In [None]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from torchvision import datasets
import zipfile
import urllib.request
import numpy as np

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

data_dir = 'cats_and_dogs_filtered'

if not os.path.exists(data_dir):
    print("Downloading Cats and Dogs dataset...")
    dataset_url = "https://storage.googleapis.com/mledu-datasets/cats_and_dogs_filtered.zip"
    dataset_path = "cats_and_dogs_filtered.zip"
    
    urllib.request.urlretrieve(dataset_url, dataset_path)

    with zipfile.ZipFile(dataset_path, 'r') as zip_ref:
        zip_ref.extractall()

    print(f"Dataset extracted to {data_dir}")

In [None]:
#question 1

class SimpleCNN(nn.Module):
    def __init__(self):
        super(SimpleCNN, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.fc1 = nn.Linear(128 * 32 * 32, 512)  # Adjusted for 256x256 image size
        self.fc2 = nn.Linear(512, 2)
        self.pool = nn.MaxPool2d(2, 2)
    
    def forward(self, x):
        x = self.pool(torch.relu(self.conv1(x)))
        x = self.pool(torch.relu(self.conv2(x)))
        x = self.pool(torch.relu(self.conv3(x)))
        x = x.view(-1, 128 * 32 * 32)  
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return x

transform = transforms.Compose([
    transforms.Resize((256, 256)),  
    transforms.ToTensor(),         
])

train_dir = os.path.join(data_dir, 'train')
train_dataset = datasets.ImageFolder(root=train_dir, transform=transform)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

model = SimpleCNN().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=0.01) 

epochs = 10
for epoch in range(epochs):
    running_loss = 0.0
    for inputs, labels in train_loader:
        inputs = inputs.to(device)
        labels = labels.to(device)
        
        optimizer.zero_grad()  
        outputs = model(inputs)  
        loss = criterion(outputs, labels)  
        loss.backward()  
        optimizer.step()  

        running_loss += loss.item()

    print(f"Epoch [{epoch+1}/{epochs}], Loss: {running_loss/len(train_loader)}")

print("With L2 norm:")

for epoch in range(epochs):
    running_loss = 0.0
    l2_lambda = 0.01  
    for inputs, labels in train_loader:
        inputs = inputs.to(device)
        labels = labels.to(device)
        
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)

        l2_reg = 0
        for param in model.parameters():
            l2_reg += torch.norm(param, 2)  
        
        loss += l2_lambda * l2_reg  
        
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    print(f"Epoch [{epoch+1}/{epochs}], Loss: {running_loss/len(train_loader)}")


Epoch [1/10], Loss: 0.7911473153129457
Epoch [2/10], Loss: 0.6938958499166701
Epoch [3/10], Loss: 0.6931854968979245
Epoch [4/10], Loss: 0.6936405698458353
Epoch [5/10], Loss: 0.6933690527128795
Epoch [6/10], Loss: 0.6933544051079523
Epoch [7/10], Loss: 0.6931953770773751
Epoch [8/10], Loss: 0.6931847362291246
Epoch [9/10], Loss: 0.6932553762481326
Epoch [10/10], Loss: 0.6932744478422498
With L2 norm:
Epoch [1/10], Loss: 0.7710752468260508
Epoch [2/10], Loss: 0.769323450232309
Epoch [3/10], Loss: 0.7567859159575568
Epoch [4/10], Loss: 0.7429817744663784
Epoch [5/10], Loss: 0.7400355745875646
Epoch [6/10], Loss: 0.7343502148749337
Epoch [7/10], Loss: 0.728412303659651
Epoch [8/10], Loss: 0.7240960881823585
Epoch [9/10], Loss: 0.7202976582542299
Epoch [10/10], Loss: 0.7166589271454584


In [None]:
#question 2

class SimpleCNN(nn.Module):
    def __init__(self):
        super(SimpleCNN, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.fc1 = nn.Linear(128 * 32 * 32, 512)  
        self.fc2 = nn.Linear(512, 2)
        self.pool = nn.MaxPool2d(2, 2)
    
    def forward(self, x):
        x = self.pool(torch.relu(self.conv1(x)))
        x = self.pool(torch.relu(self.conv2(x)))
        x = self.pool(torch.relu(self.conv3(x)))
        x = x.view(-1, 128 * 32 * 32)  
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return x

transform = transforms.Compose([
    transforms.Resize((256, 256)),  
    transforms.ToTensor(),          
])

train_dir = os.path.join(data_dir, 'train')
train_dataset = datasets.ImageFolder(root=train_dir, transform=transform)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

model = SimpleCNN().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)  

print("Training with L1 regularization (Optimizer)")
l1_lambda = 0.01  
epochs = 10
for epoch in range(epochs):
    running_loss = 0.0
    for inputs, labels in train_loader:
        inputs = inputs.to(device)
        labels = labels.to(device)
        
        optimizer.zero_grad()  
        outputs = model(inputs)  
        loss = criterion(outputs, labels)  

        l1_reg = 0
        for param in model.parameters():
            l1_reg += torch.sum(torch.abs(param))  
        
        loss += l1_lambda * l1_reg  
        
        loss.backward() 
        optimizer.step()  

        running_loss += loss.item()

    print(f"Epoch [{epoch+1}/{epochs}], Loss: {running_loss/len(train_loader)}")



Training with L1 regularization (Optimizer)
Epoch [1/10], Loss: 165.04669213673426
Epoch [2/10], Loss: 90.22653452555339
Epoch [3/10], Loss: 89.63100348578558
Epoch [4/10], Loss: 89.74948422870939
Epoch [5/10], Loss: 89.58445921398345
Epoch [6/10], Loss: 89.67096092587425
Epoch [7/10], Loss: 89.81757693820529
Epoch [8/10], Loss: 89.5268781147306
Epoch [9/10], Loss: 89.5874763367668
Epoch [10/10], Loss: 89.68427034408327


In [22]:
#question 3

class SimpleCNNWithDropout(nn.Module):
    def __init__(self, dropout_rate=0.5):
        super(SimpleCNNWithDropout, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.fc1 = nn.Linear(128 * 32 * 32, 512)  
        self.fc2 = nn.Linear(512, 2)
        self.pool = nn.MaxPool2d(2, 2)
        
        self.dropout = nn.Dropout(dropout_rate) if dropout_rate > 0 else nn.Identity()  
    
    def forward(self, x):
        x = self.pool(torch.relu(self.conv1(x)))
        x = self.pool(torch.relu(self.conv2(x)))
        x = self.pool(torch.relu(self.conv3(x)))
        x = x.view(-1, 128 * 32 * 32) 
        x = torch.relu(self.fc1(x))
        x = self.dropout(x)  
        x = self.fc2(x)
        return x

transform = transforms.Compose([
    transforms.Resize((256, 256)),  
    transforms.ToTensor(),          
])

train_dir = os.path.join(data_dir, 'train')
val_dir = os.path.join(data_dir, 'validation')

train_dataset = datasets.ImageFolder(root=train_dir, transform=transform)
val_dataset = datasets.ImageFolder(root=val_dir, transform=transform)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

def train_model(dropout_rate=0.0, epochs=10):
    model = SimpleCNNWithDropout(dropout_rate=dropout_rate).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    for epoch in range(epochs):
        model.train()
        running_loss = 0.0
        for inputs, labels in train_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()  
            outputs = model(inputs) 
            loss = criterion(outputs, labels) 

            loss.backward() 
            optimizer.step()  

            running_loss += loss.item()

        model.eval()
        val_loss = 0.0
        with torch.no_grad():
            for inputs, labels in val_loader:
                inputs = inputs.to(device)
                labels = labels.to(device)
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                val_loss += loss.item()

        val_loss /= len(val_loader)  

        print(f"Epoch [{epoch+1}/{epochs}], Train Loss: {running_loss/len(train_loader):.4f}, "
              f"Validation Loss: {val_loss:.4f}")

print("Training with Dropout Regularization (Dropout rate = 0.5):")
train_model(dropout_rate=0.5)

print("\nTraining without Dropout Regularization:")
train_model(dropout_rate=0.0)


Training with Dropout Regularization (Dropout rate = 0.5):
Epoch [1/10], Train Loss: 0.9103, Validation Loss: 0.6932
Epoch [2/10], Train Loss: 0.6934, Validation Loss: 0.6931
Epoch [3/10], Train Loss: 0.6932, Validation Loss: 0.6932
Epoch [4/10], Train Loss: 0.6934, Validation Loss: 0.6932
Epoch [5/10], Train Loss: 0.6932, Validation Loss: 0.6931
Epoch [6/10], Train Loss: 0.6932, Validation Loss: 0.6932
Epoch [7/10], Train Loss: 0.6933, Validation Loss: 0.6931
Epoch [8/10], Train Loss: 0.6931, Validation Loss: 0.6931
Epoch [9/10], Train Loss: 0.6934, Validation Loss: 0.6933
Epoch [10/10], Train Loss: 0.6933, Validation Loss: 0.6932

Training without Dropout Regularization:
Epoch [1/10], Train Loss: 0.8885, Validation Loss: 0.6938
Epoch [2/10], Train Loss: 0.6904, Validation Loss: 0.6912
Epoch [3/10], Train Loss: 0.6814, Validation Loss: 0.6919
Epoch [4/10], Train Loss: 0.6736, Validation Loss: 0.6876
Epoch [5/10], Train Loss: 0.6531, Validation Loss: 0.6997
Epoch [6/10], Train Loss: 0.

In [None]:
# question 4

class CustomDropout(nn.Module):
    def __init__(self, dropout_rate=0.5):
        super(CustomDropout, self).__init__()
        self.dropout_rate = dropout_rate
    
    def forward(self, x):
        if self.training:  
            # Bernoulli distribution: 1 - dropout_rate gives us the probability of "keeping" a neuron
            mask = torch.bernoulli(torch.full(x.shape, 1 - self.dropout_rate, device=x.device))
            x = x * mask 
            x = x / (1 - self.dropout_rate)  
        return x

class SimpleCNNWithCustomDropout(nn.Module):
    def __init__(self, dropout_rate=0.5):
        super(SimpleCNNWithCustomDropout, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.fc1 = nn.Linear(128 * 32 * 32, 512)  
        self.fc2 = nn.Linear(512, 2)
        self.pool = nn.MaxPool2d(2, 2)
        self.custom_dropout = CustomDropout(dropout_rate) 
    
    def forward(self, x):
        x = self.pool(torch.relu(self.conv1(x)))
        x = self.pool(torch.relu(self.conv2(x)))
        x = self.pool(torch.relu(self.conv3(x)))
        x = x.view(-1, 128 * 32 * 32)  
        x = torch.relu(self.fc1(x))
        x = self.custom_dropout(x)  
        x = self.fc2(x)
        return x

class SimpleCNNWithLibraryDropout(nn.Module):
    def __init__(self, dropout_rate=0.5):
        super(SimpleCNNWithLibraryDropout, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.fc1 = nn.Linear(128 * 32 * 32, 512)  
        self.fc2 = nn.Linear(512, 2)
        self.pool = nn.MaxPool2d(2, 2)
        self.dropout = nn.Dropout(dropout_rate)  
    
    def forward(self, x):
        x = self.pool(torch.relu(self.conv1(x)))
        x = self.pool(torch.relu(self.conv2(x)))
        x = self.pool(torch.relu(self.conv3(x)))
        x = x.view(-1, 128 * 32 * 32)  
        x = torch.relu(self.fc1(x))
        x = self.dropout(x)  
        x = self.fc2(x)
        return x

transform = transforms.Compose([
    transforms.Resize((256, 256)),  
    transforms.ToTensor(),         
])

train_dir = os.path.join(data_dir, 'train')
train_dataset = datasets.ImageFolder(root=train_dir, transform=transform)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

def train_model(model):
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    epochs = 10
    for epoch in range(epochs):
        running_loss = 0.0
        for inputs, labels in train_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()  
            outputs = model(inputs)  
            loss = criterion(outputs, labels)  

            loss.backward()  
            optimizer.step()  

            running_loss += loss.item()

        print(f"Epoch [{epoch+1}/{epochs}], Loss: {running_loss/len(train_loader)}")

print("Training with Custom Dropout Regularization:")
model_with_custom_dropout = SimpleCNNWithCustomDropout(dropout_rate=0.5).to(device)
train_model(model_with_custom_dropout)



Training with Custom Dropout Regularization:
Epoch [1/10], Loss: 0.8477671392380245
Epoch [2/10], Loss: 0.6873678583947439
Epoch [3/10], Loss: 0.6882958904145255
Epoch [4/10], Loss: 0.6904015446466113
Epoch [5/10], Loss: 0.6718009663006616
Epoch [6/10], Loss: 0.6560739458553375
Epoch [7/10], Loss: 0.6306019141560509
Epoch [8/10], Loss: 0.5971986788605886
Epoch [9/10], Loss: 0.5232475333743625
Epoch [10/10], Loss: 0.47641435880509636


In [None]:
#questions 5

class SimpleCNN(nn.Module):
    def __init__(self):
        super(SimpleCNN, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.fc1 = nn.Linear(64*64*64, 512)
        self.fc2 = nn.Linear(512, 2)
        self.pool = nn.MaxPool2d(2, 2)
    
    def forward(self, x):
        x = self.pool(torch.relu(self.conv1(x)))
        x = self.pool(torch.relu(self.conv2(x)))
        x = x.view(-1, 64*64*64)  
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return x

transform = transforms.Compose([
    transforms.Resize((256, 256)),  
    transforms.ToTensor(),          
])

train_dir = os.path.join(data_dir, 'train')
val_dir = os.path.join(data_dir, 'validation')
train_dataset = datasets.ImageFolder(root=train_dir, transform=transform)
val_dataset = datasets.ImageFolder(root=val_dir, transform=transform)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

class EarlyStopping:
    def __init__(self, patience=5, min_delta=0.01):
        self.patience = patience
        self.min_delta = min_delta
        self.best_loss = np.inf
        self.counter = 0
        self.early_stop = False

    def __call__(self, val_loss):
        if val_loss < self.best_loss - self.min_delta:
            self.best_loss = val_loss
            self.counter = 0
        else:
            self.counter += 1
            if self.counter >= self.patience:
                self.early_stop = True

def train_with_early_stopping(model, criterion, optimizer, epochs=20):
    early_stopping = EarlyStopping(patience=5, min_delta=0.01)
    for epoch in range(epochs):
        model.train()
        running_loss = 0.0
        for inputs, labels in train_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()  
            outputs = model(inputs)  
            loss = criterion(outputs, labels) 

            loss.backward()  
            optimizer.step()  

            running_loss += loss.item()

        model.eval()
        val_loss = 0.0
        with torch.no_grad():
            for inputs, labels in val_loader:
                inputs = inputs.to(device)
                labels = labels.to(device)
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                val_loss += loss.item()

        val_loss /= len(val_loader)  

        print(f"Epoch [{epoch+1}/{epochs}], Train Loss: {running_loss/len(train_loader):.4f}, "
              f"Validation Loss: {val_loss:.4f}")

        early_stopping(val_loss)
        if early_stopping.early_stop:
            print("Early stopping triggered.")
            break

model_with_early_stopping = SimpleCNN().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model_with_early_stopping.parameters(), lr=0.001)

print("Training with Early Stopping:")
train_with_early_stopping(model_with_early_stopping, criterion, optimizer, epochs=10)


Training with Early Stopping:
Epoch [1/10], Train Loss: 1.3746, Validation Loss: 0.6885
Epoch [2/10], Train Loss: 0.6825, Validation Loss: 0.6852
Epoch [3/10], Train Loss: 0.6404, Validation Loss: 0.6602
Epoch [4/10], Train Loss: 0.5381, Validation Loss: 0.6940
Epoch [5/10], Train Loss: 0.3652, Validation Loss: 0.7292
Epoch [6/10], Train Loss: 0.1760, Validation Loss: 0.8892
Epoch [7/10], Train Loss: 0.0691, Validation Loss: 1.1959
Epoch [8/10], Train Loss: 0.0378, Validation Loss: 1.5693
Early stopping triggered.
