In [2]:
# Q1
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader

transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
])

train_dataset = ImageFolder('cats_and_dogs_filtered/train', transform=transform)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

class CatDogClassifier(nn.Module):
    def __init__(self):
        super(CatDogClassifier, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.fc1 = nn.Linear(64 * 56 * 56, 128)
        self.fc2 = nn.Linear(128, 2)

    def forward(self, x):
        x = torch.relu(self.conv1(x))
        x = torch.max_pool2d(x, kernel_size=2, stride=2)
        x = torch.relu(self.conv2(x))
        x = torch.max_pool2d(x, kernel_size=2, stride=2)
        x = x.view(-1, 64 * 56 * 56)
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return x

model = CatDogClassifier()

device = torch.device('cuda' if torch.cuda.is_available() else "cpu")
model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9, weight_decay=0.001)

def train_model(model, train_loader, optimizer, criterion, num_epochs=5):
    model.train()
    for epoch in range(num_epochs):
        running_loss = 0.0
        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
        print(f'Epoch {epoch+1}, Loss: {running_loss/len(train_loader)}')

train_model(model, train_loader, optimizer, criterion, num_epochs=5)

for name, param in model.named_parameters():
    if 'weight' in name:
        print(f'Layer: {name}, L2 norm: {torch.norm(param)}')


Epoch 1, Loss: 0.6946445637279086
Epoch 2, Loss: 0.6918036161907135
Epoch 3, Loss: 0.6898864971266853
Epoch 4, Loss: 0.6834569138193888
Epoch 5, Loss: 0.6850727588411362
Layer: conv1.weight, L2 norm: 3.3163514137268066
Layer: conv2.weight, L2 norm: 4.604287147521973
Layer: fc1.weight, L2 norm: 6.507177829742432
Layer: fc2.weight, L2 norm: 0.798703670501709


In [3]:

def train_model_with_l2_norm(model, train_loader, optimizer, criterion, l2_lambda=0.001, num_epochs=5):
    model.train()
    for epoch in range(num_epochs):
        running_loss = 0.0
        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
           
            l2_regularization = 0
            for param in model.parameters():
                if param.requires_grad and len(param.shape) > 1:  
                    l2_regularization += torch.norm(param)**2
            loss += 0.5 * l2_lambda * l2_regularization
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
        print(f'Epoch {epoch+1}, Loss: {running_loss/len(train_loader)}')

model_b = CatDogClassifier()
model_b.to(device)

optimizer_b = optim.SGD(model_b.parameters(), lr=0.001, momentum=0.9)

train_model_with_l2_norm(model_b, train_loader, optimizer_b, criterion, l2_lambda=0.001, num_epochs=5)

for name, param in model_b.named_parameters():
    if 'weight' in name:
        print(f'Layer: {name}, L2 norm: {torch.norm(param)}')


Epoch 1, Loss: 0.7358111001196361
Epoch 2, Loss: 0.73061437171603
Epoch 3, Loss: 0.7294252732443431
Epoch 4, Loss: 0.7262737013044811
Epoch 5, Loss: 0.7236294765321035
Layer: conv1.weight, L2 norm: 3.310598134994507
Layer: conv2.weight, L2 norm: 4.611597537994385
Layer: fc1.weight, L2 norm: 6.506830215454102
Layer: fc2.weight, L2 norm: 0.7900124788284302


In [1]:
# Q3
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader


transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
])

train_dataset = ImageFolder('./cats_and_dogs_filtered/train', transform=transform)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

class CatDogClassifierWithDropout(nn.Module):
    def __init__(self):
        super(CatDogClassifierWithDropout, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.fc1 = nn.Linear(64 * 56 * 56, 128)
        self.fc2 = nn.Linear(128, 2)
        self.dropout = nn.Dropout(0.5)  # Dropout probability of 0.5

    def forward(self, x):
        x = torch.relu(self.conv1(x))
        x = torch.max_pool2d(x, kernel_size=2, stride=2)
        x = torch.relu(self.conv2(x))
        x = torch.max_pool2d(x, kernel_size=2, stride=2)
        x = x.view(-1, 64 * 56 * 56)
        x = torch.relu(self.fc1(x))
        x = self.dropout(x)  # Applying dropout
        x = self.fc2(x)
        return x

model_with_dropout = CatDogClassifierWithDropout()


device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model_with_dropout.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model_with_dropout.parameters(), lr=0.001, momentum=0.9)

def train_model(model, train_loader, optimizer, criterion, num_epochs=5):
    model.train()
    for epoch in range(num_epochs):
        running_loss = 0.0
        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
        print(f'Epoch {epoch+1}, Loss: {running_loss/len(train_loader)}')

train_model(model_with_dropout, train_loader, optimizer, criterion, num_epochs=5)


Epoch 1, Loss: 0.6963799510683332
Epoch 2, Loss: 0.6902804895052834
Epoch 3, Loss: 0.6915045673885043
Epoch 4, Loss: 0.6893836193614535
Epoch 5, Loss: 0.684991787350367


In [4]:
# Q2
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader

transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
])


train_dataset = ImageFolder('cats_and_dogs_filtered/train', transform=transform)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

class CatDogClassifier(nn.Module):
    def __init__(self):
        super(CatDogClassifier, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.fc1 = nn.Linear(64 * 56 * 56, 128)
        self.fc2 = nn.Linear(128, 2)

    def forward(self, x):
        x = torch.relu(self.conv1(x))
        x = torch.max_pool2d(x, kernel_size=2, stride=2)
        x = torch.relu(self.conv2(x))
        x = torch.max_pool2d(x, kernel_size=2, stride=2)
        x = x.view(-1, 64 * 56 * 56)
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return x

model = CatDogClassifier()

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9, weight_decay=0.001)

def train_model(model, train_loader, optimizer, criterion, num_epochs=5):
    model.train()
    for epoch in range(num_epochs):
        running_loss = 0.0
        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
        print(f'Epoch {epoch+1}, Loss: {running_loss/len(train_loader)}')

train_model(model, train_loader, optimizer, criterion, num_epochs=5)

for name, param in model.named_parameters():
    if 'weight' in name:
        print(f'Layer: {name}, L1 norm: {torch.norm(param, p=1)}')

def train_model_with_l1_norm(model, train_loader, optimizer, criterion, l1_lambda=0.001, num_epochs=5):
    model.train()
    for epoch in range(num_epochs):
        running_loss = 0.0
        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
        
            l1_regularization = 0
            for param in model.parameters():
                if param.requires_grad and len(param.shape) > 1: 
                    l1_regularization += torch.norm(param, p=1)
            loss += l1_lambda * l1_regularization
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
        print(f'Epoch {epoch+1}, Loss: {running_loss/len(train_loader)}')

model_b = CatDogClassifier()
model_b.to(device)

optimizer_b = optim.SGD(model_b.parameters(), lr=0.001, momentum=0.9)

train_model_with_l1_norm(model_b, train_loader, optimizer_b, criterion, l1_lambda=0.001, num_epochs=5)

for name, param in model_b.named_parameters():
    if 'weight' in name:
        print(f'Layer: {name}, L1 norm: {torch.norm(param, p=1)}')


Epoch 1, Loss: 0.6948544714185927
Epoch 2, Loss: 0.6922421748675998
Epoch 3, Loss: 0.687655974948217
Epoch 4, Loss: 0.6848643251827785
Epoch 5, Loss: 0.679541686224559
Layer: conv1.weight, L1 norm: 81.09809112548828
Layer: conv2.weight, L1 norm: 542.2473754882812
Layer: fc1.weight, L1 norm: 28595.328125
Layer: fc2.weight, L1 norm: 11.33641242980957
Epoch 1, Loss: 24.443062857976035
Epoch 2, Loss: 12.510858989897228
Epoch 3, Loss: 4.794409324252416
Epoch 4, Loss: 1.6177095042334662
Epoch 5, Loss: 1.3187158164523898
Layer: conv1.weight, L1 norm: 80.0750503540039
Layer: conv2.weight, L1 norm: 487.6818542480469
Layer: fc1.weight, L1 norm: 38.881874084472656
Layer: fc2.weight, L1 norm: 10.421439170837402


In [5]:
# Q4

import torch

class CustomDropout(torch.nn.Module):
    def __init__(self, p=0.5):
        super(CustomDropout, self).__init__()
        self.p = p

    def forward(self, x):
        if self.training:
            # Generate a binary mask with Bernoulli distribution
            mask = torch.bernoulli(torch.full_like(x, 1 - self.p))
            # Scale the remaining activations by 1 / (1 - p)
            return x * mask / (1 - self.p)
        else:
            return x
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
from torchvision.datasets import MNIST
from torch.utils.data import DataLoader

# Define the custom dropout layer
class CustomDropout(torch.nn.Module):
    def __init__(self, p=0.5):
        super(CustomDropout, self).__init__()
        self.p = p

    def forward(self, x):
        if self.training:
            # Generate a binary mask with Bernoulli distribution
            mask = torch.bernoulli(torch.full_like(x, 1 - self.p))
            # Scale the remaining activations by 1 / (1 - p)
            return x * mask / (1 - self.p)
        else:
            return x

# Define a simple neural network model
class NeuralNetwork(nn.Module):
    def __init__(self):
        super(NeuralNetwork, self).__init__()
        self.fc1 = nn.Linear(784, 512)
        self.dropout1 = CustomDropout(p=0.5)
        self.fc2 = nn.Linear(512, 256)
        self.dropout2 = nn.Dropout(p=0.5)
        self.fc3 = nn.Linear(256, 10)

    def forward(self, x):
        x = torch.flatten(x, 1)
        x = torch.relu(self.fc1(x))
        x = self.dropout1(x)
        x = torch.relu(self.fc2(x))
        x = self.dropout2(x)
        x = self.fc3(x)
        return x

# Load MNIST dataset
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

train_dataset = MNIST(root='./data', train=True, transform=transform, download=True)
test_dataset = MNIST(root='./data', train=False, transform=transform, download=True)

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

# Initialize models
custom_model = NeuralNetwork()
library_model = NeuralNetwork()

# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
custom_optimizer = optim.Adam(custom_model.parameters(), lr=0.001)
library_optimizer = optim.Adam(library_model.parameters(), lr=0.001)

# Train custom dropout model
def train(model, optimizer):
    model.train()
    for inputs, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

# Evaluate model accuracy
def evaluate(model):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in test_loader:
            outputs = model(inputs)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    return correct / total

# Train and evaluate custom dropout model
for epoch in range(5):
    train(custom_model, custom_optimizer)
    custom_accuracy = evaluate(custom_model)
    print(f'Custom Dropout - Epoch [{epoch+1}/5], Accuracy: {custom_accuracy:.4f}')

# Train and evaluate library dropout model
for epoch in range(5):
    train(library_model, library_optimizer)
    library_accuracy = evaluate(library_model)
    print(f'Library Dropout - Epoch [{epoch+1}/5], Accuracy: {library_accuracy:.4f}')


Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/raw/train-images-idx3-ubyte.gz


100%|████████████████████████████| 9912422/9912422 [00:07<00:00, 1252167.73it/s]


Extracting ./data/MNIST/raw/train-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz to ./data/MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████████████████████████████| 28881/28881 [00:00<00:00, 46392.73it/s]


Extracting ./data/MNIST/raw/train-labels-idx1-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw/t10k-images-idx3-ubyte.gz


100%|█████████████████████████████| 1648877/1648877 [00:04<00:00, 402908.29it/s]


Extracting ./data/MNIST/raw/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|█████████████████████████████████| 4542/4542 [00:00<00:00, 20888737.68it/s]


Extracting ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw

Custom Dropout - Epoch [1/5], Accuracy: 0.9394
Custom Dropout - Epoch [2/5], Accuracy: 0.9510
Custom Dropout - Epoch [3/5], Accuracy: 0.9577
Custom Dropout - Epoch [4/5], Accuracy: 0.9610
Custom Dropout - Epoch [5/5], Accuracy: 0.9629
Library Dropout - Epoch [1/5], Accuracy: 0.9334
Library Dropout - Epoch [2/5], Accuracy: 0.9501
Library Dropout - Epoch [3/5], Accuracy: 0.9569
Library Dropout - Epoch [4/5], Accuracy: 0.9610
Library Dropout - Epoch [5/5], Accuracy: 0.9649


In [6]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, random_split
import torchvision.transforms as transforms
from torchvision.datasets import MNIST

# Define the neural network architecture
class NeuralNetwork(nn.Module):
    def __init__(self):
        super(NeuralNetwork, self).__init__()
        self.fc1 = nn.Linear(28*28, 512)
        self.fc2 = nn.Linear(512, 256)
        self.fc3 = nn.Linear(256, 10)
        self.relu = nn.ReLU()
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        x = x.view(-1, 28*28)  # Flatten the input
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        x = self.fc3(x)
        return self.softmax(x)

# Load MNIST dataset
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

dataset = MNIST(root='./data', train=True, transform=transform, download=True)

# Split dataset into training and validation sets
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
train_dataset, val_dataset = random_split(dataset, [train_size, val_size])

# Create data loaders
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False)

# Define the neural network
model = NeuralNetwork()

# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Early stopping parameters
patience = 3  # Number of epochs to wait if validation loss increases
best_val_loss = float('inf')
counter = 0  # Counter for patience

# Train the model
for epoch in range(10):  # Train for 10 epochs
    model.train()
    train_loss = 0.0
    for inputs, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        train_loss += loss.item() * inputs.size(0)
    train_loss /= len(train_loader.dataset)

    # Evaluate on validation set
    model.eval()
    val_loss = 0.0
    for inputs, labels in val_loader:
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        val_loss += loss.item() * inputs.size(0)
    val_loss /= len(val_loader.dataset)

    print(f'Epoch [{epoch+1}/10], Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}')

    # Check for early stopping
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        counter = 0
    else:
        counter += 1
        if counter >= patience:
            print("Early stopping triggered. Stopping training.")
            break

# Evaluate the model on the test set
test_dataset = MNIST(root='./data', train=False, transform=transform, download=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

model.eval()
correct = 0
total = 0
with torch.no_grad():
    for inputs, labels in test_loader:
        outputs = model(inputs)
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

accuracy = correct / total
print(f'Test Accuracy: {accuracy:.4f}')


Epoch [1/10], Train Loss: 1.6116, Val Loss: 1.5833
Epoch [2/10], Train Loss: 1.5431, Val Loss: 1.5367
Epoch [3/10], Train Loss: 1.5277, Val Loss: 1.5203
Epoch [4/10], Train Loss: 1.5191, Val Loss: 1.5163
Epoch [5/10], Train Loss: 1.5127, Val Loss: 1.5132
Epoch [6/10], Train Loss: 1.5117, Val Loss: 1.5129
Epoch [7/10], Train Loss: 1.5082, Val Loss: 1.5139
Epoch [8/10], Train Loss: 1.5057, Val Loss: 1.5035
Epoch [9/10], Train Loss: 1.5045, Val Loss: 1.5142
Epoch [10/10], Train Loss: 1.5037, Val Loss: 1.5038
Test Accuracy: 0.9598
