Create a neural network with at least two hidden layers for a classification task. The dataset should be CIFAR10.
Experiment with three activation functions (one linear and one non-linear) and report (i) accuracy and (ii) execution time.

In [1]:
pip install torch torchvision numpy

Note: you may need to restart the kernel to use updated packages.


In [8]:
import torch
import torch.nn as nn
import torch.optim as optim
import time
from torchvision import datasets, transforms

# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Define transformation (No resize, as CIFAR-10 is 32x32)
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))  # Normalization for CIFAR-10
])

# Load CIFAR-10 dataset
trainset = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=64, shuffle=True)

testset = datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=64, shuffle=False)

# Neural Network class with flexible activation function
class SimpleNN(nn.Module):
    def __init__(self, activation_function):
        super(SimpleNN, self).__init__()
        self.fc1 = nn.Linear(3072, 128)  # Adjusted input size to 3072 (32x32x3)
        self.fc2 = nn.Linear(128, 10)   # Hidden to output layer
        self.activation_function = activation_function

    def forward(self, x):
        x = x.view(-1, 3072)  # Flatten the input to 3072
        x = self.activation_function(self.fc1(x))
        x = self.fc2(x)
        return x

# Training function
def train_model(model, criterion, optimizer, num_epochs=10):
    start_time = time.time()
    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        for inputs, labels in trainloader:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()

        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(trainloader):.4f}')
    
    end_time = time.time()
    return end_time - start_time

# Test function to calculate accuracy
def test_model(model):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in testloader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    accuracy = 100 * correct / total
    return accuracy

# Experiment with activation functions
activation_functions = {
    'ReLU': nn.ReLU(),
    'Tanh': nn.Tanh(),
    'Identity (Linear)': nn.Identity()
}

results = {}

for name, activation_function in activation_functions.items():
    print(f"\nExperimenting with {name} activation function...")
    
    # Initialize model, loss function, and optimizer
    model = SimpleNN(activation_function).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    # Train the model and calculate execution time
    exec_time = train_model(model, criterion, optimizer, num_epochs=10)
    
    # Test the model and calculate accuracy
    accuracy = test_model(model)
    
    # Store the results
    results[name] = (accuracy, exec_time)
    print(f"{name} - Accuracy: {accuracy:.2f}%, Execution Time: {exec_time:.2f}s")

# Display the results
print("\nFinal Results:")
for name, (accuracy, exec_time) in results.items():
    print(f"{name}: Accuracy: {accuracy:.2f}%, Execution Time: {exec_time:.2f}s")



Files already downloaded and verified
Files already downloaded and verified

Experimenting with ReLU activation function...
Epoch [1/10], Loss: 1.6442
Epoch [2/10], Loss: 1.4537
Epoch [3/10], Loss: 1.3751
Epoch [4/10], Loss: 1.3142
Epoch [5/10], Loss: 1.2602
Epoch [6/10], Loss: 1.2177
Epoch [7/10], Loss: 1.1785
Epoch [8/10], Loss: 1.1426
Epoch [9/10], Loss: 1.1120
Epoch [10/10], Loss: 1.0774
ReLU - Accuracy: 51.59%, Execution Time: 28.48s

Experimenting with Tanh activation function...
Epoch [1/10], Loss: 1.7578
Epoch [2/10], Loss: 1.6407
Epoch [3/10], Loss: 1.5859
Epoch [4/10], Loss: 1.5459
Epoch [5/10], Loss: 1.5131
Epoch [6/10], Loss: 1.4881
Epoch [7/10], Loss: 1.4630
Epoch [8/10], Loss: 1.4455
Epoch [9/10], Loss: 1.4204
Epoch [10/10], Loss: 1.3971
Tanh - Accuracy: 45.70%, Execution Time: 28.16s

Experimenting with Identity (Linear) activation function...
Epoch [1/10], Loss: 1.8398
Epoch [2/10], Loss: 1.7581
Epoch [3/10], Loss: 1.7407
Epoch [4/10], Loss: 1.7266
Epoch [5/10], Loss: 1

Experiment with three optimizers and report (i) accuracy and (ii) execution time.


In [10]:

# Define a simple neural network with the correct input size
class ExperimentNN(nn.Module):
    def __init__(self):
        super(ExperimentNN, self).__init__()
        self.fc1 = nn.Linear(3072, 512)  # Adjust input size to 3072 (32x32x3)
        self.fc2 = nn.Linear(512, 512)
        self.fc3 = nn.Linear(512, 10)

    def forward(self, x):
        x = x.view(-1, 3072)  # Flatten the input to 3072
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = self.fc3(x)
        return x

# Function to train and evaluate the model with different optimizers
def experiment(optimizer_name, optimizer_fn):
    model = ExperimentNN()
    criterion = nn.CrossEntropyLoss()
    optimizer = optimizer_fn(model.parameters())

    # Training the model
    start_time = time.time()
    num_epochs = 10
    for epoch in range(num_epochs):
        model.train()
        for inputs, labels in trainloader:
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

    end_time = time.time()
    execution_time = end_time - start_time

    # Evaluate the model
    correct = 0
    total = 0
    model.eval()
    with torch.no_grad():
        for inputs, labels in testloader:
            outputs = model(inputs)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    accuracy = 100 * correct / total
    return accuracy, execution_time

# Experimenting with three optimizers
optimizers = {
    "SGD": lambda params: optim.SGD(params, lr=0.001, momentum=0.9),
    "Adam": lambda params: optim.Adam(params, lr=0.001),
    "RMSprop": lambda params: optim.RMSprop(params, lr=0.001)
}

results = {}
for name, optimizer_fn in optimizers.items():
    print(f"Experimenting with {name} optimizer...")
    accuracy, execution_time = experiment(name, optimizer_fn)
    results[name] = {"Accuracy": accuracy, "Execution Time (s)": execution_time}

# Display the results
for name, result in results.items():
    print(f"{name} - Accuracy: {result['Accuracy']:.2f}%, Execution Time: {result['Execution Time (s)']:.2f}s")


Files already downloaded and verified
Files already downloaded and verified
Experimenting with SGD optimizer...
Experimenting with Adam optimizer...
Experimenting with RMSprop optimizer...
SGD - Accuracy: 52.14%, Execution Time: 37.92s
Adam - Accuracy: 52.37%, Execution Time: 47.70s
RMSprop - Accuracy: 51.58%, Execution Time: 43.50s


Experiment with “Dropout layer”, “BatchNorm” and “Weight initialization” and
report changes in accuracy and execution time.

In [5]:

# Define a neural network with options for Dropout, BatchNorm, and Weight Initialization
class ExperimentNN(nn.Module):
    def __init__(self, use_dropout=False, use_batchnorm=False, use_weight_init=False):
        super(ExperimentNN, self).__init__()
        self.fc1 = nn.Linear(3072, 512)
        self.fc2 = nn.Linear(512, 256)
        self.fc3 = nn.Linear(256, 10)
        
        self.use_dropout = use_dropout
        self.use_batchnorm = use_batchnorm
        if self.use_batchnorm:
            self.bn1 = nn.BatchNorm1d(512)
            self.bn2 = nn.BatchNorm1d(256)
        
        if self.use_dropout:
            self.dropout = nn.Dropout(0.5)
        
        if use_weight_init:
            self._initialize_weights()

    def forward(self, x):
        x = x.view(-1, 3072)  # Flatten the input
        
        x = torch.relu(self.fc1(x))
        if self.use_batchnorm:
            x = self.bn1(x)
        if self.use_dropout:
            x = self.dropout(x)
        
        x = torch.relu(self.fc2(x))
        if self.use_batchnorm:
            x = self.bn2(x)
        if self.use_dropout:
            x = self.dropout(x)
        
        x = self.fc3(x)
        return x

    def _initialize_weights(self):
        nn.init.xavier_uniform_(self.fc1.weight)
        nn.init.xavier_uniform_(self.fc2.weight)
        nn.init.xavier_uniform_(self.fc3.weight)

# Function to train and evaluate the model with different configurations
def experiment(config_name, use_dropout=False, use_batchnorm=False, use_weight_init=False):
    model = ExperimentNN(use_dropout=use_dropout, use_batchnorm=use_batchnorm, use_weight_init=use_weight_init)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    # Training the model
    start_time = time.time()
    num_epochs = 10
    for epoch in range(num_epochs):
        model.train()
        for inputs, labels in trainloader:
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

    end_time = time.time()
    execution_time = end_time - start_time

    # Evaluate the model
    correct = 0
    total = 0
    model.eval()
    with torch.no_grad():
        for inputs, labels in testloader:
            outputs = model(inputs)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    accuracy = 100 * correct / total
    return accuracy, execution_time

# Experimenting with different settings
configs = {
    "Baseline": {"use_dropout": False, "use_batchnorm": False, "use_weight_init": False},
    "Dropout": {"use_dropout": True, "use_batchnorm": False, "use_weight_init": False},
    "BatchNorm": {"use_dropout": False, "use_batchnorm": True, "use_weight_init": False},
    "Weight Initialization": {"use_dropout": False, "use_batchnorm": False, "use_weight_init": True},
    "Dropout + BatchNorm + Weight Init": {"use_dropout": True, "use_batchnorm": True, "use_weight_init": True},
}

results = {}
for config_name, config in configs.items():
    print(f"Experimenting with {config_name}...")
    accuracy, execution_time = experiment(
        config_name, 
        use_dropout=config["use_dropout"], 
        use_batchnorm=config["use_batchnorm"], 
        use_weight_init=config["use_weight_init"]
    )
    results[config_name] = {"Accuracy": accuracy, "Execution Time (s)": execution_time}

# Display the results
for name, result in results.items():
    print(f"{name} - Accuracy: {result['Accuracy']:.2f}%, Execution Time: {result['Execution Time (s)']:.2f}s")


Files already downloaded and verified
Files already downloaded and verified
Experimenting with Baseline...
Experimenting with Dropout...
Experimenting with BatchNorm...
Experimenting with Weight Initialization...
Experimenting with Dropout + BatchNorm + Weight Init...
Baseline - Accuracy: 49.46%, Execution Time: 156.44s
Dropout - Accuracy: 40.25%, Execution Time: 157.14s
BatchNorm - Accuracy: 51.09%, Execution Time: 158.06s
Weight Initialization - Accuracy: 47.28%, Execution Time: 156.45s
Dropout + BatchNorm + Weight Init - Accuracy: 46.94%, Execution Time: 160.24s
