In [1]:
import torch
import torch.nn as nn
import torch.nn.init as init
import torch.optim as optim
import torchvision.transforms as transforms
from torchvision.datasets import MNIST
from torch.utils.data import DataLoader
import time

In [2]:
# Seed PyTorch's global RNG before anything random happens, so that weight
# initialisation and the shuffling order of train_loader repeat on a fresh
# "Restart Kernel -> Run All".
SEED = 42
torch.manual_seed(SEED)

# Tunable settings for the data pipeline.
DATA_ROOT = './data'
BATCH_SIZE = 64

# Define transformation: convert each image to a tensor, then normalise the
# single channel with mean 0.5 and std 0.5.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,)),
])

# Load training and test sets (downloaded into DATA_ROOT when not already there)
train_dataset = MNIST(root=DATA_ROOT, train=True, download=True, transform=transform)
test_dataset = MNIST(root=DATA_ROOT, train=False, download=True, transform=transform)

# Data loaders: only the training data is shuffled; the test order stays fixed.
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Failed to download (trying next):
<urlopen error [SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: certificate has expired (_ssl.c:1000)>

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz to ./data\MNIST\raw\train-images-idx3-ubyte.gz


100%|██████████| 9.91M/9.91M [00:01<00:00, 5.42MB/s]


Extracting ./data\MNIST\raw\train-images-idx3-ubyte.gz to ./data\MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Failed to download (trying next):
<urlopen error [SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: certificate has expired (_ssl.c:1000)>

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz to ./data\MNIST\raw\train-labels-idx1-ubyte.gz


100%|██████████| 28.9k/28.9k [00:00<00:00, 1.51MB/s]


Extracting ./data\MNIST\raw\train-labels-idx1-ubyte.gz to ./data\MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Failed to download (trying next):
<urlopen error [SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: certificate has expired (_ssl.c:1000)>

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz to ./data\MNIST\raw\t10k-images-idx3-ubyte.gz


100%|██████████| 1.65M/1.65M [00:00<00:00, 7.42MB/s]


Extracting ./data\MNIST\raw\t10k-images-idx3-ubyte.gz to ./data\MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Failed to download (trying next):
<urlopen error [SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: certificate has expired (_ssl.c:1000)>

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz to ./data\MNIST\raw\t10k-labels-idx1-ubyte.gz


100%|██████████| 4.54k/4.54k [00:00<?, ?B/s]

Extracting ./data\MNIST\raw\t10k-labels-idx1-ubyte.gz to ./data\MNIST\raw






# 1. Baseline model: fully connected network with ReLU, trained with SGD

In [5]:
class BaselineModel(nn.Module):
    """Fully connected classifier: 784 -> 128 -> 64 -> 10 with ReLU on the hidden layers."""

    def __init__(self):
        super().__init__()
        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(in_features=28 * 28, out_features=128)
        self.fc2 = nn.Linear(in_features=128, out_features=64)
        self.fc3 = nn.Linear(in_features=64, out_features=10)

    def forward(self, x):
        """Flatten ``x`` per sample and return the 10 raw class scores (no softmax)."""
        flat = self.flatten(x)
        hidden1 = self.fc1(flat).relu()
        hidden2 = self.fc2(hidden1).relu()
        logits = self.fc3(hidden2)
        return logits

# Baseline experiment setup: a fresh network, cross-entropy loss on the raw
# class scores, and SGD with a learning rate of 0.01.
# `criterion` is also reused by the later comparison cells.
model = BaselineModel()
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(params=model.parameters(), lr=0.01)


In [7]:
def train_model(model, optimizer, criterion, num_epochs=5, data_loader=None):
    """Train ``model`` in place and return how long training took.

    Args:
        model: The ``nn.Module`` to optimise; it is switched to training mode
            at the start of every epoch.
        optimizer: Optimizer already bound to ``model``'s parameters.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
        num_epochs: Number of full passes over the loader.
        data_loader: Sized iterable of ``(images, labels)`` batches. Defaults
            to the notebook-level ``train_loader`` so existing calls keep
            working unchanged.

    Returns:
        Elapsed training time in seconds (float).

    Raises:
        ValueError: If the loader contains no batches (the average loss would
            otherwise fail with a division by zero).
    """
    if data_loader is None:
        data_loader = train_loader  # notebook-level default

    num_batches = len(data_loader)
    if num_batches == 0:
        raise ValueError("data_loader contains no batches; nothing to train on")

    # perf_counter is a monotonic, high-resolution clock, so the measured
    # interval cannot be distorted by system clock adjustments.
    start_time = time.perf_counter()
    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        for images, labels in data_loader:
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()

        # Report the mean per-batch loss of this epoch.
        print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/num_batches:.4f}")

    return time.perf_counter() - start_time

# Run the baseline training loop with its default epoch count and keep the
# duration it returns for the report printed after evaluation.
baseline_exec_time = train_model(model=model, optimizer=optimizer, criterion=criterion)


Epoch [1/5], Loss: 1.0660
Epoch [2/5], Loss: 0.3901
Epoch [3/5], Loss: 0.3255
Epoch [4/5], Loss: 0.2914
Epoch [5/5], Loss: 0.2659


In [9]:
def evaluate_model(model, data_loader=None):
    """Return the classification accuracy of ``model`` as a percentage.

    Args:
        model: The ``nn.Module`` to score; it is switched to evaluation mode.
        data_loader: Iterable of ``(images, labels)`` batches. Defaults to the
            notebook-level ``test_loader`` so existing calls keep working
            unchanged.

    Returns:
        ``100 * correct / total`` as a float.

    Raises:
        ValueError: If the loader yields no samples (the accuracy would
            otherwise fail with a division by zero).
    """
    if data_loader is None:
        data_loader = test_loader  # notebook-level default

    model.eval()
    correct = 0
    total = 0
    # Gradients are not needed for scoring, so skip building the autograd graph.
    with torch.no_grad():
        for images, labels in data_loader:
            # The predicted class is the index of the largest score per sample.
            predicted = model(images).argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    if total == 0:
        raise ValueError("data_loader yielded no samples; accuracy is undefined")
    return 100 * correct / total

# Score the trained baseline with evaluate_model and report the accuracy
# together with the training time measured earlier.
baseline_accuracy = evaluate_model(model=model)
print(f"Baseline Model Accuracy: {baseline_accuracy:.2f}%, Execution Time: {baseline_exec_time:.2f} seconds")

Baseline Model Accuracy: 92.78%, Execution Time: 30.89 seconds


# 2. Comparing activation functions (Identity vs. ReLU vs. Tanh)

In [13]:
# Network whose hidden-layer activation is supplied by the caller
class CustomActivationModel(nn.Module):
    """784 -> 128 -> 64 -> 10 fully connected network with a pluggable activation.

    ``activation_fn`` is any callable mapping a tensor to a tensor; it is
    applied after each of the two hidden layers and never to the output layer.
    """

    def __init__(self, activation_fn):
        super().__init__()
        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(28 * 28, 128)  # 28x28 inputs, e.g. MNIST images
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, 10)  # one output per class (10 classes)
        self.activation_fn = activation_fn

    def forward(self, x):
        """Return raw class scores for ``x``; the final layer is left un-activated."""
        activate = self.activation_fn
        hidden = activate(self.fc1(self.flatten(x)))
        hidden = activate(self.fc2(hidden))
        return self.fc3(hidden)

In [23]:
# Define the activation functions: one linear and two non-linear.
# With 'Identity' nothing non-linear sits between the Linear layers, so the
# whole network reduces to a single linear map.
activations = {
    'Identity': lambda x: x,  # Linear activation function
    'ReLU': torch.relu,       # Non-linear activation function
    'Tanh': torch.tanh        # Non-linear activation function
}

# List collecting one result record (dict) per activation function
results = []

# Loop through each activation function
for name, activation_fn in activations.items():
    # Fresh model and optimizer per run so no weights or optimizer state leak between runs
    model = CustomActivationModel(activation_fn)
    optimizer = optim.SGD(model.parameters(), lr=0.01)

    # train_model times its own training loop and returns the duration in
    # seconds, so there is no need to wrap the call in a second timer.
    training_duration = train_model(model, optimizer, criterion)

    # Evaluate the model to get accuracy
    accuracy = evaluate_model(model)

    # Store results
    results.append({
        "Activation Function": name,
        "Accuracy": accuracy,
        "Execution Time": training_duration
    })

    # Print the results for this activation function
    print(f"Activation Function: {name}, Accuracy: {accuracy:.2f}%, Execution Time: {training_duration:.2f} seconds")

# Final Results
print("\nSummary of Results:")
for result in results:
    print(f"Activation: {result['Activation Function']}, Accuracy: {result['Accuracy']:.2f}%, Execution Time: {result['Execution Time']:.2f} seconds")



Epoch [1/5], Loss: 0.8168
Epoch [2/5], Loss: 0.3669
Epoch [3/5], Loss: 0.3285
Epoch [4/5], Loss: 0.3131
Epoch [5/5], Loss: 0.3042
Activation Function: Identity, Accuracy: 91.72%, Execution Time: 29.47 seconds
Epoch [1/5], Loss: 1.0329
Epoch [2/5], Loss: 0.3847
Epoch [3/5], Loss: 0.3233
Epoch [4/5], Loss: 0.2915
Epoch [5/5], Loss: 0.2675
Activation Function: ReLU, Accuracy: 92.37%, Execution Time: 29.51 seconds
Epoch [1/5], Loss: 1.0227
Epoch [2/5], Loss: 0.4168
Epoch [3/5], Loss: 0.3323
Epoch [4/5], Loss: 0.2932
Epoch [5/5], Loss: 0.2665
Activation Function: Tanh, Accuracy: 92.60%, Execution Time: 29.37 seconds

Summary of Results:
Activation: Identity, Accuracy: 91.72%, Execution Time: 29.47 seconds
Activation: ReLU, Accuracy: 92.37%, Execution Time: 29.51 seconds
Activation: Tanh, Accuracy: 92.60%, Execution Time: 29.37 seconds


# 3. Comparing optimizers (SGD vs. Adam vs. RMSprop)

In [21]:
# Define the optimizers as factories: each takes the model parameters and
# returns a ready optimizer. Note that SGD runs with lr=0.01 while Adam and
# RMSprop run with lr=0.001, so the comparison is not at equal learning rates.
optimizers = {
    'SGD': lambda params: optim.SGD(params, lr=0.01),
    'Adam': lambda params: optim.Adam(params, lr=0.001),
    'RMSprop': lambda params: optim.RMSprop(params, lr=0.001)
}

# List collecting one result record (dict) per optimizer
results = []

# Loop through each optimizer
for name, optimizer_fn in optimizers.items():
    # Initialize the model and the optimizer for each experiment
    model = CustomActivationModel(torch.relu)  # Using ReLU as the activation function for consistency
    optimizer = optimizer_fn(model.parameters())

    # train_model times its own training loop and returns the duration in
    # seconds, so there is no need to wrap the call in a second timer.
    training_duration = train_model(model, optimizer, criterion)

    # Evaluate the model to get accuracy
    accuracy = evaluate_model(model)

    # Store results
    results.append({
        "Optimizer": name,
        "Accuracy": accuracy,
        "Execution Time": training_duration
    })

    # Print the results for this optimizer
    print(f"Optimizer: {name}, Accuracy: {accuracy:.2f}%, Execution Time: {training_duration:.2f} seconds")

# Final Results Summary
print("\nSummary of Results:")
for result in results:
    print(f"Optimizer: {result['Optimizer']}, Accuracy: {result['Accuracy']:.2f}%, Execution Time: {result['Execution Time']:.2f} seconds")


Epoch [1/5], Loss: 0.9902
Epoch [2/5], Loss: 0.3823
Epoch [3/5], Loss: 0.3236
Epoch [4/5], Loss: 0.2884
Epoch [5/5], Loss: 0.2623
Optimizer: SGD, Accuracy: 92.33%, Execution Time: 30.71 seconds
Epoch [1/5], Loss: 0.4109
Epoch [2/5], Loss: 0.2005
Epoch [3/5], Loss: 0.1468
Epoch [4/5], Loss: 0.1151
Epoch [5/5], Loss: 0.0995
Optimizer: Adam, Accuracy: 96.83%, Execution Time: 43.21 seconds
Epoch [1/5], Loss: 0.3818
Epoch [2/5], Loss: 0.1864
Epoch [3/5], Loss: 0.1368
Epoch [4/5], Loss: 0.1102
Epoch [5/5], Loss: 0.0923
Optimizer: RMSprop, Accuracy: 97.16%, Execution Time: 33.95 seconds

Summary of Results:
Optimizer: SGD, Accuracy: 92.33%, Execution Time: 30.71 seconds
Optimizer: Adam, Accuracy: 96.83%, Execution Time: 43.21 seconds
Optimizer: RMSprop, Accuracy: 97.16%, Execution Time: 33.95 seconds


# 4. Comparing Dropout, Batch Normalization and Xavier weight initialization

In [25]:
# Define the Dropout model
class DropoutModel(nn.Module):
    """784 -> 128 -> 64 -> 10 ReLU network with dropout after each hidden layer.

    Args:
        dropout_rate: Probability of zeroing an activation, used for both
            dropout layers. Defaults to 0.5, the rate this model used before
            the parameter existed, so ``DropoutModel()`` behaves as before.
    """

    def __init__(self, dropout_rate=0.5):
        super().__init__()
        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(28*28, 128)
        self.dropout1 = nn.Dropout(dropout_rate)
        self.fc2 = nn.Linear(128, 64)
        self.dropout2 = nn.Dropout(dropout_rate)
        self.fc3 = nn.Linear(64, 10)

    def forward(self, x):
        """Return raw class scores; dropout is applied after each hidden ReLU."""
        x = self.flatten(x)
        x = torch.relu(self.fc1(x))
        x = self.dropout1(x)
        x = torch.relu(self.fc2(x))
        x = self.dropout2(x)
        x = self.fc3(x)
        return x

# Define the BatchNorm model
class BatchNormModel(nn.Module):
    """784 -> 128 -> 64 -> 10 network with BatchNorm1d before each hidden ReLU."""

    def __init__(self):
        super().__init__()
        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(28 * 28, 128)
        self.bn1 = nn.BatchNorm1d(num_features=128)  # normalises fc1's output
        self.fc2 = nn.Linear(128, 64)
        self.bn2 = nn.BatchNorm1d(num_features=64)   # normalises fc2's output
        self.fc3 = nn.Linear(64, 10)

    def forward(self, x):
        """Return raw class scores; each hidden block is Linear -> BatchNorm -> ReLU."""
        out = self.flatten(x)
        hidden_blocks = ((self.fc1, self.bn1), (self.fc2, self.bn2))
        for linear, norm in hidden_blocks:
            out = norm(linear(out)).relu()
        return self.fc3(out)

# Define the Weight Initialization model
class WeightInitModel(nn.Module):
    """784 -> 128 -> 64 -> 10 ReLU network whose weights use Xavier-uniform init.

    Only the ``weight`` tensors are re-initialised; the biases keep whatever
    ``nn.Linear`` assigned at construction.
    """

    def __init__(self):
        super().__init__()
        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(28 * 28, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, 10)

        # Overwrite the freshly created weights with Xavier (Glorot) values
        self._initialize_weights()

    def forward(self, x):
        """Return raw class scores for ``x`` (ReLU on both hidden layers)."""
        hidden = self.fc1(self.flatten(x)).relu()
        hidden = self.fc2(hidden).relu()
        return self.fc3(hidden)

    def _initialize_weights(self):
        """Apply ``xavier_uniform_`` in place to the weights of fc1, fc2 and fc3, in that order."""
        for layer in (self.fc1, self.fc2, self.fc3):
            init.xavier_uniform_(layer.weight)

# List collecting one result record (dict) per model variant
results = []

# Define models to test (all three are instantiated here, before any training starts)
models = {
    'Dropout': DropoutModel(),
    'BatchNorm': BatchNormModel(),
    'WeightInit': WeightInitModel()
}

# Loop through each model
for name, model in models.items():
    # Every variant is trained with the same optimizer settings
    optimizer = optim.SGD(model.parameters(), lr=0.01)

    # train_model times its own training loop and returns the duration in
    # seconds, so there is no need to wrap the call in a second timer.
    training_duration = train_model(model, optimizer, criterion)

    # Evaluate the model to get accuracy
    accuracy = evaluate_model(model)

    # Store results
    results.append({
        "Model": name,
        "Accuracy": accuracy,
        "Execution Time": training_duration
    })

    # Print the results for this model
    print(f"Model: {name}, Accuracy: {accuracy:.2f}%, Execution Time: {training_duration:.2f} seconds")

# Final Results Summary
print("\nSummary of Results:")
for result in results:
    print(f"Model: {result['Model']}, Accuracy: {result['Accuracy']:.2f}%, Execution Time: {result['Execution Time']:.2f} seconds")



Epoch [1/5], Loss: 1.5487
Epoch [2/5], Loss: 0.7958
Epoch [3/5], Loss: 0.6195
Epoch [4/5], Loss: 0.5351
Epoch [5/5], Loss: 0.4880
Model: Dropout, Accuracy: 92.36%, Execution Time: 30.98 seconds
Epoch [1/5], Loss: 0.5722
Epoch [2/5], Loss: 0.2195
Epoch [3/5], Loss: 0.1557
Epoch [4/5], Loss: 0.1210
Epoch [5/5], Loss: 0.0996
Model: BatchNorm, Accuracy: 97.36%, Execution Time: 29.36 seconds
Epoch [1/5], Loss: 0.5805
Epoch [2/5], Loss: 0.3061
Epoch [3/5], Loss: 0.2560
Epoch [4/5], Loss: 0.2244
Epoch [5/5], Loss: 0.2018
Model: WeightInit, Accuracy: 94.48%, Execution Time: 30.62 seconds

Summary of Results:
Model: Dropout, Accuracy: 92.36%, Execution Time: 30.98 seconds
Model: BatchNorm, Accuracy: 97.36%, Execution Time: 29.36 seconds
Model: WeightInit, Accuracy: 94.48%, Execution Time: 30.62 seconds
