# Assignment 2: Convolutional Neural Networks and AutoML
---

# Import Required Libraries
Import the necessary libraries, including PyTorch and the chosen hyperparameter optimization library.

In [13]:
# Importing necessary libraries
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from tqdm import tqdm
from support import load_dataset

# Importing hyperparameter optimization library
import optuna
# from hyperopt import hp, fmin, tpe, STATUS_OK, Trials

# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Load and Preprocess the Wefabricate Dataset
Load the Wefabricate dataset and preprocess it for use with a CNN.

In [14]:
# Fetch the train / test splits from the project's `support` helper
train_data, test_data = load_dataset()

# Wrap each split in a batched loader; only the training batches are shuffled
train_loader = DataLoader(dataset=train_data, batch_size=32, shuffle=True)
test_loader = DataLoader(dataset=test_data, shuffle=False, batch_size=32)

# Define the CNN Model
Define a CNN to classify product images in the Wefabricate dataset.

In [None]:
# Define the CNN Model
class ConvNet(nn.Module):
    """LeNet-style CNN for 3-channel images.

    Two (5x5 convolution -> ReLU -> 2x2 max-pool) stages feed three fully
    connected layers. ``fc1`` expects the convolutional stack to emit a
    16 x 5 x 5 feature map, which is what a 32x32 input produces.

    Args:
        num_classes: Number of output logits. Defaults to 10.
    """

    def __init__(self, num_classes=10):
        super().__init__()
        # Convolutional layers
        self.conv1 = nn.Conv2d(3, 6, 5)  # 3 input channels -> 6 feature maps, 5x5 kernel
        self.pool = nn.MaxPool2d(2, 2)  # 2x2 window, stride 2 (shared by both conv stages)
        self.conv2 = nn.Conv2d(6, 16, 5)  # 6 -> 16 feature maps, 5x5 kernel

        # Fully connected layers
        self.fc1 = nn.Linear(16 * 5 * 5, 120)  # 16 channels x 5 x 5 spatial positions
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, num_classes)

    def forward(self, x):
        """Return raw class logits of shape (batch, num_classes) for a batch of images."""
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        # Flatten everything except the batch dimension. Unlike view(-1, 400),
        # this never folds surplus spatial features into the batch dimension,
        # so a wrongly sized input fails in fc1 instead of silently returning
        # predictions for a different number of samples.
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        # No softmax here: nn.CrossEntropyLoss consumes raw logits
        return self.fc3(x)

# Build the baseline network with its default architecture
model = ConvNet()

# Cross-entropy objective, minimised with momentum SGD
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(params=model.parameters(), lr=1e-3, momentum=0.9)

# Train the CNN Model
Train the CNN model on the training set.

In [12]:
# Move the model to the device and enable training-mode behaviour
model.to(device)
model.train()

# Training the CNN Model
for epoch in range(2):

    running_loss = 0.0
    # Wrap the loader itself rather than enumerate(loader): tqdm can then read
    # len(train_loader) and draw a real progress bar with an ETA.
    for batch in tqdm(train_loader):
        # each batch is indexable as [inputs, labels]
        inputs, labels = batch[0].to(device), batch[1].to(device)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    # Print the mean per-batch loss every epoch
    print(f"Epoch {epoch+1}, Loss: {running_loss/len(train_loader)}")

print("Finished Training")

5it [00:23,  4.75s/it]


Epoch 1, Loss: 2.3364688396453857


5it [00:21,  4.34s/it]

Epoch 2, Loss: 2.3165061473846436
Finished Training





# Test the CNN Model
Test the trained CNN model on the test set.

In [None]:
# Test the CNN Model
# Make sure the model is on `device` even if this cell runs without the training cell.
model.to(device)
# Switch to inference behaviour. ConvNet has no dropout or batch-norm, so this
# is a no-op today, but it keeps the evaluation correct if such layers are added.
model.eval()

correct = 0
total = 0
# Since we're testing, we don't need to calculate gradients
with torch.no_grad():
    for data in test_loader:
        # The batch must live on the same device as the model, otherwise the
        # forward pass raises a device-mismatch error on CUDA.
        images, labels = data[0].to(device), data[1].to(device)
        # Calculate outputs by running images through the network
        outputs = model(images)
        # The class with the highest logit is the prediction
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the test images: %d %%' % (
    100 * correct / total))

# Hyperparameter Selection
Choose five hyperparameters to tune: learning rate, batch size, number of epochs, optimizer type, and SGD momentum.

In [None]:
# Search space: each tunable hyperparameter mapped to its candidate values
hyperparameters = dict(
    learning_rate=[0.1, 0.01, 0.001, 0.0001],  # optimizer step size
    batch_size=[16, 32, 64, 128],  # samples per data-loader batch
    num_epochs=[5, 10, 15, 20],  # passes over the training set
    optimizer=['SGD', 'Adam'],  # optimizer family
    momentum=[0.9, 0.95, 0.99],  # only read when the optimizer is SGD
)

# Hyperparameter Optimization with Random Search
Use random search to tune the chosen hyperparameters.

In [None]:
# Hyperparameter Optimization with Random Search
from sklearn.model_selection import ParameterSampler
from torch.optim import SGD, Adam

# Define the number of iterations for the random search
n_iter_search = 20

# Create a random grid. The fixed seed makes a fresh kernel draw the same
# candidate configurations on every run (training itself is still unseeded).
random_grid = ParameterSampler(hyperparameters, n_iter=n_iter_search, random_state=0)

# Function to train and evaluate a model
def train_and_evaluate_model(params):
    """Train a fresh ConvNet with ``params`` and return its accuracy on ``test_data``.

    Args:
        params: Mapping with the keys 'optimizer' ('SGD' selects SGD, any other
            value selects Adam), 'learning_rate', 'batch_size', 'num_epochs'
            and, read only when the optimizer is SGD, 'momentum'.

    Returns:
        Fraction of correctly classified test samples, between 0 and 1.
    """
    # NOTE(review): candidates are scored on test_data, so the "best" accuracy
    # chosen from these scores is optimistically biased. Consider scoring on a
    # validation split held out from train_data instead.

    # Create a new model on the shared device so the search uses the GPU when available
    net = ConvNet().to(device)
    # Local loss object: the function no longer depends on a `criterion`
    # global defined in a different cell.
    loss_fn = nn.CrossEntropyLoss()

    # Choose the optimizer
    if params['optimizer'] == 'SGD':
        opt = optim.SGD(net.parameters(), lr=params['learning_rate'], momentum=params['momentum'])
    else:
        opt = optim.Adam(net.parameters(), lr=params['learning_rate'])

    # Create data loaders with the candidate batch size
    train_batches = DataLoader(train_data, batch_size=params['batch_size'], shuffle=True)
    test_batches = DataLoader(test_data, batch_size=params['batch_size'], shuffle=False)

    # Train the model
    net.train()
    for _ in range(params['num_epochs']):
        for batch in train_batches:
            inputs, labels = batch[0].to(device), batch[1].to(device)
            opt.zero_grad()
            loss = loss_fn(net(inputs), labels)
            loss.backward()
            opt.step()

    # Test the model
    net.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for batch in test_batches:
            images, labels = batch[0].to(device), batch[1].to(device)
            _, predicted = torch.max(net(images), 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    # Calculate accuracy as a fraction (not a percentage)
    return correct / total

# Evaluate every sampled configuration and remember the strictly best one
best_params, best_accuracy = None, 0
for candidate in random_grid:
    score = train_and_evaluate_model(candidate)
    if not (score > best_accuracy):
        # ties and worse scores keep the earlier winner
        continue
    best_accuracy, best_params = score, candidate

print('Best parameters found by random search:', best_params)
print('Best accuracy found by random search:', best_accuracy)

# Hyperparameter Optimization with Sophisticated Method
Use Optuna, whose default sampler is the Tree-structured Parzen Estimator (TPE), as a more sophisticated alternative to random search for tuning the chosen hyperparameters.

In [None]:
# Hyperparameter Optimization with Sophisticated Method

# Define the objective function for Optuna
def objective(trial):
    """Sample one hyperparameter configuration and return the accuracy it reaches.

    Args:
        trial: The Optuna trial used to draw hyperparameter values.

    Returns:
        The value returned by ``train_and_evaluate_model`` for the sampled
        configuration; the study maximises it.
    """
    # suggest_float replaces the deprecated suggest_loguniform / suggest_uniform
    params = {
        'learning_rate': trial.suggest_float('learning_rate', 1e-4, 1e-1, log=True),
        'batch_size': trial.suggest_categorical('batch_size', [16, 32, 64, 128]),
        'num_epochs': trial.suggest_categorical('num_epochs', [5, 10, 15, 20]),
        'optimizer': trial.suggest_categorical('optimizer', ['SGD', 'Adam']),
    }
    # Momentum is only consumed on the SGD path, so sample it only there;
    # otherwise the sampler would model a parameter that has no effect.
    if params['optimizer'] == 'SGD':
        params['momentum'] = trial.suggest_float('momentum', 0.9, 0.99)

    # Train and evaluate a model with these hyperparameters
    return train_and_evaluate_model(params)

# Set up a maximisation study and run 50 trials of the objective
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=50)

# Keep the winning configuration and its score (replaces any earlier values of these names)
best_params, best_accuracy = study.best_params, study.best_value

print('Best parameters found by sophisticated search:', best_params)
print('Best accuracy found by sophisticated search:', best_accuracy)

# 5-Fold Cross-Validation
Perform 5-fold cross-validation on the training set to calculate the validation accuracy for the hyperparameters.

In [None]:
# Import necessary libraries
from sklearn.model_selection import KFold
import numpy as np

# Define the number of folds for cross-validation
num_folds = 5

# Shuffle before splitting so a dataset stored in class order cannot produce
# folds that lack whole classes; the fixed seed keeps the split reproducible.
kf = KFold(n_splits=num_folds, shuffle=True, random_state=0)

# KFold only needs sample positions, so split an index array and take
# Subset views of train_data. This avoids assuming the dataset exposes
# `.imgs` / `.targets` as numeric arrays.
sample_indices = np.arange(len(train_data))

# Initialize a list to store the validation accuracies for each fold
validation_accuracies = []

# Perform 5-fold cross-validation
for train_index, val_index in kf.split(sample_indices):
    # Split the data into the training set and the validation set
    train_data_fold = torch.utils.data.Subset(train_data, train_index.tolist())
    val_data_fold = torch.utils.data.Subset(train_data, val_index.tolist())

    # Create data loaders for this fold
    train_loader_fold = DataLoader(train_data_fold, batch_size=64, shuffle=True)
    val_loader_fold = DataLoader(val_data_fold, batch_size=64, shuffle=False)

    # Fresh model, loss and optimizer per fold. Reusing one model would let
    # weights trained on earlier folds (which contained this fold's validation
    # samples) leak into the score.
    fold_model = ConvNet().to(device)
    fold_criterion = nn.CrossEntropyLoss()
    fold_optimizer = optim.SGD(fold_model.parameters(), lr=0.001, momentum=0.9)

    # Train the model on this fold
    fold_model.train()
    for epoch in range(10):  # loop over the dataset multiple times
        for batch in train_loader_fold:
            inputs, labels = batch[0].to(device), batch[1].to(device)
            fold_optimizer.zero_grad()
            loss = fold_criterion(fold_model(inputs), labels)
            loss.backward()
            fold_optimizer.step()

    # Test the model on the validation set. The counters are fold-specific
    # names so the notebook-level `correct` / `total` are left untouched.
    fold_model.eval()
    fold_correct = 0
    fold_total = 0
    with torch.no_grad():
        for batch in val_loader_fold:
            images, labels = batch[0].to(device), batch[1].to(device)
            _, predicted = torch.max(fold_model(images), 1)
            fold_total += labels.size(0)
            fold_correct += (predicted == labels).sum().item()

    # Calculate the accuracy for this fold and add it to the list of validation accuracies
    fold_accuracy = fold_correct / fold_total
    validation_accuracies.append(fold_accuracy)

# Calculate the average validation accuracy over all folds
average_validation_accuracy = sum(validation_accuracies) / len(validation_accuracies)

print('Average validation accuracy from 5-fold cross-validation:', average_validation_accuracy)

# Compare Results Before and After Hyperparameter Optimization
Compare the results obtained before and after automatic hyperparameter optimization in terms of accuracy.

In [None]:
# Compare Results Before and After Hyperparameter Optimization

# Accuracy before hyperparameter optimization, as a percentage.
# NOTE(review): `correct` and `total` are notebook-level names holding whatever
# the most recently executed evaluation loop left in them. Confirm they still
# hold the baseline test-set counts when this cell runs.
accuracy_before_optimization = 100 * correct / total

# `best_accuracy` is a fraction between 0 and 1, so scale it to a percentage.
# Without this the two numbers are on different scales and the comparison
# below is meaningless.
accuracy_after_optimization = 100 * best_accuracy

# Print the results
print(f'Accuracy before hyperparameter optimization: {accuracy_before_optimization}%')
print(f'Accuracy after hyperparameter optimization: {accuracy_after_optimization}%')

# Compare the results
if accuracy_after_optimization > accuracy_before_optimization:
    print("The accuracy improved after hyperparameter optimization.")
elif accuracy_after_optimization < accuracy_before_optimization:
    print("The accuracy decreased after hyperparameter optimization.")
else:
    print("The accuracy did not change after hyperparameter optimization.")

# Save Model Weights Before and After Hyperparameter Tuning
Save the trained model weights before and after hyperparameter tuning.

In [None]:
# Save the model weights before hyperparameter tuning
# NOTE(review): this stores `model` in whatever state earlier cells left it;
# confirm no intermediate cell has trained it beyond the baseline run.
torch.save(model.state_dict(), 'model_weights_before_tuning.pth')

# Build a fresh model configured with the best parameters found by the search
best_model = ConvNet().to(device)
if best_params['optimizer'] == 'SGD':
    best_optimizer = optim.SGD(best_model.parameters(), lr=best_params['learning_rate'], momentum=best_params['momentum'])
else:
    best_optimizer = optim.Adam(best_model.parameters(), lr=best_params['learning_rate'])

# Use the tuned batch size. The notebook-level train_loader is fixed at 32,
# which would silently discard one of the tuned hyperparameters.
best_train_loader = DataLoader(train_data, batch_size=best_params['batch_size'], shuffle=True)
best_criterion = nn.CrossEntropyLoss()

# Train the best model
best_model.train()
for epoch in range(best_params['num_epochs']):
    for batch in best_train_loader:
        inputs, labels = batch[0].to(device), batch[1].to(device)
        best_optimizer.zero_grad()
        loss = best_criterion(best_model(inputs), labels)
        loss.backward()
        best_optimizer.step()

# Save the model weights after hyperparameter tuning
torch.save(best_model.state_dict(), 'model_weights_after_tuning.pth')