In [None]:
!pip install optuna


Collecting optuna
  Downloading optuna-3.6.1-py3-none-any.whl (380 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m380.1/380.1 kB[0m [31m4.6 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting alembic>=1.5.0 (from optuna)
  Downloading alembic-1.13.2-py3-none-any.whl (232 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m233.0/233.0 kB[0m [31m6.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting colorlog (from optuna)
  Downloading colorlog-6.8.2-py3-none-any.whl (11 kB)
Collecting Mako (from alembic>=1.5.0->optuna)
  Downloading Mako-1.3.5-py3-none-any.whl (78 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m78.6/78.6 kB[0m [31m8.3 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: Mako, colorlog, alembic, optuna
Successfully installed Mako-1.3.5 alembic-1.13.2 colorlog-6.8.2 optuna-3.6.1


In [20]:
# Import necessary libraries
import torch
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, random_split, Subset
import torch.nn as nn
from torch.optim import SGD
import optuna
import numpy as np
from sklearn.model_selection import KFold
import matplotlib.pyplot as plt

# Define a transform to normalize the data
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])


# Loading MNIST dataset from torchvision
train_set = datasets.MNIST(root='./data', train=True, download=True, transform=transform)
train_loader = DataLoader(train_set, batch_size=64, shuffle=True)

#Load the test set (Use the same thing as train_set)
test_set = datasets.MNIST(root='./data', train=False, download=True, transform=transform)
test_loader = DataLoader(test_set, batch_size=64, shuffle=False)


In [21]:



# Set manual seed for reproducibility
torch.manual_seed(42)

# Define the neural network model with hyperparameters
class NN_classification(nn.Module):
    def __init__(self, n_kernels1, n_kernels2, kernel_size1, kernel_size2, padding1, padding2, stride1, stride2):
        super(NN_classification, self).__init__()
        self.conv1 = nn.Conv2d(1, n_kernels1, kernel_size=kernel_size1, padding=padding1, stride=stride1)
        self.conv2 = nn.Conv2d(n_kernels1, n_kernels2, kernel_size=kernel_size2, padding=padding2, stride=stride2)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        self.fc1 = nn.Linear(n_kernels2 * 7 * 7, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, 10)
        self.relu = nn.ReLU()

    def forward(self, x):
        x = self.pool(self.relu(self.conv1(x)))
        x = self.pool(self.relu(self.conv2(x)))
        x = x.view(-1, 64 * 7 * 7)  # Flatten the tensor
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        x = self.fc3(x)
        return x

In [22]:


# Define the objective function for Optuna with cross-validation
def objective(trial):
    # Suggest hyperparameters
    n_kernels1 = trial.suggest_int('n_kernels1', 16, 64)
    n_kernels2 = trial.suggest_int('n_kernels2', 32, 128)
    kernel_size1 = trial.suggest_int('kernel_size1', 3, 7)
    kernel_size2 = trial.suggest_int('kernel_size2', 3, 7)
    padding1 = trial.suggest_int('padding1', 0, 2)
    padding2 = trial.suggest_int('padding2', 0, 2)
    stride1 = trial.suggest_int('stride1', 1, 2)
    stride2 = trial.suggest_int('stride2', 1, 2)

    # Create the model
    model = NN_classification(n_kernels1, n_kernels2, kernel_size1, kernel_size2, padding1, padding2, stride1, stride2)

    # Define the optimizer and loss function
    optimizer = SGD(model.parameters(), lr=0.01)
    loss_fn = nn.CrossEntropyLoss()

    # Cross-validation
    k_folds = 5
    kfold = KFold(n_splits=k_folds, shuffle=True, random_state=42)
    val_accuracies = []

    for fold, (train_ids, val_ids) in enumerate(kfold.split(train_set)):
        train_subsampler = Subset(train_set, train_ids)
        val_subsampler = Subset(train_set, val_ids)
        train_loader = DataLoader(train_subsampler, batch_size=64, shuffle=True)
        val_loader = DataLoader(val_subsampler, batch_size=64, shuffle=False)

        best_val_loss = float('inf')
        epochs_no_improve = 0
        patience = 5
        num_epochs = 50

        for epoch in range(num_epochs):
            model.train()
            running_train_loss = 0.0
            for images, labels in train_loader:
                predictions = model(images)
                loss = loss_fn(predictions, labels)
                loss.backward()
                optimizer.step()
                optimizer.zero_grad()
                running_train_loss += loss.item()

            model.eval()
            running_val_loss = 0.0
            correct = 0
            total = 0
            with torch.no_grad():
                for images, labels in val_loader:
                    predictions = model(images)
                    loss = loss_fn(predictions, labels)
                    running_val_loss += loss.item()
                    _, predicted_labels = torch.max(predictions, 1)
                    total += labels.size(0)
                    correct += (predicted_labels == labels).sum().item()

            val_loss = running_val_loss / len(val_loader)
            if val_loss < best_val_loss:
                best_val_loss = val_loss
                epochs_no_improve = 0
            else:
                epochs_no_improve += 1

            if epochs_no_improve >= patience:
                print(f'Early stopping at epoch {epoch+1} for fold {fold+1}')
                break

        val_accuracy = correct / total
        val_accuracies.append(val_accuracy)

    avg_val_accuracy = np.mean(val_accuracies)
    return avg_val_accuracy

# Create the study and optimize
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=20)

# Print the best hyperparameters
print('Best hyperparameters: ', study.best_params)





[I 2024-07-16 22:28:37,015] A new study created in memory with name: no-name-1e7a1361-55d3-4274-afbc-338d23d63bcb
[W 2024-07-16 22:28:37,099] Trial 0 failed with parameters: {'n_kernels1': 58, 'n_kernels2': 104, 'kernel_size1': 4, 'kernel_size2': 3, 'padding1': 0, 'padding2': 0, 'stride1': 1, 'stride2': 2} because of the following error: RuntimeError("shape '[-1, 3136]' is invalid for input of size 26624").
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/optuna/study/_optimize.py", line 196, in _run_trial
    value_or_values = func(trial)
  File "<ipython-input-22-b42f38ee572b>", line 40, in objective
    predictions = model(images)
  File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
  File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1541, in _call_impl
    return forward_call(*args, **kwargs)
  File "<ipython-input-21-7

RuntimeError: shape '[-1, 3136]' is invalid for input of size 26624

In [None]:
# Train the final model with the best hyperparameters on the entire training set
best_params = study.best_params
best_model = NN_classification(
    best_params['n_kernels1'],
    best_params['n_kernels2'],
    best_params['kernel_size1'],
    best_params['kernel_size2'],
    best_params['padding1'],
    best_params['padding2'],
    best_params['stride1'],
    best_params['stride2']
)

# Create data loader for the full training set
train_loader = DataLoader(train_set, batch_size=64, shuffle=True)

# Define the optimizer and loss function for the final model
optimizer = SGD(best_model.parameters(), lr=0.01)
loss_fn = nn.CrossEntropyLoss()

# Train the final model
num_epochs = 10
train_losses = []
val_losses = []

for epoch in range(num_epochs):
    best_model.train()
    running_train_loss = 0.0
    for images, labels in train_loader:
        predictions = best_model(images)
        loss = loss_fn(predictions, labels)
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
        running_train_loss += loss.item()
    train_losses.append(running_train_loss / len(train_loader))


In [None]:
# Evaluate the final model on the test set
best_model.eval()
test_loss = 0.0
correct = 0
total = 0
with torch.no_grad():
    for images, labels in test_loader:
        predictions = best_model(images)
        loss = loss_fn(predictions, labels)
        test_loss += loss.item()
        _, predicted_labels = torch.max(predictions, 1)
        total += labels.size(0)
        correct += (predicted_labels == labels).sum().item()

test_accuracy = correct / total
print(f"Test Loss: {test_loss/len(test_loader)}")
print(f"Test Accuracy: {test_accuracy}")

# Plot the final training loss
plt.figure(figsize=(10, 5))
plt.plot(train_losses, label='Training Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.title('Training Loss for the Final Model')
plt.show()