#CNN4

In [1]:
import numpy as np
import matplotlib.pyplot as plt

from pathlib import Path

import torch
from torch.autograd import Function
from torchvision import datasets, transforms
import torch.optim as optim
from torch.optim import lr_scheduler
import torch.nn as nn
import torch.nn.functional as F



In [2]:
# Loading of the MNIST dataset
train_data = datasets.MNIST(
    root = 'data',  # root: The root directory where the dataset should be stored. In this case, it is set to 'data'. If the 'data' directory doesn't exist, the dataset will be downloaded to this location.
    train = True,
    transform = transforms.ToTensor(),  # transform: This parameter applies transformations to the data. In this case, transforms.ToTensor() is used to convert the images to PyTorch tensors.
    download = True,
)
test_data = datasets.MNIST(
    root = 'data',
    train = False,
    transform = transforms.ToTensor()
)

In [3]:
#Setting of the main hyper-parameters of the model
batch_size = 4 # The number of samples in each mini-batch used during training. Smaller batch sizes can lead to faster convergence but may introduce more noise into the training process.
n_train = batch_size * 125    # The total size of the training dataset. It's calculated as the product of batch_size and the number of batches (125 in this case). Adjusting the training dataset size can impact the model's ability to generalize.
n_test = batch_size * 25     # The total size of the test dataset. Similar to n_train, it's calculated as the product of batch_size and the number of test batches (25 in this case). The test dataset is used to evaluate the model's performance on unseen data.
n_channels = 4  # The number of channels in the output of the quantum convolution layer. In your model, you have set it to 4. This parameter determines the depth of the feature maps produced by the convolutional layer.
initial_lr =  0.005     # The initial learning rate for the stochastic gradient descent (SGD) optimizer.

In [4]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        # Convolutional layer 1 with 1 input channels, 4 output channels, and 4x4 kernel
        self.conv = nn.Conv2d(1, 4, 4, stride=4)
        self.fc = nn.Linear(4 * 7 * 7, 10)

    def forward(self, x):
        # Propagate the input through the CNN layers
        x = self.conv(x)
        # Flatten the output from the convolutional layer
        x = torch.flatten(x, start_dim=1)
        x = F.relu(self.fc(x))
        return x
cnn=Net()

In [5]:
dataset  = train_data
train_size = n_train
train_set, val_set = torch.utils.data.random_split(dataset, [train_size, len(dataset) - train_size])
train_loader = torch.utils.data.DataLoader(train_set, batch_size=batch_size, shuffle=True)
for data in train_loader:
    inputs, labels = data
    print(f"{inputs.shape=}")
    print(f"{labels=}")
    outputs = cnn(inputs)
    print(f"{outputs.shape=}")
    print(f"{outputs=}")
    break

inputs.shape=torch.Size([4, 1, 28, 28])
labels=tensor([4, 6, 5, 4])
outputs.shape=torch.Size([4, 10])
outputs=tensor([[0.0098, 0.0740, 0.0316, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.2329,
         0.0000],
        [0.0000, 0.1449, 0.2362, 0.0000, 0.0000, 0.0000, 0.1163, 0.0608, 0.2323,
         0.0000],
        [0.0000, 0.1602, 0.1773, 0.0000, 0.0075, 0.1945, 0.0000, 0.0000, 0.3285,
         0.0000],
        [0.0000, 0.0613, 0.1005, 0.0000, 0.0000, 0.0000, 0.0000, 0.3825, 0.0712,
         0.0000]], grad_fn=<ReluBackward0>)


In [6]:
# Train the model

import datetime
import os

dataset  = train_data

# Initialize your QCNN model
cnn = Net()

# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()  # Cross-entropy loss for classification
optimizer = optim.SGD(cnn.parameters(), lr=initial_lr, momentum=0.90)  # Stochastic Gradient Descent optimizer
# Create a learning rate scheduler
# Here, we use StepLR which reduces the learning rate by a factor every step_size epochs
scheduler = lr_scheduler.StepLR(optimizer, step_size=1, gamma=1.0)
# Split your data into training and validation sets
train_size = n_train
train_set, val_set = torch.utils.data.random_split(dataset, [train_size, len(dataset) - train_size])
train_loader = torch.utils.data.DataLoader(train_set, batch_size=batch_size, shuffle=True)
val_loader = torch.utils.data.DataLoader(val_set, batch_size=batch_size, shuffle=False)

MODEL_PATH = Path("models")
MODEL_PATH.mkdir(parents=True, exist_ok=True)

MODEL_NAME = "ImgClass-Quanvolv.pth"
MODEL_SAVE_PATH = MODEL_PATH / MODEL_NAME

RESUME_TRAINING = True

# Training loop
num_epochs = 20
loss_list = []
cnn.train()

if RESUME_TRAINING is False:
    print(f"Restore model state from {MODEL_SAVE_PATH}")
    if os.path.exists(MODEL_SAVE_PATH):
        model_dict = torch.load(MODEL_SAVE_PATH)
        initial_epoch = model_dict['epoch'] + 1
        cnn.load_state_dict(model_dict['model_state_dict'])
        optimizer.load_state_dict(model_dict['optimizer_state_dict'])
        loss_list = model_dict['loss'].copy()
    else:
        print(f"No saved model state found. Training from scratch.")
        initial_epoch = 0
        loss_list = []
else:
    initial_epoch = 0
    loss_list = []

for epoch in range(num_epochs):
    ct = datetime.datetime.now()
    # Decay Learning Rate
    optimizer.step()
    scheduler.step()
    lr = scheduler.get_last_lr()
    print(f"{epoch=}, {lr=}, {ct}")
    running_loss = []
    for i, data in enumerate(train_loader, 0):
        inputs, labels = data
        optimizer.zero_grad()  # Zero the parameter gradients to avoid accumulation
        outputs = cnn(inputs)  # Forward pass
        loss = criterion(outputs, labels)  # Compute the loss
        loss.backward()  # Backpropagation
        running_loss.append(loss.item())
        optimizer.step()  # Update the model parameters
    loss_list.append(sum(running_loss) / len(running_loss))
    print('Training [{:.0f}%]\tLoss: {:.4f}'.format(100. * (epoch + 1) / num_epochs, loss_list[-1]))
    torch.save({
        'epoch': epoch,
        'model_state_dict': cnn.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'loss': loss_list,
    }, MODEL_SAVE_PATH)
    print(f"Saving model state to {MODEL_SAVE_PATH}")

print('Finished Training')

epoch=0, lr=[0.005], 2024-10-15 20:49:20.067674
Training [5%]	Loss: 2.2053
Saving model state to models\ImgClass-Quanvolv.pth
epoch=1, lr=[0.005], 2024-10-15 20:49:21.015714
Training [10%]	Loss: 1.7037
Saving model state to models\ImgClass-Quanvolv.pth
epoch=2, lr=[0.005], 2024-10-15 20:49:21.784090
Training [15%]	Loss: 1.4835
Saving model state to models\ImgClass-Quanvolv.pth
epoch=3, lr=[0.005], 2024-10-15 20:49:22.518950
Training [20%]	Loss: 1.4110
Saving model state to models\ImgClass-Quanvolv.pth
epoch=4, lr=[0.005], 2024-10-15 20:49:23.410651
Training [25%]	Loss: 1.3557
Saving model state to models\ImgClass-Quanvolv.pth
epoch=5, lr=[0.005], 2024-10-15 20:49:24.540201
Training [30%]	Loss: 1.3359
Saving model state to models\ImgClass-Quanvolv.pth
epoch=6, lr=[0.005], 2024-10-15 20:49:25.341898
Training [35%]	Loss: 1.3124
Saving model state to models\ImgClass-Quanvolv.pth
epoch=7, lr=[0.005], 2024-10-15 20:49:26.012129
Training [40%]	Loss: 1.2842
Saving model state to models\ImgClas

In [7]:
cnn.state_dict()

OrderedDict([('conv.weight',
              tensor([[[[-1.5493e-01,  6.1699e-02, -3.4229e-01, -5.7181e-01],
                        [ 1.3176e-01, -4.4332e-01, -8.5191e-01, -7.0117e-01],
                        [-7.6336e-02, -5.5908e-01, -6.2234e-01, -9.7374e-01],
                        [-6.8768e-01, -5.2798e-01, -8.6005e-01, -1.0156e+00]]],
              
              
                      [[[-2.1123e-01, -1.4189e-01,  4.4162e-02,  3.0915e-01],
                        [ 1.6275e-01,  2.9751e-01,  3.0260e-01,  6.7250e-01],
                        [ 2.3246e-01,  4.3027e-01,  5.7225e-01,  4.0087e-01],
                        [-1.6238e-01,  4.4946e-02,  2.6196e-01,  1.2477e-01]]],
              
              
                      [[[ 7.9610e-01,  6.7278e-01,  5.1429e-01,  5.8536e-01],
                        [ 1.5719e-01,  1.0988e-01, -7.4023e-02, -1.1513e-03],
                        [-5.4425e-01, -4.7641e-01, -3.8050e-01, -5.2670e-01],
                        [-1.1971e+00, -6.9468e-01

In [8]:
#accuracy

# Use a small subset of the full validation dataset
from torch.utils.data import SubsetRandomSampler

K = n_test # enter your length here
subsample_train_indices = torch.randperm(len(val_set))[:K]
val_loader = torch.utils.data.DataLoader(val_set, batch_size=batch_size, sampler=SubsetRandomSampler(subsample_train_indices))

correct = 0
total = 0
# Set the model to evaluation mode
cnn.eval()
with torch.inference_mode():
    for data in val_loader:
        images, labels = data
        outputs = cnn(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
print(f'Accuracy on the validation set: {100 * correct / total:.2f}%')

Accuracy on the validation set: 41.00%
