#CNN4

In [5]:
import numpy as np
import matplotlib.pyplot as plt

from pathlib import Path

import torch
from torch.autograd import Function
from torchvision import datasets, transforms
import torch.optim as optim
from torch.optim import lr_scheduler
import torch.nn as nn
import torch.nn.functional as F
import kagglehub



  from .autonotebook import tqdm as notebook_tqdm


In [6]:
# Loading of the MNIST dataset
path = kagglehub.dataset_download("hojjatk/mnist-dataset")
train_data = datasets.MNIST(
    root = path,  # root: The root directory where the dataset should be stored. In this case, it is set to 'data'. If the 'data' directory doesn't exist, the dataset will be downloaded to this location.
    train = True,
    transform = transforms.ToTensor(),  # transform: This parameter applies transformations to the data. In this case, transforms.ToTensor() is used to convert the images to PyTorch tensors.
    download = True,
)
test_data = datasets.MNIST(
    root = path,
    train = False,
    transform = transforms.ToTensor()
)

Downloading from https://www.kaggle.com/api/v1/datasets/download/hojjatk/mnist-dataset?dataset_version_number=1...


100%|██████████| 22.0M/22.0M [00:00<00:00, 25.4MB/s]

Extracting files...



100.0%
100.0%
100.0%
100.0%


In [7]:
#Setting of the main hyper-parameters of the model
batch_size = 4 # The number of samples in each mini-batch used during training. Smaller batch sizes can lead to faster convergence but may introduce more noise into the training process.
n_train = batch_size * 125    # The total size of the training dataset. It's calculated as the product of batch_size and the number of batches (125 in this case). Adjusting the training dataset size can impact the model's ability to generalize.
n_test = batch_size * 25     # The total size of the test dataset. Similar to n_train, it's calculated as the product of batch_size and the number of test batches (25 in this case). The test dataset is used to evaluate the model's performance on unseen data.
n_channels = 4  # The number of channels in the output of the quantum convolution layer. In your model, you have set it to 4. This parameter determines the depth of the feature maps produced by the convolutional layer.
initial_lr =  0.005     # The initial learning rate for the stochastic gradient descent (SGD) optimizer.

In [8]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        # Convolutional layer 1 with 1 input channels, 4 output channels, and 4x4 kernel
        self.conv = nn.Conv2d(1, 4, 4, stride=4)
        self.fc = nn.Linear(4 * 7 * 7, 10)

    def forward(self, x):
        # Propagate the input through the CNN layers
        x = self.conv(x)
        # Flatten the output from the convolutional layer
        x = torch.flatten(x, start_dim=1)
        x = F.relu(self.fc(x))
        return x
cnn=Net()

In [9]:
dataset  = train_data
train_size = n_train
train_set, val_set = torch.utils.data.random_split(dataset, [train_size, len(dataset) - train_size])
train_loader = torch.utils.data.DataLoader(train_set, batch_size=batch_size, shuffle=True)
for data in train_loader:
    inputs, labels = data
    print(f"{inputs.shape=}")
    print(f"{labels=}")
    outputs = cnn(inputs)
    print(f"{outputs.shape=}")
    print(f"{outputs=}")
    break

inputs.shape=torch.Size([4, 1, 28, 28])
labels=tensor([6, 2, 6, 5])
outputs.shape=torch.Size([4, 10])
outputs=tensor([[0.0136, 0.0000, 0.0000, 0.0000, 0.1541, 0.0806, 0.0000, 0.0186, 0.3018,
         0.0000],
        [0.1908, 0.0000, 0.0000, 0.0853, 0.0992, 0.0000, 0.0000, 0.1334, 0.1714,
         0.0741],
        [0.0832, 0.0000, 0.0042, 0.0000, 0.1244, 0.0000, 0.0460, 0.0000, 0.0222,
         0.0944],
        [0.0000, 0.0000, 0.1285, 0.0000, 0.0608, 0.0864, 0.0000, 0.0000, 0.2706,
         0.0716]], grad_fn=<ReluBackward0>)


In [13]:
# Train the model

import datetime
import os

dataset  = train_data

# Initialize your QCNN model
cnn = Net()

# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()  # Cross-entropy loss for classification
optimizer = optim.SGD(cnn.parameters(), lr=initial_lr, momentum=0.90)  # Stochastic Gradient Descent optimizer
# Create a learning rate scheduler
# Here, we use StepLR which reduces the learning rate by a factor every step_size epochs
scheduler = lr_scheduler.StepLR(optimizer, step_size=1, gamma=1.0)
# Split your data into training and validation sets
train_size = n_train
train_set, val_set = torch.utils.data.random_split(dataset, [train_size, len(dataset) - train_size])
train_loader = torch.utils.data.DataLoader(train_set, batch_size=batch_size, shuffle=True)
val_loader = torch.utils.data.DataLoader(val_set, batch_size=batch_size, shuffle=False)

MODEL_PATH = Path("models")
MODEL_PATH.mkdir(parents=True, exist_ok=True)

MODEL_NAME = "ImgClass-Quanvolv.pth"
MODEL_SAVE_PATH = MODEL_PATH / MODEL_NAME

RESUME_TRAINING = True

# Training loop
num_epochs = 30
loss_list = []
cnn.train()

if RESUME_TRAINING is False:
    print(f"Restore model state from {MODEL_SAVE_PATH}")
    if os.path.exists(MODEL_SAVE_PATH):
        model_dict = torch.load(MODEL_SAVE_PATH)
        initial_epoch = model_dict['epoch'] + 1
        cnn.load_state_dict(model_dict['model_state_dict'])
        optimizer.load_state_dict(model_dict['optimizer_state_dict'])
        loss_list = model_dict['loss'].copy()
    else:
        print(f"No saved model state found. Training from scratch.")
        initial_epoch = 0
        loss_list = []
else:
    initial_epoch = 0
    loss_list = []

for epoch in range(num_epochs):
    ct = datetime.datetime.now()
    # Decay Learning Rate
    optimizer.step()
    scheduler.step()
    lr = scheduler.get_last_lr()
    print(f"{epoch=}, {lr=}, {ct}")
    running_loss = []
    for i, data in enumerate(train_loader, 0):
        inputs, labels = data
        optimizer.zero_grad()  # Zero the parameter gradients to avoid accumulation
        outputs = cnn(inputs)  # Forward pass
        loss = criterion(outputs, labels)  # Compute the loss
        loss.backward()  # Backpropagation
        running_loss.append(loss.item())
        optimizer.step()  # Update the model parameters
    loss_list.append(sum(running_loss) / len(running_loss))
    print('Training [{:.0f}%]\tLoss: {:.4f}'.format(100. * (epoch + 1) / num_epochs, loss_list[-1]))
    torch.save({
        'epoch': epoch,
        'model_state_dict': cnn.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'loss': loss_list,
    }, MODEL_SAVE_PATH)
    print(f"Saving model state to {MODEL_SAVE_PATH}")

print('Finished Training')

epoch=0, lr=[0.005], 2025-07-12 17:48:21.560920
Training [3%]	Loss: 2.0916
Saving model state to models/ImgClass-Quanvolv.pth
epoch=1, lr=[0.005], 2025-07-12 17:48:21.635545
Training [7%]	Loss: 1.2708
Saving model state to models/ImgClass-Quanvolv.pth
epoch=2, lr=[0.005], 2025-07-12 17:48:21.696070
Training [10%]	Loss: 1.0235
Saving model state to models/ImgClass-Quanvolv.pth
epoch=3, lr=[0.005], 2025-07-12 17:48:21.755321
Training [13%]	Loss: 0.9353
Saving model state to models/ImgClass-Quanvolv.pth
epoch=4, lr=[0.005], 2025-07-12 17:48:21.817065
Training [17%]	Loss: 0.8885
Saving model state to models/ImgClass-Quanvolv.pth
epoch=5, lr=[0.005], 2025-07-12 17:48:21.873780
Training [20%]	Loss: 0.8644
Saving model state to models/ImgClass-Quanvolv.pth
epoch=6, lr=[0.005], 2025-07-12 17:48:21.929229
Training [23%]	Loss: 0.8418
Saving model state to models/ImgClass-Quanvolv.pth
epoch=7, lr=[0.005], 2025-07-12 17:48:21.989628
Training [27%]	Loss: 0.8167
Saving model state to models/ImgClass

In [159]:
cnn.state_dict()

OrderedDict([('conv.weight',
              tensor([[[[-9.2388e-01, -5.8929e-01, -6.7031e-01, -5.2077e-01],
                        [-1.0605e+00, -1.0203e+00, -8.6686e-01, -3.9097e-01],
                        [-1.0201e+00, -6.6626e-01, -2.3081e-01, -3.9852e-01],
                        [-8.1234e-01, -2.5749e-01,  8.2552e-04,  2.5906e-02]]],
              
              
                      [[[-1.7290e-01, -1.0526e-01,  2.5084e-02,  5.1990e-01],
                        [ 4.0717e-02, -6.1931e-02,  2.3308e-01,  1.1402e-01],
                        [-1.9381e-01, -2.5186e-01, -1.3225e-01, -7.5126e-01],
                        [-5.5205e-01, -6.6428e-01, -2.5852e-01, -9.1830e-01]]],
              
              
                      [[[ 1.7420e-01,  4.1984e-01,  9.8764e-01,  5.3831e-01],
                        [-1.9157e-02, -6.4329e-02,  3.8352e-01,  5.9392e-01],
                        [ 6.6614e-02,  1.3925e-01,  8.1434e-01,  1.3035e+00],
                        [-4.7035e-02,  5.9577e-01

In [14]:
#accuracy

# Use a small subset of the full validation dataset
from torch.utils.data import SubsetRandomSampler

K = n_test # enter your length here
subsample_train_indices = torch.randperm(len(val_set))[:K]
val_loader = torch.utils.data.DataLoader(val_set, batch_size=batch_size, sampler=SubsetRandomSampler(subsample_train_indices))

correct = 0
total = 0
# Set the model to evaluation mode
cnn.eval()
with torch.inference_mode():
    for data in val_loader:
        images, labels = data
        outputs = cnn(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
print(f'Accuracy on the validation set: {100 * correct / total:.2f}%')

Accuracy on the validation set: 65.00%
