### **Task 5**

#### **Step 0:** Import required libraries

In [1]:
import os
import sys
import cv2
import json
import shutil
import random
import numpy as np
import seaborn as sns
import pennylane as qml
import matplotlib.pyplot as plt
from pennylane import numpy as pnp
import torch
import torch.nn.functional as F
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report, roc_curve, auc
from contextlib import contextmanager
from tabulate import tabulate
from sklearn.preprocessing import OneHotEncoder
from sklearn.utils import shuffle

#### **Step 1:** Load and process data

In [2]:
# Count the total number of images in the folder
def count_subfolders_and_files(path_dir):
    """Print how many files live under ``path_dir``, including all subfolders.

    Every file reported by ``os.walk`` is counted regardless of its extension,
    so non-image files stored in the tree are part of the printed total.

    Args:
        path_dir: Root directory to walk.
    """
    total_files = 0
    for _dirpath, _dirnames, filenames in os.walk(path_dir):
        total_files += len(filenames)
    print(f"Total number of images: {total_files}")

In [3]:
# Report how many files the dataset folder contains (all subfolders included)
count_subfolders_and_files(path_dir='./dataset')

Total number of images: 1801


In [4]:
def load_and_process_data(path_dir, json_file, img_size=28, max_images_per_class=None):
    """Load annotated grayscale images, subsample each class, and split the result.

    The annotation file is a JSON object mapping an image path (relative to
    ``path_dir``) to its class label. For every class a random subset of the
    listed images is read with OpenCV in grayscale, resized to
    ``img_size`` x ``img_size``, given a trailing channel axis, and scaled to
    the range [0, 1]. The loaded samples are then split 80% / 10% / 10% into
    train / test / validation sets using a fixed ``random_state`` of 42.

    Args:
        path_dir: Base directory; both ``json_file`` and every image path in
            the annotations are joined onto it.
        json_file: Path of the JSON annotation file, relative to ``path_dir``.
        img_size: Side length, in pixels, of the square output images.
        max_images_per_class: Upper bound on the number of images sampled per
            class. ``None`` (the default) keeps every image of the class.

    Returns:
        A tuple ``((train_imgs, train_labels), (test_imgs, test_labels),
        (val_imgs, val_labels))``. Image arrays are ``float32`` with shape
        ``(n, img_size, img_size, 1)``; label arrays hold the labels exactly
        as they appear in the annotation file.

    Raises:
        ValueError: If none of the annotated images could be loaded.
    """
    json_path = os.path.join(path_dir, json_file)

    # Load JSON data
    with open(json_path, 'r') as file:
        annotations = json.load(file)

    # Organize images by class (a set drops duplicate paths)
    class_images = {}
    for img_path, label in annotations.items():
        class_images.setdefault(label, set()).add(os.path.join(path_dir, img_path))

    # Print total number of images found for each class
    for label, paths in class_images.items():
        print(f"Total number of images found for class {label}: {len(paths)}")
    print()

    # Limit to max_images_per_class randomly and resize images
    images = []
    labels = []
    image_count_per_class = {}
    for label, paths in class_images.items():
        # Sort before sampling: the iteration order of a set of strings is not
        # stable between interpreter runs, so without this a seeded RNG would
        # still pick different images from one run to the next.
        paths_list = sorted(paths)
        # None means "no limit"; min(len(...), None) would raise a TypeError.
        if max_images_per_class is None:
            sample_size = len(paths_list)
        else:
            sample_size = min(len(paths_list), max_images_per_class)
        selected_images = random.sample(paths_list, sample_size)

        count = 0
        skipped = 0
        for img_path in selected_images:
            image = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
            if image is None:
                # cv2.imread signals an unreadable/missing file by returning None
                print(f"Failed to load image: {img_path}")
                skipped += 1
                continue
            # Resize the image
            image = cv2.resize(image, (img_size, img_size))
            # Add channel dimension
            images.append(image[:, :, np.newaxis])
            labels.append(label)
            count += 1
        image_count_per_class[label] = count
        print(f"Class {label}: Copied {count}, Skipped {skipped}")

    if not images:
        raise ValueError(f"No images could be loaded from the annotations in {json_path}")

    # Convert list of images to numpy array and normalize to [0, 1]
    images = np.array(images, dtype='float32') / 255.0
    labels = np.array(labels)

    print()
    # Print total images and images copied per class
    tot_imgs = sum(image_count_per_class.values())
    counts = ", ".join(f"{k}:{v}" for k, v in image_count_per_class.items())
    print(f"Total images: {tot_imgs}, Images copied per class: {counts}")

    # Split into train (80%), then halve the remainder into test and validation
    train_imgs, test_imgs_temp, train_labels, test_labels_temp = train_test_split(
        images, labels, test_size=0.2, random_state=42
    )
    test_imgs, val_imgs, test_labels, val_labels = train_test_split(
        test_imgs_temp, test_labels_temp, test_size=0.5, random_state=42
    )

    print()
    print("Number of images after splitting:\n")
    print(f"Training images: {len(train_imgs)}, Testing images: {len(test_imgs)}, Validation images: {len(val_imgs)}")

    return (train_imgs, train_labels), (test_imgs, test_labels), (val_imgs, val_labels)

In [5]:
# Parameters
path_dir = './'
json_file = './dataset/data.json'
img_size = 28
max_images_per_class = 200

# load_and_process_data draws each class subset with random.sample; seed
# Python's RNG so those draws do not change from one kernel run to the next
# (the train/test/validation split already uses a fixed random_state).
random.seed(42)

# Load and split data
train_data, test_data, val_data = load_and_process_data(path_dir, json_file, img_size, max_images_per_class)

Total number of images found for class 0: 300
Total number of images found for class 1: 300
Total number of images found for class 2: 300
Total number of images found for class 3: 300
Total number of images found for class 4: 300
Total number of images found for class 5: 300

Class 0: Copied 200, Skipped 0
Class 1: Copied 200, Skipped 0
Class 2: Copied 200, Skipped 0
Class 3: Copied 200, Skipped 0
Class 4: Copied 200, Skipped 0
Class 5: Copied 200, Skipped 0

Total images: 1200, Images copied per class: 0:200, 1:200, 2:200, 3:200, 4:200, 5:200

Number of images after splitting:

Training images: 960, Testing images: 120, Validation images: 120


In [6]:
# Sanity check: shape of the training image array (samples, height, width, channels)
print(np.shape(train_data[0]))

(960, 28, 28, 1)


#### **Step 2:** Set up the quantum device

In [7]:
# Number of qubits: 2**10 = 1024 amplitudes, enough to hold a flattened
# 28x28 image (784 values) once it is zero-padded by the amplitude embedding
num_qubits = 10

# Set up the quantum device
dev = qml.device("default.qubit", wires=num_qubits)

#### **Step 3:** Define the quantum circuit that encodes features into quantum states



In [8]:
# Define the quantum circuit
@qml.qnode(dev, interface='torch')
def quantum_circuit(features, weights):
    """Amplitude-encode ``features``, apply entangling layers, measure Z on every qubit.

    Args:
        features: Feature vector for one sample; the embedding pads it with
            zeros and normalises it before loading it as state amplitudes.
        weights: Parameters passed to ``StronglyEntanglingLayers``.

    Returns:
        A list with one Pauli-Z expectation value per qubit (``num_qubits`` entries).
    """
    # NOTE(review): this yields num_qubits outputs, which downstream code feeds
    # to cross-entropy as logits; the dataset summary lists fewer classes than
    # qubits - confirm the extra outputs are intended.
    all_wires = range(num_qubits)

    # Embed the input features into the quantum state using amplitude encoding
    qml.templates.AmplitudeEmbedding(features, wires=all_wires, pad_with=0.0, normalize=True)

    # Apply entangling layers with the specified weights
    qml.templates.StronglyEntanglingLayers(weights, wires=all_wires)

    # Collect the expected value of the Pauli Z operator for each qubit
    measurements = []
    for wire in all_wires:
        measurements.append(qml.expval(qml.PauliZ(wire)))
    return measurements

In [9]:
# Function to compute the cost
def cost(weights, features, labels):
    """Mean cross-entropy of the circuit outputs over a batch.

    Args:
        weights: Circuit parameters forwarded to ``quantum_circuit``.
        features: Indexable batch of per-sample feature vectors.
        labels: Tensor of target class indices, one per sample.

    Returns:
        Scalar tensor holding the cross-entropy loss for the batch.
    """
    per_sample_logits = []
    for sample in features:
        # The circuit returns one value per measured qubit; stack them into a 1-D tensor
        outputs = quantum_circuit(sample, weights)
        per_sample_logits.append(torch.stack(outputs))

    # Shape: (batch, outputs per sample)
    batch_logits = torch.stack(per_sample_logits, dim=0)
    return F.cross_entropy(batch_logits, labels)

#### **Step 4:** Model Training

In [10]:
# Convert the training arrays to PyTorch tensors (float32 images, int64 labels)
train_imgs = torch.from_numpy(train_data[0]).float()
train_labels = torch.from_numpy(train_data[1]).long()

# Flatten each image into a single feature vector for the quantum model
train_imgs = train_imgs.view(train_imgs.shape[0], -1)

In [11]:
# print(train_imgs)
# print(train_imgs.shape)

In [12]:
# Number of layers
num_layers = 3

# Define the shape of the weights: (layers, qubits, parameters per rotation)
shape = (num_layers, num_qubits, 3)

# Seed the generator used for initialisation so that a fresh kernel starts
# training from the same weights every time
pnp.random.seed(42)

# Initialize the weights
weights = torch.tensor(pnp.random.random(shape), dtype=torch.float32, requires_grad=True)

# Define the optimizer
opt = torch.optim.Adam([weights], lr=0.3)

# Define the number of epochs
epochs = 50

# Full-batch training: every epoch sees the whole training set, so build the
# (flattened) batch once instead of re-creating it on each iteration
batch_features = train_imgs.view(train_imgs.size(0), -1)
batch_labels = train_labels

for epoch in range(epochs):
    opt.zero_grad()
    loss = cost(weights, batch_features, batch_labels)
    loss.backward()
    opt.step()

    # Print the loss
    print(f"Epoch {epoch}: Loss = {loss.item()}")

  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


Epoch 0: Loss = 2.284844317950581
Epoch 1: Loss = 2.263743296623548
Epoch 2: Loss = 2.214215088185398
Epoch 3: Loss = 2.188797532084825
Epoch 4: Loss = 2.173061530704898
Epoch 5: Loss = 2.157294200696742
Epoch 6: Loss = 2.1553725716332788
Epoch 7: Loss = 2.1551297302211365
Epoch 8: Loss = 2.1475877851036165
Epoch 9: Loss = 2.1370265210204944
Epoch 10: Loss = 2.127321971698859
Epoch 11: Loss = 2.121958777335858
Epoch 12: Loss = 2.1093852474902595
Epoch 13: Loss = 2.10261554544224
Epoch 14: Loss = 2.0886717832478174
Epoch 15: Loss = 2.073536841467328
Epoch 16: Loss = 2.0706960050835215
Epoch 17: Loss = 2.072096633014854
Epoch 18: Loss = 2.0721089622232043
Epoch 19: Loss = 2.069074466149156
Epoch 20: Loss = 2.0665265032644196
Epoch 21: Loss = 2.0649103907457116
Epoch 22: Loss = 2.0617716777497144
Epoch 23: Loss = 2.0601228287112496
Epoch 24: Loss = 2.0563171580636452
Epoch 25: Loss = 2.054847179907072
Epoch 26: Loss = 2.053631453775288
Epoch 27: Loss = 2.0512473531627213
Epoch 28: Loss = 

In [17]:
# Define the evaluation function
def evaluate_model(weights, features, labels):
    """Run the circuit on a dataset without tracking gradients and score it.

    Args:
        weights: Circuit parameters forwarded to ``quantum_circuit``.
        features: Indexable batch of per-sample feature vectors.
        labels: Tensor of target class indices, one per sample.

    Returns:
        A tuple ``(loss, accuracy, predicted_classes, true_classes, logits)``:
        the cross-entropy loss as a Python float, the accuracy computed by
        ``accuracy_score``, the predicted and true classes as NumPy arrays,
        and the stacked circuit outputs as a tensor.
    """
    with torch.no_grad():
        per_sample_logits = []
        for sample in features:
            per_sample_logits.append(torch.stack(quantum_circuit(sample, weights)))
        logits = torch.stack(per_sample_logits, dim=0)

        loss = F.cross_entropy(logits, labels).item()

        # The predicted class is the index of the largest output
        true_classes = labels.cpu().numpy()
        predicted_classes = logits.argmax(dim=1).cpu().numpy()

        accuracy = accuracy_score(true_classes, predicted_classes)

    return loss, accuracy, predicted_classes, true_classes, logits

In [18]:
# Convert the test and validation arrays to PyTorch tensors (float32 images, int64 labels)
test_imgs = torch.from_numpy(test_data[0]).float()
test_labels = torch.from_numpy(test_data[1]).long()
val_imgs = torch.from_numpy(val_data[0]).float()
val_labels = torch.from_numpy(val_data[1]).long()

# Flatten each image into a single feature vector, as done for the training set
test_imgs = test_imgs.view(test_imgs.shape[0], -1)
val_imgs = val_imgs.view(val_imgs.shape[0], -1)

In [19]:
# Evaluate the trained weights on the held-out test set ...
(
    test_loss,
    test_accuracy,
    test_pred_classes,
    test_true_classes,
    test_logits,
) = evaluate_model(weights, test_imgs, test_labels)

# ... and on the validation set
(
    val_loss,
    val_accuracy,
    val_pred_classes,
    val_true_classes,
    val_logits,
) = evaluate_model(weights, val_imgs, val_labels)

In [None]:
# Calculate accuracy from the class arrays returned by evaluate_model.
# The names previously used here (test_preds / val_preds) are not assigned by
# the evaluation cell, so this cell raised a NameError on a fresh kernel.
test_accuracy = accuracy_score(test_true_classes, test_pred_classes)
val_accuracy = accuracy_score(val_true_classes, val_pred_classes)