In [1]:
# Basic Tools
import time
import numpy as np 
import sys
sys.path.append("..")
from QuantumTrain.util_SPSA import *
import random

# PyTorch
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
import torchvision.transforms as transforms

# TorchQuantum
import torchquantum as tq
# Plotting
import matplotlib.pyplot as plt
from scipy.optimize import minimize

# Seeding is currently disabled, so results differ from run to run.
# To make a run repeatable, re-enable the two lines below; any use of the stdlib
# `random` module would additionally need its own `random.seed(...)` call.
# torch.manual_seed(42)
# np.random.seed(42)

# Run on the first CUDA GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [2]:
from qiskit.algorithms.optimizers import SPSA, QNSPSA

In [3]:

### Classical target model initialization ###

# Define the CNN model
class CNNModel(nn.Module):
    """Small classical CNN used as the target network.

    Two convolution + max-pooling stages are followed by two fully connected
    layers. No non-linear activation is applied between the layers. With
    28x28 single-channel inputs the spatial size evolves as
    28 -> 24 -> 12 -> 8 -> 4, which is why ``fc1`` expects ``12 * 4 * 4``
    input features. The network outputs 10 raw scores per sample.
    """

    def __init__(self):
        super().__init__()
        # Layers are created in the same order as they are applied in forward().
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=8, kernel_size=5)
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(in_channels=8, out_channels=12, kernel_size=5)
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(in_features=12 * 4 * 4, out_features=20)
        self.fc2 = nn.Linear(in_features=20, out_features=10)

    def forward(self, x):
        """Apply every layer in sequence and return the 10 output scores per sample."""
        pipeline = (
            self.conv1,
            self.pool1,
            self.conv2,
            self.pool2,
            self.flatten,
            self.fc1,
            self.fc2,
        )
        for layer in pipeline:
            x = layer(x)
        return x


# Instantiate the classical target model (no loss function is created in this cell)
model = CNNModel()

In [4]:
# Estimate how many qubits are needed to represent the target CNN's parameters.
# The call prints the parameter count and the qubit number (see the cell output).
# NOTE(review): required_qubits_estimation presumably comes from the star import of
# QuantumTrain.util_SPSA; the reported 13 qubits is consistent with 2**13 = 8192 >= 6690
# parameters, and nw_list_normal is presumably a per-layer weight-count list — confirm
# both against the helper's source.
n_qubit, nw_list_normal = required_qubits_estimation(model)


# of NN parameters:  6690
Required qubit number:  13


In [5]:

### Training setting ########################

step       = 1e-4   # Learning rate; only referenced by the commented-out Adam setup (the SPSA run sets its own rate)
batch_size = 1000    # Number of samples per mini-batch, used by both the train and the test loader
num_epochs = 100      # Number of training epochs; not referenced by the active SPSA cells visible in this notebook
q_depth    = 16     # Depth of the quantum circuit (number of variational layers)

# Dataset setup: MNIST images are converted to tensors and normalized with mean 0.5 / std 0.5.
# The dataset is downloaded to ./data if it is not already there.
# Only the training loader shuffles; the test loader keeps a fixed order.
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))])
train_dataset = datasets.MNIST(root='./data', train=True, transform=transform, download=True)
train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_dataset = datasets.MNIST(root='./data', train=False, transform=transform, download=True)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

# Build the Quantum-Train model and move it to `device`.
# No loss function or optimizer is created here: the loss is computed inside `objective`
# and the optimizer is the SPSA instance created in a later cell.
# NOTE(review): QuantumTrain(...) is called and its result is then called again with no
# arguments; presumably the first call builds a factory and the second returns the
# nn.Module — confirm in QuantumTrain.util_SPSA.
model_qt = QuantumTrain(
                        model,
                        n_qubit,
                        nw_list_normal,
                        q_depth,
                        device,
                        )().to(device)


# optimizer = optim.Adam(model_qt.parameters(), lr=step, weight_decay=1e-5, eps=1e-6)
# optimizer = optim.Adam([
#     {'params': model_qt.QuantumNN.parameters()},
#     {'params': model_qt.MappingNetwork.parameters()}
# ], lr=step, weight_decay=1e-5, eps=1e-6)


# scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience = 5, verbose = True, factor = 0.5)  # 'min' because we're minimizing loss


In [6]:

def _count_trainable(module):
    """Return the total number of scalar entries in ``module``'s parameters that require gradients."""
    return sum(weights.numel() for weights in module.parameters() if weights.requires_grad)


# Trainable-parameter counts for the two sub-modules and for the full model.
num_trainable_params_MM = _count_trainable(model_qt.MappingNetwork)
num_trainable_params_QNN = _count_trainable(model_qt.QuantumNN)
num_trainable_params = _count_trainable(model_qt)

print("# of trainable parameter in Mapping model: ", num_trainable_params_MM)
print("# of trainable parameter in QNN model: ", num_trainable_params_QNN)
print("# of trainable parameter in full model: ",  num_trainable_params)


# of trainable parameter in Mapping model:  249
# of trainable parameter in QNN model:  1248
# of trainable parameter in full model:  1497


In [7]:
# loss_list = [] 
# acc_list = [] 


# # Objective function adjusted for DataLoader
# def objective(params):
#     total_loss = 0.0
#     param_idx = 0  # Keep track of the position in the flat param array
    

#     # Update model parameters
#     with torch.no_grad():
#         for param in model_qt.parameters():
#             param_size = param.numel()
#             # Extract the corresponding segment from the flat parameter array
#             new_param_segment = params[param_idx:param_idx + param_size]
            
#             if new_param_segment.size != param_size:
#                 # Size mismatch error, potentially due to incorrect parameter handling
#                 raise ValueError(f"Parameter size mismatch: expected {param_size}, got {new_param_segment.size}")

#             # Reshape and update the model parameter
#             param.copy_(torch.from_numpy(new_param_segment).view_as(param))
            
#             # Update the index for the next parameter
#             param_idx += param_size
            
#     # Iterate over batches
#     for i, (images, labels) in enumerate(train_loader):
#         correct = 0
#         total = 0
#         since_batch = time.time()
        
#         images, labels = images.to(device), labels.to(device)  # Move data to GPU
#         # Compute loss for this batch
#         outputs = model_qt(images)
        
#         labels_one_hot = F.one_hot(labels, num_classes=10).float()
#         _, predicted = torch.max(outputs.data, 1)
#         total += labels.size(0)
#         correct += (predicted == labels).sum().item()     
#         loss = nn.CrossEntropyLoss()(outputs, labels_one_hot)

#         loss_list.append(loss.cpu().detach().numpy())
#         acc = 100 * correct / total
#         acc_list.append(acc)
        
#         total_loss += loss.cpu().detach().numpy()
#         # if i % 30 == 0:
#         print(f"Step [{i+1}/{len(train_loader)}], Loss: {loss.item():.4f}, batch time: {time.time() - since_batch:.2f}, accuracy:  {(acc):.2f}%")
    
#     # Return average loss over all batches
#     return total_loss / len(train_loader)

loss_list = []  # Mini-batch loss of every objective() call, in call order
acc_list = []   # Mini-batch accuracy (in percent) of every objective() call


def objective(params):
    """Load ``params`` into ``model_qt`` and return the loss on one random mini-batch.

    This is the scalar function handed to the gradient-free optimizer. Each call
    overwrites all parameters of the global ``model_qt`` with the values in
    ``params``, runs a single forward pass on one mini-batch drawn from the global
    ``train_loader``, prints the loss/accuracy/timing line, and appends the loss
    and accuracy to ``loss_list`` / ``acc_list``.

    Args:
        params: Flat array-like with exactly one value per scalar parameter of
            ``model_qt``, laid out in the order of ``model_qt.parameters()``.

    Returns:
        float: Cross-entropy loss of the mini-batch.

    Raises:
        ValueError: If ``params`` does not have exactly as many entries as the
            model has parameters (checked before the model is modified), or if
            ``train_loader`` yields no batch.

    Note:
        Every call uses a different random batch, so two evaluations are only
        comparable up to mini-batch noise.
    """
    since_batch = time.time()

    flat_params = np.asarray(params).ravel()
    model_params = list(model_qt.parameters())
    expected_size = sum(p.numel() for p in model_params)
    # Validate the full length up front so the model is never left half-updated
    # and surplus entries are not silently ignored.
    if flat_params.size != expected_size:
        raise ValueError(f"Parameter size mismatch: expected {expected_size}, got {flat_params.size}")

    # Draw one batch. The training loader is built with shuffle=True, so the first
    # batch of a fresh iterator is already a uniformly random batch; this avoids
    # loading and discarding all batches that precede a randomly chosen index.
    batch = next(iter(train_loader), None)
    if batch is None:
        raise ValueError("train_loader yielded no batches")
    images, labels = batch
    images, labels = images.to(device), labels.to(device)  # Move data to the compute device

    # No gradients are needed: the optimizer only consumes the loss value.
    with torch.no_grad():
        offset = 0
        for param in model_params:
            param_size = param.numel()
            segment = torch.from_numpy(flat_params[offset:offset + param_size])
            # copy_ converts dtype and device to match the destination parameter.
            param.copy_(segment.view_as(param))
            offset += param_size

        outputs = model_qt(images)
        # Class-index targets give the same value as one-hot float targets.
        loss = F.cross_entropy(outputs, labels).item()
        predicted = outputs.argmax(dim=1)
        acc = 100 * (predicted == labels).sum().item() / labels.size(0)

    print(f"Loss: {loss:.4f}, accuracy:  {(acc):.2f}%, batch time: {time.time() - since_batch:.2f} s")

    loss_list.append(loss)
    acc_list.append(acc)

    return loss

In [8]:
# Collect every model parameter as a flat CPU numpy array and join them into one
# vector, in the order of model_qt.parameters(); this is the optimizer's start point.
initial_params = np.concatenate(
    [weights.data.flatten().cpu().numpy() for weights in model_qt.parameters()]
)

In [9]:
# Sanity check: length of the flat parameter vector that is passed to SPSA as x0.
len(initial_params)

1497

In [10]:
def callback(nfev, parameters, fvalue, stepsize, accepted):
    """Progress hook called by Qiskit's SPSA optimizer at every iteration step.

    SPSA passes the arguments positionally in this order.

    Args:
        nfev: Number of objective evaluations performed so far.
        parameters: Candidate parameter vector of this step (unused here).
        fvalue: Objective value at ``parameters``.
        stepsize: Norm of the parameter update of this step.
        accepted: Whether the step was accepted; with ``blocking=True`` a step
            that does not improve the loss is rejected.

    The running iteration index is kept in the function attribute
    ``callback.iteration`` and is incremented on every call.
    """
    print("=======================")
    print(
        f"Iteration: {callback.iteration}, nfev: {nfev}, loss: {float(fvalue):.4f}, "
        f"step size: {float(stepsize):.3e}, accepted: {accepted}"
    )
    callback.iteration += 1


callback.iteration = 0  # Initialize the iteration count

# Gradient-free optimization of the flat parameter vector with SPSA.
# blocking=True makes the optimizer re-evaluate the objective after each step and
# reject steps that do not improve it; `callback` reports every iteration.
spsa = SPSA(
    maxiter=500,
    callback=callback,
    blocking=True,
    learning_rate=1e-3,
    perturbation=1e-2,
)
out = spsa.minimize(objective, x0=initial_params)

# out = spsa.optimize(
#                 num_vars = len(initial_params),
#                 objective_function = objective,
#                 initial_point = initial_params
#                 )

Loss: 62.7658, accuracy:  7.40%, batch time: 2.53 s
Loss: 64.6343, accuracy:  8.70%, batch time: 0.62 s
Loss: 62.7448, accuracy:  9.60%, batch time: 1.14 s
Loss: 63.5283, accuracy:  10.60%, batch time: 0.62 s
Loss: 63.5409, accuracy:  8.80%, batch time: 3.26 s
Loss: 61.6612, accuracy:  8.20%, batch time: 4.26 s
Loss: 61.4781, accuracy:  9.00%, batch time: 4.25 s
Loss: 59.8364, accuracy:  9.20%, batch time: 3.90 s
Loss: 61.6975, accuracy:  8.80%, batch time: 0.76 s
Loss: 60.1003, accuracy:  7.80%, batch time: 0.92 s
Loss: 62.7465, accuracy:  8.60%, batch time: 0.46 s
Loss: 60.6501, accuracy:  8.40%, batch time: 3.82 s
Loss: 59.1667, accuracy:  10.10%, batch time: 4.58 s
Loss: 63.1841, accuracy:  8.40%, batch time: 4.40 s
Loss: 64.3883, accuracy:  9.20%, batch time: 2.52 s
Loss: 61.0165, accuracy:  10.10%, batch time: 2.89 s
Loss: 61.7201, accuracy:  9.10%, batch time: 3.67 s
Loss: 64.5686, accuracy:  9.20%, batch time: 3.36 s
Loss: 61.1507, accuracy:  8.60%, batch time: 4.61 s
Loss: 62.

In [None]:
# Write the optimizer's final parameter vector back into the model, slicing the
# flat array in the order of model_qt.parameters().
optimized_params = out.x
with torch.no_grad():
    offset = 0
    for weights in model_qt.parameters():
        stop = offset + weights.numel()
        chunk = torch.from_numpy(optimized_params[offset:stop])
        weights.copy_(chunk.view_as(weights))
        offset = stop

In [None]:


def _count_correct(model, loader):
    """Return ``(correct, total)`` prediction counts of ``model`` over all batches of ``loader``.

    The model is switched to evaluation mode and the pass runs without gradient
    tracking. Batches are moved to the global ``device`` before the forward pass;
    the predicted class is the index of the largest output score.
    """
    model.eval()
    n_correct = 0
    n_seen = 0
    with torch.no_grad():
        for images, labels in loader:
            images, labels = images.to(device), labels.to(device)
            predicted = model(images).argmax(dim=1)
            n_seen += labels.size(0)
            n_correct += (predicted == labels).sum().item()
    return n_correct, n_seen


# Accuracy on the training set
correct, total = _count_correct(model_qt, train_loader)
print(f"Accuracy on the train set: {(100 * correct / total):.2f}%")

# Accuracy on the held-out test set
correct, total = _count_correct(model_qt, test_loader)
print(f"Accuracy on the test set: {(100 * correct / total):.2f}%")

In [None]:


# #############################################
# ### Training loop ###########################

# ### (Optional) Start from pretrained model ##
# # model_qt = torch.load('L16/tq_mm_acc_99_bsf')
# # model_qt.eval()  # Set the model to evaluation mode
# #############################################

# loss_list = [] 
# acc_list = [] 
# acc_best = 0
# for epoch in range(num_epochs):
#     model_qt.train()

#     for i, (images, labels) in enumerate(train_loader):
#         correct = 0
#         total = 0
#         since_batch = time.time()
        
#         images, labels = images.to(device), labels.to(device)  # Move data to GPU

#         # Forward pass
#         outputs = model_qt(images)
#         # print("output: ", outputs)
#         labels_one_hot = F.one_hot(labels, num_classes=10).float()
#         _, predicted = torch.max(outputs.data, 1)
#         total += labels.size(0)
#         correct += (predicted == labels).sum().item()
        
#         loss = criterion(outputs, labels_one_hot)
#         loss_list.append(loss.cpu().detach().numpy())
#         acc = 100 * correct / total
#         acc_list.append(acc)
        
        
#         if acc > acc_best:
#             # torch.save(model_qt, 'L16/3/tq_mm_acc_'+str(int(acc))+'_bsf')
#             acc_best = acc
            
#         spsa_step(model_qt, criterion, images, labels, lr=5e-5, epsilon=5e-4)
        
        
#         # if (i+1) % 100 == 0:
#         print(f"Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{len(train_loader)}], Loss: {loss.item():.4f}, batch time: {time.time() - since_batch:.2f}, accuracy:  {(acc):.2f}%")
    
    
# #############################################

In [None]:
# Plot the mini-batch loss recorded in `loss_list` (one point per objective() call).
fig, ax = plt.subplots(figsize=(5, 3), dpi=150)

ax.set_title("Quantum-Train training process")
ax.set_ylabel("cross-entropy loss")
ax.set_xlabel("training iteration")
ax.plot(loss_list, color=plt.cm.Reds(0.6), marker="P", markersize=2, lw=1, alpha=0.8, label="L")
# Anchor the y-axis at zero but let the upper limit follow the data: the recorded
# losses are around 60, so a fixed (0, 5) window would leave the plot empty.
ax.set_ylim(bottom=0)
plt.show()

