In [1]:
# Basic Tools
import time
import numpy as np 
from util import *

# PyTorch
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
import torchvision.transforms as transforms

# TorchQuantum
import torchquantum as tq
# Plotting
import matplotlib.pyplot as plt

# Uncomment to make runs repeatable (seeds the PyTorch and NumPy global RNGs).
# torch.manual_seed(42)
# np.random.seed(42)

# Pick the compute device:
#   * the second GPU ("cuda:1") when the host exposes at least two CUDA devices,
#   * the first GPU ("cuda:0") on single-GPU hosts, where "cuda:1" would be an
#     invalid device ordinal and fail on the first `.to(device)` call,
#   * the CPU when CUDA is not available at all.
if torch.cuda.is_available():
    device = torch.device("cuda:1" if torch.cuda.device_count() > 1 else "cuda:0")
else:
    device = torch.device("cpu")

In [2]:

### Classical target model initialization ###

# Define the CNN model
class CNNModel(nn.Module):
    """Small convolutional classifier used as the classical target model.

    Pipeline: conv(1->8, k=5) -> max-pool(2) -> conv(8->12, k=5) -> max-pool(2)
    -> flatten -> linear(12*4*4 -> 20) -> linear(20 -> 10).

    The first linear layer expects 12*4*4 = 192 features, which is what the
    two conv/pool stages produce for a single-channel 28x28 input.

    NOTE: no activation function is applied between layers; max-pooling is the
    only non-linear operation in the forward pass.
    """

    def __init__(self):
        super().__init__()
        # Each operation (including pooling and flattening) is registered as
        # its own sub-module so the extraction function can read the layout.
        self.conv1 = nn.Conv2d(1, 8, kernel_size=5)
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(8, 12, kernel_size=5)
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(12*4*4, 20)
        self.fc2 = nn.Linear(20, 10)

    def forward(self, x):
        """Return the 10 class scores (logits) for a batch of images `x`."""
        # First convolution stage followed by 2x2 down-sampling.
        stage1 = self.conv1(x)
        stage1 = self.pool1(stage1)

        # Second convolution stage followed by 2x2 down-sampling.
        stage2 = self.conv2(stage1)
        stage2 = self.pool2(stage2)

        # Collapse the feature maps to one vector per sample, then classify.
        flat = self.flatten(stage2)
        hidden = self.fc1(flat)
        logits = self.fc2(hidden)
        return logits


# Instantiate the classical target model (no loss function is created in this
# cell). The instance is passed to the extraction helpers and to QuantumTrain.
model = CNNModel()

In [3]:
# Inspect the classical model with two helpers from `util` (star import).
# `required_qubits_estimation` returns a pair that is unpacked into the qubit
# count and `nw_list_normal`; the recorded output of this cell reports 6690 NN
# parameters and 13 required qubits (2**13 = 8192 >= 6690).
# NOTE(review): presumably `nw_list_normal` holds per-layer weight counts and
# `network_config` the layer layout of `model` -- confirm against util.py.
n_qubit, nw_list_normal = required_qubits_estimation(model)
network_config          = network_config_extract(model)

# of NN parameters:  6690
Required qubit number:  13


In [4]:

### Training setting ########################

# Hyper-parameters
step = 1e-4         # Learning rate
batch_size = 1000   # Number of samples for each training step
num_epochs = 100    # Number of training epochs
q_depth = 16        # Depth of the quantum circuit (number of variational layers)

# Dataset setup: MNIST images are converted to tensors and normalised with
# mean 0.5 and standard deviation 0.5.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5,), std=(0.5,)),
])

train_dataset = datasets.MNIST(root='./data', train=True, transform=transform, download=True)
test_dataset = datasets.MNIST(root='./data', train=False, transform=transform, download=True)

# Only the training loader is shuffled.
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# Build the quantum-train model and move it to the selected device.
# `QuantumTrain(...)` returns a callable; calling it with no arguments yields
# the module that is trained below.
qt_factory = QuantumTrain(model, n_qubit, nw_list_normal, q_depth, device, network_config)
model_qt = qt_factory().to(device)

criterion = nn.CrossEntropyLoss()

# Adam over two parameter groups (quantum circuit first, mapping network
# second), both sharing the same hyper-parameters.
# Single-group alternative:
#   optim.Adam(model_qt.parameters(), lr=step, weight_decay=1e-5, eps=1e-6)
optimizer = optim.Adam(
    [{'params': sub.parameters()} for sub in (model_qt.QuantumNN, model_qt.MappingNetwork)],
    lr=step,
    eps=1e-6,
    weight_decay=1e-5,
)

# Halve the learning rate when the monitored loss has not improved for
# 5 scheduler steps ('min' because we're minimizing loss).
scheduler = optim.lr_scheduler.ReduceLROnPlateau(
    optimizer,
    mode='min',
    factor=0.5,
    patience=5,
    verbose=True,
)


In [5]:

def _count_trainable(module):
    """Return the total element count of `module`'s parameters that require grad."""
    total = 0
    for param in module.parameters():
        if param.requires_grad:
            total += param.numel()
    return total


# Trainable-parameter budget of each sub-network and of the whole model.
num_trainable_params_MM = _count_trainable(model_qt.MappingNetwork)
num_trainable_params_QNN = _count_trainable(model_qt.QuantumNN)
num_trainable_params = _count_trainable(model_qt)

print("# of trainable parameter in Mapping model: ", num_trainable_params_MM)
print("# of trainable parameter in QNN model: ", num_trainable_params_QNN)
print("# of trainable parameter in full model: ", num_trainable_params)


# of trainable parameter in Mapping model:  249
# of trainable parameter in QNN model:  1248
# of trainable parameter in full model:  1497


In [6]:


#############################################
### Training loop ###########################
#
# Trains `model_qt` for `num_epochs` passes over `train_loader`.
# For every batch it records the loss and the batch accuracy, and once per
# epoch it feeds the mean training loss to the plateau scheduler.

### (Optional) Start from pretrained model ##
# model_qt = torch.load('L16/tq_mm_acc_99_bsf')
# model_qt.eval()  # Set the model to evaluation mode
#############################################

loss_list = []   # Per-batch training loss (Python floats)
acc_list = []    # Per-batch training accuracy, in percent
acc_best = 0     # Best per-batch training accuracy seen so far
for epoch in range(num_epochs):
    model_qt.train()
    train_loss = 0.0
    for i, (images, labels) in enumerate(train_loader):
        since_batch = time.time()

        images, labels = images.to(device), labels.to(device)  # Move data to GPU
        optimizer.zero_grad()

        # Forward pass
        outputs = model_qt(images)
        # print("output: ", outputs)

        # Compute loss. CrossEntropyLoss takes integer class indices directly;
        # this is the same loss as with a one-hot target, without building the
        # extra (batch, 10) float tensor every step.
        loss = criterion(outputs, labels)
        # log_loss = torch.log(loss + 1e-6)

        # Accuracy of this batch only (measured before the parameter update).
        # `.detach()` replaces the legacy `.data` attribute.
        _, predicted = torch.max(outputs.detach(), 1)
        total = labels.size(0)
        correct = (predicted == labels).sum().item()
        acc = 100 * correct / total

        # Read the scalar loss back from the device once and reuse it.
        loss_value = loss.item()
        loss_list.append(loss_value)
        acc_list.append(acc)
        train_loss += loss_value

        # np.array(loss_list).dump("L16/3/loss_list.dat")
        # np.array(acc_list).dump("L16/3/acc_list.dat")
        if acc > acc_best:
            # torch.save(model_qt, 'L16/3/tq_mm_acc_'+str(int(acc))+'_bsf')
            acc_best = acc

        # Backward pass and optimization
        loss.backward()
        optimizer.step()

        # if (i+1) % 100 == 0:
        print(f"Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{len(train_loader)}], Loss: {loss_value:.4f}, batch time: {time.time() - since_batch:.2f}, accuracy:  {(acc):.2f}%")

    # Mean loss over the epoch drives ReduceLROnPlateau.
    train_loss /= len(train_loader)
    scheduler.step(train_loss)

#############################################

Epoch [1/100], Step [1/60], Loss: 2.3778, batch time: 4.01, accuracy:  8.10%
Epoch [1/100], Step [2/60], Loss: 2.3692, batch time: 0.84, accuracy:  8.80%
Epoch [1/100], Step [3/60], Loss: 2.3693, batch time: 0.95, accuracy:  9.10%
Epoch [1/100], Step [4/60], Loss: 2.3607, batch time: 0.94, accuracy:  10.20%
Epoch [1/100], Step [5/60], Loss: 2.3641, batch time: 0.94, accuracy:  9.20%
Epoch [1/100], Step [6/60], Loss: 2.3458, batch time: 0.96, accuracy:  10.90%
Epoch [1/100], Step [7/60], Loss: 2.3641, batch time: 0.95, accuracy:  8.90%
Epoch [1/100], Step [8/60], Loss: 2.3424, batch time: 0.94, accuracy:  10.00%
Epoch [1/100], Step [9/60], Loss: 2.3501, batch time: 0.75, accuracy:  10.10%
Epoch [1/100], Step [10/60], Loss: 2.3499, batch time: 0.74, accuracy:  8.10%
Epoch [1/100], Step [11/60], Loss: 2.3512, batch time: 0.76, accuracy:  8.90%
Epoch [1/100], Step [12/60], Loss: 2.3432, batch time: 0.77, accuracy:  10.30%
Epoch [1/100], Step [13/60], Loss: 2.3506, batch time: 0.76, accurac

In [7]:
# Dump the gradient currently stored on every parameter of the model
# (`None` is printed for parameters that have no gradient).
for entry in model_qt.named_parameters():
    name, param = entry
    print(f"Gradient of {name}: {param.grad}")

Gradient of MappingNetwork.input_layer.weight: tensor([[-7.9607e+00, -7.3004e+00, -7.4006e+00, -6.6216e+00, -1.1015e+01,
         -3.7132e-01, -3.8950e+00, -4.8194e+00,  3.5117e-01, -3.9566e+00,
         -2.9183e+00, -2.7443e-01,  3.9356e-02, -3.5626e-01],
        [ 5.5427e+00,  5.0829e+00,  5.1527e+00,  4.6103e+00,  7.6690e+00,
          2.5853e-01,  2.7119e+00,  3.3555e+00, -2.4450e-01,  2.7548e+00,
          2.0319e+00,  1.9108e-01, -2.7402e-02,  2.4805e-01],
        [-3.0348e+00, -2.7830e+00, -2.8212e+00, -2.5243e+00, -4.1990e+00,
         -1.4155e-01, -1.4848e+00, -1.8372e+00,  1.3387e-01, -1.5083e+00,
         -1.1125e+00, -1.0462e-01,  1.5003e-02, -1.3581e-01],
        [ 1.5247e-01,  1.3982e-01,  1.4174e-01,  1.2682e-01,  2.1096e-01,
          7.1115e-03,  7.4599e-02,  9.2304e-02, -6.7258e-03,  7.5780e-02,
          5.5894e-02,  5.2562e-03, -7.5391e-04,  6.8233e-03]], device='cuda:1')
Gradient of MappingNetwork.input_layer.bias: tensor([ 8.9407e-08, -5.9605e-08, -1.4901e-08, -2.