In [1]:
import time
import os
import copy

# PyTorch
import torch
import torch.nn as nn
import torch.nn.functional as F

import torch.optim as optim
from torch.optim import lr_scheduler
import torchvision
from torchvision import datasets, transforms
import torchvision.transforms as transforms

## Distributed training
import torch.distributed as dist
from torch.utils.data.distributed import DistributedSampler
from torch.nn.parallel import DistributedDataParallel as DDP

# # Pennylane
# import pennylane as qml
# from pennylane import numpy as np
import torchquantum as tq
import numpy as np 



torch.manual_seed(42)
np.random.seed(42)

# Plotting
import matplotlib.pyplot as plt


import itertools

# OpenMP: number of parallel threads.
# os.environ["OMP_NUM_THREADS"] = "1"

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")


### Classical target model initialization ###

# Hyperparameters
batch_size = 200
learning_rate = 0.001
num_epochs = 1

# Data loading and preprocessing
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))])

train_dataset = datasets.FashionMNIST(root='./data', train=True, transform=transform, download=True)
train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)

test_dataset = datasets.FashionMNIST(root='./data', train=False, transform=transform, download=True)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

# Define the CNN model
class CNNModel(nn.Module):
    def __init__(self):
        super(CNNModel, self).__init__()
        self.conv1 = nn.Conv2d(1, 8, kernel_size=5)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(8, 12, kernel_size=5)
        self.fc1 = nn.Linear(12*4*4, 20)
        self.fc2 = nn.Linear(20, 10)
        
    def forward(self, x):
        x = self.pool(self.conv1(x))
        x = self.pool(self.conv2(x))
        x = x.view(x.size(0), -1) # [N, 32 * 8 * 8]
        x = self.fc1(x)
        x = self.fc2(x)
        
        return x

# Instantiate the model and loss function
model = CNNModel()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training loop
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        if (i+1) % 100 == 0:
            print(f"Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{len(train_loader)}], Loss: {loss.item():.4f}")


# Testing train loop
model.eval()
correct = 0
total = 0

loss_train_list = []
with torch.no_grad():
    for images, labels in train_loader:
        outputs = model(images)
        loss_train = criterion(outputs, labels).cpu().detach().numpy()
        loss_train_list.append(loss_train)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f"Accuracy on the train set: {(100 * correct / total):.2f}%")

# Testing loop
model.eval()
correct = 0
total = 0
loss_test_list = [] 
with torch.no_grad():
    for images, labels in test_loader:
        outputs = model(images)
        loss_test = criterion(outputs, labels).cpu().detach().numpy()
        loss_test_list.append(loss_test)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f"Accuracy on the test set: {(100 * correct / total):.2f}%")

print(f"Loss on the train set: {np.mean(loss_train_list):.2f}")
print(f"Loss on the test set: {np.mean(loss_test_list):.2f}")
print("Generalization error:", np.mean(loss_test_list) - np.mean(loss_train_list))

Epoch [1/1], Step [100/300], Loss: 0.7110
Epoch [1/1], Step [200/300], Loss: 0.5653
Epoch [1/1], Step [300/300], Loss: 0.6209
Accuracy on the train set: 80.30%
Accuracy on the test set: 79.42%
Loss on the train set: 0.54
Loss on the test set: 0.57
Generalization error: 0.023578882


In [2]:
### required qubits estimation ##############
# NN weights

numpy_weights = {}
nw_list = [] 
nw_list_normal = []
for name, param in model.state_dict().items():
    numpy_weights[name] = param.cpu().numpy()
for i in numpy_weights:
    nw_list.append(list(numpy_weights[i].flatten()))
for i in nw_list:
    for j in i:
        nw_list_normal.append(j)
print("# of NN parameters: ", len(nw_list_normal))
n_qubits = int(np.ceil(np.log2(len(nw_list_normal))))
print("Required qubit number: ", n_qubits)

n_qubit = n_qubits

#############################################

# of NN parameters:  6690
Required qubit number:  13


In [3]:


### Some tool function definition ###########
def probs_to_weights(probs_):

    new_state_dict = {}
    data_iterator = probs_.view(-1)

    for name, param in CNNModel().state_dict().items():
        shape = param.shape
        num_elements = param.numel()
        chunk = data_iterator[:num_elements].reshape(shape)
        new_state_dict[name] = chunk
        data_iterator = data_iterator[num_elements:]
        
    return new_state_dict

def generate_qubit_states_torch(n_qubit):
    # Create a tensor of shape (2**n_qubit, n_qubit) with all possible combinations of 0 and 1
    all_states = torch.cartesian_prod(*[torch.tensor([-1, 1]) for _ in range(n_qubit)])
    return all_states

#############################################

### Main Learning-wise Hybridization model ##

class LewHybridNN(nn.Module):
    class QLayer(nn.Module):
        def __init__(self, n_blocks):
            super().__init__()
            self.n_wires = int(np.ceil(np.log2(len(nw_list_normal)))),
            self.n_wires = self.n_wires[0]
            self.n_blocks = n_blocks
            self.u3_layers = tq.QuantumModuleList()
            self.cu3_layers = tq.QuantumModuleList()
            # self.measure = tq.MeasureAll(tq.PauliZ)
            for _ in range(self.n_blocks):
                self.u3_layers.append(
                    tq.Op1QAllLayer(
                        op=tq.U3,
                        n_wires=self.n_wires,
                        has_params=True,
                        trainable=True,
                    )
                )
                self.cu3_layers.append(
                    tq.Op2QAllLayer(
                        op=tq.CU3,
                        n_wires=self.n_wires,
                        has_params=True,
                        trainable=True,
                        circular=True,
                    )
                )
                
        def forward(self):
            qdev = tq.QuantumDevice(
                n_wires=self.n_wires, bsz=1, device=next(self.parameters()).device
            )
            easy_scale_coeff = 2**(n_qubit-1)
            gamma = 0.1
            beta  = 0.8
            alpha = 0.3
            for k in range(self.n_blocks):
                self.u3_layers[k](qdev)
                self.cu3_layers[k](qdev)
                
            state_mag = qdev.get_states_1d().abs()[0] 
            state_mag = state_mag[:len(nw_list_normal)]
            x = torch.abs(state_mag) ** 2
            # x = torch.log(x)
            x = x.reshape(len(nw_list_normal),1)
            x = (beta*torch.tanh(gamma*easy_scale_coeff*x))**(alpha) 
            x = x - torch.mean(x)
            x.to(device)
            return x

        
        
    class MappingModel(nn.Module):
        def __init__(self, input_size, hidden_sizes, output_size):
            super().__init__()
            # Initialize layers: an input layer, multiple hidden layers, and an output layer
            self.input_layer = nn.Linear(input_size, hidden_sizes[0])
            self.hidden_layers = nn.ModuleList([nn.Linear(hidden_sizes[i], hidden_sizes[i+1]) for i in range(len(hidden_sizes)-1)])
            self.output_layer = nn.Linear(hidden_sizes[-1], output_size)
            
        def forward(self, X):
            # Ensure the input tensor is the same type as the weights
            X = X.type_as(self.input_layer.weight)

            # Input layer with ReLU activation
            X = self.input_layer(X)

            # Hidden layers with ReLU activation
            for hidden in self.hidden_layers:
                X = hidden(X)

            # Output layer with linear activation
            output = self.output_layer(X)
            # output = F.tanh(output)  # It's often better to use ReLU or similar; tanh is used here as it was in the original model.
            return output

    def __init__(self):
        """
        Definition of the *dressed* layout.
        """
        super().__init__()
        self.MappingNetwork = self.MappingModel(n_qubit+1, [4, 20, 4], 1).to(device)  
        # self.MappingNetwork = self.ConvMappingModel()
        self.QuantumNN = self.QLayer(q_depth).to(device)   #arch={"n_blocks": q_depth})
    
    def forward(self, x):
        """
        Defining how tensors are supposed to move through the *dressed* quantum
        net.
        """
        device = x.device

        probs_ = self.QuantumNN()
        probs_ = probs_[:len(nw_list_normal)]
        
        # Generate qubit states using PyTorch
        qubit_states_torch = generate_qubit_states_torch(n_qubit)[:len(nw_list_normal)]
        qubit_states_torch = qubit_states_torch.to(device)

        # Combine qubit states with probability values using PyTorch
        combined_data_torch = torch.cat((qubit_states_torch, probs_), dim=1)
        combined_data_torch = combined_data_torch.reshape(len(nw_list_normal), 1, n_qubit+1)
        
        prob_val_post_processed = self.MappingNetwork(combined_data_torch)
        prob_val_post_processed = prob_val_post_processed - prob_val_post_processed.mean()
        
        state_dict = probs_to_weights(prob_val_post_processed)

        ######## 
            
        dtype = torch.float32  # Ensure all tensors are of this type
        
        # Convolution layer 1 parameters
        conv1_weight = state_dict['conv1.weight'].to(device).type(dtype)
        conv1_bias = state_dict['conv1.bias'].to(device).type(dtype)

        # Convolution layer 2 parameters
        conv2_weight = state_dict['conv2.weight'].to(device).type(dtype)
        conv2_bias = state_dict['conv2.bias'].to(device).type(dtype)

        # Fully connected layer 1 parameters
        fc1_weight = state_dict['fc1.weight'].to(device).type(dtype)
        fc1_bias = state_dict['fc1.bias'].to(device).type(dtype)

        # Fully connected layer 2 parameters
        fc2_weight = state_dict['fc2.weight'].to(device).type(dtype)
        fc2_bias = state_dict['fc2.bias'].to(device).type(dtype)
        
        
        # Convolution 1
        x = F.conv2d(x, conv1_weight, conv1_bias, stride=1)
        x = F.max_pool2d(x, kernel_size=2, stride=2)

        # Convolution 2
        x = F.conv2d(x, conv2_weight, conv2_bias, stride=1)
        x = F.max_pool2d(x, kernel_size=2, stride=2)

        # Flatten
        x = x.view(x.size(0), -1)

        # Fully connected 1
        x = F.linear(x, fc1_weight, fc1_bias)

        # Fully connected 2
        x = F.linear(x, fc2_weight, fc2_bias)

    
        return x 

In [4]:

### Training setting ########################

step = 1e-4                 # Learning rate
batch_size = 128       # Number of samples for each training step
num_epochs = 50             # Number of training epochs
q_depth = 16             # Depth of the quantum circuit (number of variational layers)
gamma_lr_scheduler = 0.1    # Learning rate reduction applied every 10 epochs.
q_delta = 0.1              # Initial spread of random quantum weights

train_dataset = datasets.FashionMNIST(root='./data', train=True, transform=transform, download=True)
train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)

test_dataset = datasets.FashionMNIST(root='./data', train=False, transform=transform, download=True)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)



# Instantiate the model, move it to GPU, and set up loss function and optimizer
model = LewHybridNN().to(device)
criterion = nn.CrossEntropyLoss()

optimizer = optim.Adam(model.parameters(), lr=step, weight_decay=1e-5, eps=1e-6)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience = 5, verbose = True, factor = 0.5)  # 'min' because we're minimizing loss

num_trainable_params_MM = sum(p.numel() for p in LewHybridNN.MappingModel(n_qubit+1,  [4, 20, 4], 1).parameters() if p.requires_grad)
# num_trainable_params_MM = sum(p.numel() for p in LewHybridNN.ConvMappingModel().parameters() if p.requires_grad)
num_trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print("# of trainable parameter in Mapping model: ", num_trainable_params_MM)
print("# of trainable parameter in QNN model: ", num_trainable_params - num_trainable_params_MM)
print("# of trainable parameter in full model: ", num_trainable_params)


# of trainable parameter in Mapping model:  249
# of trainable parameter in QNN model:  1248
# of trainable parameter in full model:  1497


In [5]:


#############################################
### Training loop ###########################

### (Optional) Start from pretrained model ##
# model = torch.load('result_FF_mm_b1000_40_200_40/tq_mm_acc_70_bsf')
# model.eval()  # Set the model to evaluation mode
#############################################

loss_list = [] 
acc_list = [] 
acc_best = 0
for epoch in range(num_epochs):
    model.train()
    train_loss = 0
    for i, (images, labels) in enumerate(train_loader):
        correct = 0
        total = 0
        since_batch = time.time()
        
        images, labels = images.to(device), labels.to(device)  # Move data to GPU
        optimizer.zero_grad()
        # Forward pass
        outputs = model(images)
        # print("output: ", outputs)
        labels_one_hot = F.one_hot(labels, num_classes=10).float()
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
        # Compute loss
        loss = criterion(outputs, labels_one_hot)
        # log_loss = torch.log(loss + 1e-6)
        
        loss_list.append(loss.cpu().detach().numpy())
        acc = 100 * correct / total
        acc_list.append(acc)
        train_loss += loss.cpu().detach().numpy()
        
        np.array(loss_list).dump("L16/loss_list.dat")
        np.array(acc_list).dump("L16/acc_list.dat")
        if acc > acc_best:
            torch.save(model, 'L16/tq_mm_acc_'+str(int(acc))+'_bsf')
            acc_best = acc
        # Backward pass and optimization
        loss.backward()
        
        optimizer.step()
        # if (i+1) % 100 == 0:
        print(f"Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{len(train_loader)}], Loss: {loss.item():.4f}, batch time: {time.time() - since_batch:.2f}, accuracy:  {(acc):.2f}%")
    
    train_loss /= len(train_loader)
    scheduler.step(train_loss) 
    
#############################################

Epoch [1/50], Step [1/469], Loss: 41.3161, batch time: 3.81, accuracy:  10.94%
Epoch [1/50], Step [2/469], Loss: 38.6875, batch time: 0.82, accuracy:  11.72%
Epoch [1/50], Step [3/469], Loss: 35.1488, batch time: 0.79, accuracy:  12.50%
Epoch [1/50], Step [4/469], Loss: 32.8348, batch time: 0.81, accuracy:  18.75%
Epoch [1/50], Step [5/469], Loss: 35.0093, batch time: 0.74, accuracy:  9.38%
Epoch [1/50], Step [6/469], Loss: 30.1522, batch time: 0.79, accuracy:  15.62%
Epoch [1/50], Step [7/469], Loss: 25.4352, batch time: 0.80, accuracy:  16.41%
Epoch [1/50], Step [8/469], Loss: 29.7511, batch time: 0.82, accuracy:  14.84%
Epoch [1/50], Step [9/469], Loss: 24.8017, batch time: 0.79, accuracy:  12.50%
Epoch [1/50], Step [10/469], Loss: 29.6046, batch time: 0.78, accuracy:  8.59%
Epoch [1/50], Step [11/469], Loss: 29.7837, batch time: 0.79, accuracy:  11.72%
Epoch [1/50], Step [12/469], Loss: 24.4330, batch time: 0.91, accuracy:  11.72%
Epoch [1/50], Step [13/469], Loss: 24.4856, batch t

In [6]:
# Testing train loop
model.eval()
correct = 0
total = 0
loss_train_list = []
with torch.no_grad():
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)  # Move data to GPU
        outputs = model(images)
        loss_train = criterion(outputs, labels).cpu().detach().numpy()
        loss_train_list.append(loss_train)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f"Accuracy on the train set: {(100 * correct / total):.2f}%")
print(f"Loss on the train set: {np.mean(loss_train_list):.2f}")

# Testing loop
model.eval()
correct = 0
total = 0
loss_test_list = []

with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)  # Move data to GPU
        outputs = model(images)
        loss_test = criterion(outputs, labels).cpu().detach().numpy()
        loss_test_list.append(loss_test)

        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f"Accuracy on the test set: {(100 * correct / total):.2f}%")
print(f"Loss on the test set: {np.mean(loss_test_list):.2f}")

print("Generalization error:", np.mean(loss_test_list) - np.mean(loss_train_list))

Accuracy on the train set: 79.46%
Loss on the train set: 0.57
Accuracy on the test set: 78.71%
Loss on the test set: 0.59
Generalization error: 0.020973265
