In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
import torch.nn.utils.prune as prune
from torch.utils.data import DataLoader
from utils import MNIST_partial, accuracy, plot_training_metrics


In [2]:

# Hyperparameters
learning_rate = 1e-3
num_epochs = 50

# Data loading and preprocessing
# transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))])

# train_dataset = datasets.MNIST(root='./data', train=True, transform=transform, download=True)
# train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)

# test_dataset = datasets.MNIST(root='./data', train=False, transform=transform, download=True)
# test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

# dataset from csv file, to use for the challenge
train_dataset = MNIST_partial(split = 'train')
val_dataset = MNIST_partial(split='val')

# definition of the dataloader, to process the data in the model
# here, we need a batch size of 1 to use the boson sampler
batch_size = 128
train_loader = DataLoader(train_dataset, batch_size, shuffle = True)
val_loader = DataLoader(val_dataset, batch_size, shuffle = False)

# Define the CNN model
class CNNModel(nn.Module):
    def __init__(self):
        super(CNNModel, self).__init__()
        self.conv1 = nn.Conv2d(1, 8, kernel_size=5)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(8, 12, kernel_size=5)
        self.fc1 = nn.Linear(12 * 4 * 4, 20)
        self.fc2 = nn.Linear(20, 10)

    def forward(self, x):
        x = self.pool(self.conv1(x))
        x = self.pool(self.conv2(x))
        x = x.view(x.size(0), -1)  # Flatten the tensor
        x = self.fc1(x)
        x = self.fc2(x)
        return x


In [3]:

# Instantiate the model, loss function, and optimizer
model = CNNModel()
criterion = nn.CrossEntropyLoss()


print(
    sum(p.numel() for p in model.parameters() if p.requires_grad)
)
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model.to(device)


6690


CNNModel(
  (conv1): Conv2d(1, 8, kernel_size=(5, 5), stride=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(8, 12, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=192, out_features=20, bias=True)
  (fc2): Linear(in_features=20, out_features=10, bias=True)
)

In [4]:

# Training loop
for epoch in range(num_epochs):
    model.train()
    for i, (images, labels) in enumerate(train_loader):
        images, labels = images.to(device), labels.to(device)  # Move data to GPU

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        if (i+1) % 10 == 0:
            print(f"Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{len(train_loader)}], Loss: {loss.item():.4f}")


Epoch [1/50], Step [10/47], Loss: 2.2136
Epoch [1/50], Step [20/47], Loss: 2.0858
Epoch [1/50], Step [30/47], Loss: 1.6598
Epoch [1/50], Step [40/47], Loss: 1.2093
Epoch [2/50], Step [10/47], Loss: 0.6784
Epoch [2/50], Step [20/47], Loss: 0.6495
Epoch [2/50], Step [30/47], Loss: 0.4985
Epoch [2/50], Step [40/47], Loss: 0.4512
Epoch [3/50], Step [10/47], Loss: 0.4130
Epoch [3/50], Step [20/47], Loss: 0.3849
Epoch [3/50], Step [30/47], Loss: 0.3462
Epoch [3/50], Step [40/47], Loss: 0.2651
Epoch [4/50], Step [10/47], Loss: 0.2051
Epoch [4/50], Step [20/47], Loss: 0.3770
Epoch [4/50], Step [30/47], Loss: 0.1353
Epoch [4/50], Step [40/47], Loss: 0.2282
Epoch [5/50], Step [10/47], Loss: 0.1752
Epoch [5/50], Step [20/47], Loss: 0.3100
Epoch [5/50], Step [30/47], Loss: 0.1615
Epoch [5/50], Step [40/47], Loss: 0.1695
Epoch [6/50], Step [10/47], Loss: 0.2852
Epoch [6/50], Step [20/47], Loss: 0.2417
Epoch [6/50], Step [30/47], Loss: 0.2033
Epoch [6/50], Step [40/47], Loss: 0.2037
Epoch [7/50], St

In [5]:
import numpy as np 

def evaluate(model, test_loader):
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)  # Move data to GPU

            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    print(f'Test Accuracy: {100 * correct / total:.2f}%')

def train_evaluate(model, test_loader):
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)  # Move data to GPU

            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    print(f'Train Accuracy: {100 * correct / total:.2f}%')

def gen_error_evaluate(model):
    model.eval()
    loss_list_train = []
    with torch.no_grad():
        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)  # Move data to GPU
            outputs = model(images)
            loss = criterion(outputs, labels).cpu().detach().numpy()
            loss_list_train.append(loss)
            
    print(f'Train loss: {np.average(loss_list_train)}')

    loss_list_test = [] 
    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)  # Move data to GPU
            outputs = model(images)
            loss = criterion(outputs, labels).cpu().detach().numpy()
            loss_list_test.append(loss)
            
    print(f'Test loss:{np.average(loss_list_test)}')
    print(f'Generalization error: {np.average(loss_list_test) - np.average(loss_list_train)}')
    
evaluate(model, val_loader)



# print("Pruning removed. Model restored to dense weights.")

Test Accuracy: 96.50%


In [6]:
train_evaluate(model, train_loader)

Train Accuracy: 100.00%


In [7]:
gen_error_evaluate(model)

Train loss: 0.003258474636822939
Test loss:0.17137287557125092
Generalization error: 0.16811439394950867


In [8]:


# MNIST partial

test_acc_classical_target  = [96.67, 96.67, 97.33]
num_para_classical_target  = [6690]
