In [None]:
!pip install qiskit-ibmq-provider

In [None]:
from google.colab import drive
drive.mount('/content/drive')

In [None]:
!pip install ipython-autotime
%load_ext autotime

In [None]:
!git clone https://github.com/mit-han-lab/torchquantum.git

In [None]:
import os

# Move the kernel into the cloned torchquantum checkout so that the
# `pip install --editable .` cell that follows installs that checkout.
current_directory = os.getcwd()
print("Current Directory:", current_directory)
new_directory = os.path.join(current_directory, 'torchquantum')
# Keep the cell idempotent: if it is re-run while the kernel already sits in
# the checkout, stay put instead of descending one level further
# (presumably into the package directory of the same name inside the repo).
if os.path.basename(current_directory) == 'torchquantum':
    new_directory = current_directory
os.chdir(new_directory)
new_current_directory = os.getcwd()
print("New Current Directory:", new_current_directory)

In [None]:
!pip install --editable .

In [None]:
# IMPORTS -----------------------------------------------
import threading
import random
import time
import csv
import datetime
import torch
import torch.nn.functional as F
import torch.optim as optim
import torchquantum as tq
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
import pandas as pd
#import qiskit-ibmq-provider

In [None]:
# CMSCA CODE -----------------------------------------------
# Server class
class Server:
    """Shared resource that admits at most ``limit`` workers at a time.

    Attributes:
        limit: maximum number of simultaneously connected workers.
        current_workers: number of workers connected right now.
        lock: mutex guarding ``current_workers``.
        stop_event: set by ``stop()``; workers poll it to know when to quit.
        state: latest per-worker values, keyed by strings such as
            ``"w3_backoff"`` / ``"w3_priority"``.
        all_states: snapshots of ``state`` appended over time.
    """

    def __init__(self, limit):
        self.limit = limit
        self.current_workers = 0
        self.lock = threading.Lock()
        self.stop_event = threading.Event()
        self.state = {}
        self.all_states = []

    def connect(self):
        """Try to take a connection slot; return True on success, False if full."""
        with self.lock:
            if not self.current_workers < self.limit:
                return False  # every slot is taken: connection refused
            self.current_workers += 1
            return True

    def disconnect(self):
        """Give a connection slot back."""
        with self.lock:
            self.current_workers -= 1

    def stop(self):
        """Signal the end of the simulation to every thread watching ``stop_event``."""
        self.stop_event.set()

# Worker class
class Worker(threading.Thread):
    """Thread that repeatedly connects to a server, works, disconnects and retries.

    Args:
        server: object exposing ``connect()``, ``disconnect()``, ``stop_event``
            and a ``state`` dict (see ``Server``).
        priority: label stored on the worker ('high', 'normal' or 'low' in this
            notebook). The label itself does not influence the backoff; only
            ``backoff_time`` does.
        identifier: id used to build this worker's keys in ``server.state``.
        backoff_time: initial wait (seconds) after a refused connection.
        max_backoff_time: upper bound for the exponentially growing wait.
            ``None`` (default) caps it at eight times ``backoff_time``, i.e.
            at most three doublings. Values below ``backoff_time`` are raised
            to ``backoff_time``.
    """

    def __init__(self, server, priority='normal', identifier=-1, backoff_time=2, max_backoff_time=None):
        super().__init__()
        self.server = server              # server instance for connecting/disconnecting
        self.priority = priority          # priority label of the worker
        self.backoff_time = backoff_time  # current backoff time (grows on refusals)
        self.init_backoff_time = backoff_time
        cap = backoff_time * 8 if max_backoff_time is None else max_backoff_time
        self.max_backoff_time = max(backoff_time, cap)
        self.total_work_time = 0          # total seconds of work over all connections
        self.work_time = 0                # seconds of work in the current connection
        self.backoff_count = 0            # number of refused connection attempts
        self.connection_attempts = 0      # number of connection attempts
        self.connection_probability = 0   # 1 - (backoff_count / connection_attempts), filled in by the CSV writer
        self.id = identifier

    def run(self):
        """Connect/work/disconnect loop; runs until the server's stop event is set."""
        stop_event = self.server.stop_event
        while not stop_event.is_set():
            connected = self.server.connect()
            self.connection_attempts += 1
            if not connected:
                self.backoff_count += 1
                self.handle_backoff()
                continue

            self.backoff_time = self.init_backoff_time       # success resets the backoff
            self.server.state[f"w{self.id}_backoff"] = -1    # -1 marks "connected"
            try:
                while not stop_event.is_set():
                    time.sleep(1)                # one second of work on the server
                    self.work_time += 1
                    if random.random() < 0.25:   # chance to leave after each second
                        break
            finally:
                self.server.disconnect()
                self.total_work_time += self.work_time
                self.work_time = 0
            # Cool down so the worker does not immediately reconnect and starve
            # the others; wakes up early once the simulation is stopped.
            stop_event.wait(random.uniform(1, 3))

    def handle_backoff(self):
        """Publish the pending wait in ``server.state``, wait, then double the wait.

        The wait ends early when the server's stop event is set, so a long
        backoff cannot delay shutdown. The next wait is twice the current one,
        bounded by ``max_backoff_time``. (The previous bound was the initial
        backoff itself, which kept the wait constant.)
        """
        self.server.state[f"w{self.id}_backoff"] = int(self.backoff_time) + 1
        self.server.stop_event.wait(self.backoff_time)
        self.backoff_time = min(self.backoff_time * 2, self.max_backoff_time)

    def get_total_work_time(self):
        """Return the total number of seconds this worker spent connected and working."""
        return self.total_work_time

# Function for lifecycle of server. For example, when we time.sleep(300), we get 5 minutes of server life
def server_lifecycle(server, workers, lifecycle=300):
    """Tick the server once per second, then stop it and wait for the workers.

    On every tick each positive value in ``server.state`` is decremented by
    one, and a copy of the state is appended to ``server.all_states``.

    Args:
        server: object with ``state`` (dict), ``all_states`` (list) and ``stop()``.
        workers: iterable of started threads to join after the server stops.
        lifecycle: number of one-second ticks to run.
    """
    for _ in range(lifecycle):
        time.sleep(1)
        # Worker threads write to server.state concurrently. Iterate over a
        # snapshot of the keys so a key inserted mid-tick cannot raise
        # "dictionary changed size during iteration".
        for key in list(server.state):
            remaining = server.state[key]
            if remaining > 0:
                server.state[key] = remaining - 1
        server.all_states.append(server.state.copy())

    server.stop()           # After <lifecycle> seconds have elapsed, stop the server
    for worker in workers:  # Wait for every worker thread to finish
        worker.join()


# Run one simulation on an existing server: create the workers, start them, and run the server lifecycle
def simulate(server, worker_count=20, server_life=30, backoff_times=(1, 2, 4)):
    """Run one simulation and return the finished workers.

    Workers are assigned priorities round-robin by index: 'high', 'normal',
    'low', repeating.

    Args:
        server: the ``Server`` the workers compete for.
        worker_count: number of worker threads to create.
        server_life: simulation length in seconds (passed to ``server_lifecycle``).
        backoff_times: initial backoff for the high, normal and low priority
            workers, in that order.

    Returns:
        The list of ``Worker`` objects, joined after the server has stopped.
    """
    # (priority label, numeric code stored in server.state) per index % 3
    tiers = (('high', -2), ('normal', -1), ('low', 0))

    workers = []
    for i in range(worker_count):
        priority, priority_code = tiers[i % 3]
        workers.append(Worker(server, priority=priority, identifier=i, backoff_time=backoff_times[i % 3]))
        # Register the worker's state entries BEFORE any thread runs, so an
        # early write by a running worker is never overwritten by this
        # initialisation.
        server.state[f"w{i}_backoff"] = 0
        server.state[f"w{i}_priority"] = priority_code

    for worker in workers:
        worker.start()

    # Run the server clock in its own thread and wait until it has stopped the
    # server and joined every worker.
    lifecycle_thread = threading.Thread(target=server_lifecycle, args=(server, workers, server_life))
    lifecycle_thread.start()
    lifecycle_thread.join()
    return workers


def write_to_csv(workers, confirm_filename=False):
    """Dump one row of metrics per worker to ``workers_<timestamp>.csv``.

    As a side effect each worker's ``connection_probability`` is set to
    ``1 - backoff_count / connection_attempts`` (0 when it never attempted a
    connection). The ``id`` column is the worker's position in ``workers``.

    Args:
        workers: sequence of worker objects to report on.
        confirm_filename: when True, print the name of the file written.

    Returns:
        The name of the CSV file, created in the current working directory.
    """
    stamp = datetime.datetime.now().strftime("%Y%m%d%H%M%S")
    csv_file = f"workers_{stamp}.csv"
    header = ["id", "priority", "total_work_time", "backoff_count", "connection_attempts", "connection_probability", "backoff_time"]

    with open(csv_file, mode='w', newline='') as file:
        writer = csv.DictWriter(file, fieldnames=header)
        writer.writeheader()
        for position, worker in enumerate(workers):
            if worker.connection_attempts != 0:
                worker.connection_probability = (1-(worker.backoff_count/worker.connection_attempts))
            else:
                worker.connection_probability = 0

            writer.writerow({
                "id": position,
                "priority": worker.priority,
                "total_work_time": worker.total_work_time,
                "backoff_count": worker.backoff_count,
                "connection_attempts": worker.connection_attempts,
                "connection_probability": worker.connection_probability,
                "backoff_time": worker.init_backoff_time,
            })

    if confirm_filename:
        print(f"Worker information written to {csv_file}")
    return csv_file

In [None]:
# DATA CREATION -----------------------------------------------

#CONN_LIMIT = 5
#SIM_RUNS = 3 #1500
#worker_count = 20
#server_life = 30
#for i in range(SIM_RUNS):
#    HIGH_BACKOFF = random.uniform(0.1, 2)
#    MED_BACKOFF = random.uniform(.8, 4)
#    LOW_BACKOFF = random.uniform(3, 7)
#    print(f"Simulation {i}: Worker count: {worker_count}, High: {HIGH_BACKOFF}, Med: {MED_BACKOFF}, Low: {LOW_BACKOFF}")

    # Instantiate server
#    server = Server(limit=CONN_LIMIT)
#    workers = simulate(server, worker_count=worker_count, server_life=server_life, backoff_times=[HIGH_BACKOFF, MED_BACKOFF, LOW_BACKOFF])
#    write_to_csv(workers)

#print("Simulation complete. Worker information written to CSV files.")

# Folder on the mounted Google Drive that holds the simulation CSV files.
# The mount point is absolute, so it must not be prefixed with os.getcwd():
# the kernel's working directory was changed earlier in the notebook, and
# prefixing it yields a path that does not exist.
current_dir = '/content/drive/MyDrive/csmaca_randomized_values'
print(current_dir)

# # Unzip File
# import zipfile
# def unzip_file(zip_filepath, extract_to):
#     with zipfile.ZipFile(zip_filepath, 'r') as zip_ref:
#         zip_ref.extractall(extract_to)
# unzip_file(current_dir, os.getcwd())

# Untar File
# import tarfile
# def extract_tar(tar_filepath, extract_to):
#     with tarfile.open(tar_filepath, 'r') as tar_ref:
#         tar_ref.extractall(extract_to)
# extract_tar(current_dir, os.getcwd())
#!tar -xvf current_dir -C os.getcwd()


In [None]:
# COMPILE DATA -----------------------------------------------

# Create one training sample for the neural network from a single simulation CSV.
# input features (X): high_target_connection_prob, normal_target_connection_prob, low_target_connection_prob
#   (the connection limit and the per-priority worker counts are not part of X)
# output (Y): high_backoff, normal_backoff, low_backoff
def create_data(df):
    """Turn one simulation result table into a (features, targets) pair.

    Args:
        df: DataFrame with one row per worker and at least the columns
            ``priority``, ``connection_probability`` and ``backoff_time``.

    Returns:
        ``(X, Y)`` where ``X`` is the mean connection probability and ``Y`` the
        mean backoff time of the 'high', 'normal' and 'low' priority workers,
        in that order. A priority with no rows contributes NaN.

    The values are returned unscaled. The former ``* 1`` "scaling" steps were
    no-ops, and the connection limit / per-priority worker counts were computed
    but never used, so both were removed.
    """
    X, Y = [], []
    for priority in ('high', 'normal', 'low'):
        rows = df[df['priority'] == priority]
        X.append(rows['connection_probability'].mean())
        Y.append(rows['backoff_time'].mean())
    return X, Y

# Collect the simulation CSV files from the mounted Drive folder. The listing
# is sorted because os.listdir() returns names in arbitrary order; without it
# the selected subset and the row order of compiled_df (and therefore the
# seeded train/validation split) could differ between runs.
current_dir = "/content/drive/MyDrive/csmaca_randomized_values"
MAX_CSV_FILES = 1400  # upper bound on the number of files read
REQUIRED_COLUMNS = {
    'priority', 'total_work_time', 'backoff_count',
    'connection_attempts', 'connection_probability',
    'backoff_time',  # read by create_data; a file without it used to raise KeyError
}
csv_files = sorted(file for file in os.listdir(current_dir) if file.endswith('.csv'))

# Build the rows in a plain list and create the dataframe once at the end
# (concatenating inside the loop copies the whole frame on every iteration).
records = []
for csv_file in csv_files[:MAX_CSV_FILES]:
    # Read the CSV file into a dataframe
    df = pd.read_csv(os.path.join(current_dir, csv_file))
    # Skip files that don't conform to the data format
    if not REQUIRED_COLUMNS.issubset(df.columns):
        print(f"Skipping {csv_file} as it does not conform to the data format.")
        continue

    # One (features, targets) sample per file
    X, Y = create_data(df)
    records.append({'CSV File': csv_file, 'X': X, 'Y': Y})

compiled_df = pd.DataFrame(records, columns=['CSV File', 'X', 'Y'])

# Print the compiled dataframe
print(compiled_df)
print(compiled_df.shape)

In [None]:
import sys

# Environment report: torch, torchquantum and Python interpreter versions
print(torch.__version__)
print(tq.__version__)
print(sys.version)

In [None]:
# TORCH QUANTUM NETWORK AND TRAINING -----------------------------------------------
import datetime
import torch
import torch.nn.functional as F
import torch.optim as optim
import argparse
import random
import numpy as np
import pandas as pd

import torchquantum as tq
from torch.optim.lr_scheduler import CosineAnnealingLR
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split


# Scale features and targets
# Each row of compiled_df holds one list of features ('X') and one list of
# targets ('Y'); they are stacked into 2-D arrays before scaling.
# NOTE(review): both scalers are fitted on ALL rows, i.e. before the split
# below, so the held-out rows influence the scaling parameters — confirm this
# is acceptable or fit on the training rows only.
scaler_X = MinMaxScaler()
scaler_y = MinMaxScaler()
X = np.array(compiled_df['X'].tolist())
Y = np.array(compiled_df['Y'].tolist())
X_scaled = scaler_X.fit_transform(X)
y_scaled = scaler_y.fit_transform(Y)
X_tensor = torch.tensor(X_scaled, dtype=torch.float32)
y_tensor = torch.tensor(y_scaled, dtype=torch.float32)

# Split data into training and testing sets
# (80/20 split with a fixed random_state; X_test / y_test are used by the
# evaluation cell further down)
X_train, X_test, y_train, y_test = train_test_split(X_tensor, y_tensor, test_size=0.2, random_state=42)

class RegressionDataset:
    """Indexable dataset that pairs each feature row with its regression target.

    Items are dicts with the keys ``"states"`` (features) and ``"Xlabel"``
    (targets), which is the format the training and validation loops read.
    """

    def __init__(self, split, X_data, y_data):
        self.split, self.X_data, self.y_data = split, X_data, y_data

    def __getitem__(self, index: int):
        return {"states": self.X_data[index], "Xlabel": self.y_data[index]}

    def __len__(self) -> int:
        # The number of samples is defined by the feature container
        return len(self.X_data)


class Regression:
    """Container holding the 'train' and 'valid' ``RegressionDataset`` splits.

    Indexing with a split name returns that dataset; iterating yields the
    split names ('train', then 'valid').
    """

    def __init__(self, X_train, X_valid, y_train, y_valid):
        splits = (
            ('train', X_train, y_train),
            ('valid', X_valid, y_valid),
        )
        self.data = {
            name: RegressionDataset(name, features, targets)
            for name, features, targets in splits
        }

    def __getitem__(self, split):
        return self.data[split]

    def __iter__(self):
        return iter(self.data)


class QModel(tq.QuantumModule):
    """Quantum regression model: state encoder -> variational layer -> Pauli-Z
    measurement of every wire -> linear read-out with three outputs."""

    class QLayer(tq.QuantumModule):
        """Variational circuit made of ``n_blocks`` repetitions of
        RX, RY, RZ on every wire followed by a circular ring of CNOTs."""

        def __init__(self, n_wires, n_blocks):
            super().__init__()
            self.n_wires = n_wires # qubit count
            self.n_blocks = n_blocks # layer count
            self.rx_layers = tq.QuantumModuleList()
            self.ry_layers = tq.QuantumModuleList()
            self.rz_layers = tq.QuantumModuleList()
            self.cnot_layers = tq.QuantumModuleList()

            for _ in range(n_blocks):
                self.rx_layers.append(tq.Op1QAllLayer(op=tq.RX, n_wires=n_wires, has_params=True, trainable=True))
                self.ry_layers.append(tq.Op1QAllLayer(op=tq.RY, n_wires=n_wires, has_params=True, trainable=True))
                self.rz_layers.append(tq.Op1QAllLayer(op=tq.RZ, n_wires=n_wires, has_params=True, trainable=True))
                self.cnot_layers.append(tq.Op2QAllLayer(op=tq.CNOT, n_wires=n_wires, has_params=False, trainable=False, circular=True))

        def forward(self, q_device: tq.QuantumDevice):
            """Apply every block, in order, to ``q_device``."""
            for k in range(self.n_blocks):
                self.rx_layers[k](q_device)
                self.ry_layers[k](q_device)
                self.rz_layers[k](q_device)
                self.cnot_layers[k](q_device)

    def __init__(self, n_wires, n_blocks):
        super().__init__()
        self.q_layer = self.QLayer(n_wires=n_wires, n_blocks=n_blocks)
        self.encoder = tq.StateEncoder()
        self.measure = tq.MeasureAll(tq.PauliZ)
        self.fc_out = torch.nn.Linear(n_wires, 3)  # one output per predicted backoff time

    def forward(self, q_device: tq.QuantumDevice, input_states, use_qiskit=False):
        """Run the model on a batch.

        Args:
            q_device: quantum device the circuit is executed on.
            input_states: batch of input rows to encode into the device state.
            use_qiskit: when True, convert the circuit to Qiskit and run it
                through the processor previously attached with
                ``set_qiskit_processor``; otherwise simulate with TorchQuantum.

        Returns:
            The output of the linear read-out (three values per sample).
        """
        devi = input_states.device
        if use_qiskit:
            # Imported here so the Qiskit tool-chain is only required when this
            # path is actually used (these names were previously never imported).
            from torchquantum.plugin import (
                tq2qiskit_initialize,
                tq2qiskit,
                tq2qiskit_measurement,
                qiskit_assemble_circs,
            )
            encoder_circs = tq2qiskit_initialize(q_device, input_states.detach().cpu().numpy())
            q_layer_circ = tq2qiskit(q_device, self.q_layer)
            measurement_circ = tq2qiskit_measurement(q_device, self.measure)
            assembled_circs = qiskit_assemble_circs(encoder_circs, q_layer_circ, measurement_circ)
            # The device is passed in as an argument; this model has no
            # ``self.q_device`` attribute.
            res = self.qiskit_processor.process_ready_circs(q_device, assembled_circs).to(devi)
        else:
            self.encoder(q_device, input_states)
            self.q_layer(q_device)
            res = self.measure(q_device)
        return self.fc_out(res)


def train(dataflow, q_device, model, device, optimizer, qiskit=False):
    """Run one optimisation pass over ``dataflow["train"]``.

    Each batch is moved to ``device`` (inputs cast to complex64, targets to
    float32), scored with the mean squared error and used for one optimizer
    step. The batch loss is printed as it is computed.

    Returns:
        List with the loss value (float) of every batch, in order.
    """
    batch_losses = []
    for batch in dataflow["train"]:
        states = batch["states"].to(device).to(torch.complex64)
        labels = batch["Xlabel"].to(device).to(torch.float32)

        optimizer.zero_grad()
        loss = F.mse_loss(model(q_device, states, qiskit), labels)
        loss.backward()
        optimizer.step()

        loss_value = loss.item()
        print(f"loss: {loss_value}")
        batch_losses.append(loss_value)

    return batch_losses


def valid_test(dataflow, q_device, split, model, device, qiskit):
    """Evaluate ``model`` on one split without tracking gradients.

    Args:
        dataflow: mapping from split name to an iterable of batches, each a
            dict with ``"states"`` (inputs) and ``"Xlabel"`` (targets).
        q_device: quantum device handed to the model.
        split: name of the split to evaluate (a key of ``dataflow``).
        model: callable invoked as ``model(q_device, inputs, qiskit)``.
        device: torch device the batches are moved to.
        qiskit: forwarded to the model (selects its Qiskit execution path).

    Returns:
        The mean squared error over the whole split as a Python float. The
        value is also printed; earlier versions only printed it, so the
        validation loss could not be recorded by the caller.
    """
    target_all = []
    output_all = []
    with torch.no_grad():
        for feed_dict in dataflow[split]:
            inputs = feed_dict["states"].to(device).to(torch.complex64)
            targets = feed_dict["Xlabel"].to(device).to(torch.float32)

            outputs = model(q_device, inputs, qiskit)

            target_all.append(targets)
            output_all.append(outputs)
        # Concatenate all batches so the loss is averaged over samples, not batches
        target_all = torch.cat(target_all, dim=0)
        output_all = torch.cat(output_all, dim=0)

    loss = F.mse_loss(output_all, target_all)
    print(f"{split} set loss: {loss}")
    return loss.item()


# MAIN METHOD:
# Hyper-parameters are declared as command-line style options so their defaults
# live in one place.
# NOTE(review): --n_train and --n_valid are not referenced anywhere else in
# this cell — confirm whether they are still needed.
parser = argparse.ArgumentParser()
parser.add_argument("--pdb", action="store_true", help="debug with pdb")
parser.add_argument("--bsz", type=int, default=8, help="batch size for training and validation")
parser.add_argument("--n_wires", type=int, default=3, help="number of qubits")
parser.add_argument("--n_blocks", type=int, default=5, help="number of blocks")
parser.add_argument("--n_train", type=int, default=500, help="number of training samples")
parser.add_argument("--n_valid", type=int, default=300, help="number of validation samples")
parser.add_argument("--epochs", type=int, default=250, help="number of training epochs")

# parse_known_args (rather than parse_args) is used; presumably so that the
# notebook kernel's own command-line arguments do not abort the cell.
args, unknown = parser.parse_known_args()

# Optional interactive debugging, only when --pdb is given
if args.pdb:
    import pdb
    pdb.set_trace()

# Seed every random number generator used below for repeatable runs
seed = 0
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)

# Prepare datasets
# (80/20 split of the scaled tensors with a fixed random_state; the held-out
# part is used as the validation split)
X_train, X_valid, y_train, y_valid = train_test_split(X_tensor, y_tensor, test_size=0.2, random_state=42)
dataset = Regression(X_train, X_valid, y_train, y_valid)

# One DataLoader per split: random order for training, fixed order otherwise
dataflow = dict()

for split in dataset:
    if split == "train":
        sampler = torch.utils.data.RandomSampler(dataset[split])
    else:
        sampler = torch.utils.data.SequentialSampler(dataset[split])
    dataflow[split] = torch.utils.data.DataLoader(
        dataset[split],
        batch_size=args.bsz,
        sampler=sampler,
        num_workers=1,
        pin_memory=True,
    )

# Use the GPU when one is available
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")

model = QModel(n_wires=args.n_wires, n_blocks=args.n_blocks).to(device)

# Adam with weight decay; the learning rate follows a cosine schedule whose
# period is the total number of epochs
n_epochs = args.epochs
optimizer = optim.Adam(model.parameters(), lr=1e-3, weight_decay=1e-4)
scheduler = CosineAnnealingLR(optimizer, T_max=n_epochs)

q_device = tq.QuantumDevice(n_wires=args.n_wires)
q_device.reset_states(bsz=args.bsz)

# loss_data collects one list of per-batch training losses per epoch
loss_data = []
for epoch in range(1, n_epochs + 1):
    # train
    print(f"Epoch {epoch}/{n_epochs}, RL: {optimizer.param_groups[0]['lr']}")
    cur_run = train(dataflow, q_device, model, device, optimizer)
    loss_data.append(cur_run)

    # valid
    valid_test(dataflow, q_device, "valid", model, device, False)
    scheduler.step()

# Optional check: re-evaluate the trained model through the Qiskit simulator.
# This is best-effort — if the Qiskit tool-chain is missing or fails, the
# reason is reported and the notebook carries on.
try:
    from torchquantum.plugin import QiskitProcessor

    print(f"\nTest with Qiskit Simulator")
    processor_simulation = QiskitProcessor(use_real_qc=False)
    model.set_qiskit_processor(processor_simulation)

    # Only the "train" and "valid" splits exist in dataflow. Asking for a
    # "test" split raised a KeyError that was silently swallowed below, so the
    # simulator was never actually evaluated.
    valid_test(dataflow, q_device, "valid", model, device, qiskit=True)
except Exception as e:
    # Include the exception type: a bare KeyError/ImportError message alone is
    # not enough to tell what went wrong.
    print(f"Qiskit simulator evaluation skipped: {type(e).__name__}: {e}")


In [None]:
# EVALUATE MODEL ----------------------------------------------- (THIS NEEDS TO BE UPDATED)
# Plot loss_data
import matplotlib.pyplot as plt
import torch.nn as nn
import torch.optim as optim
#%pip install scikit-learn
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

# loss_data holds one list of per-batch losses per epoch. Plot the mean of each
# epoch so the x axis really is "Epoch" (plotting the nested list directly
# draws one line per batch index) and give the line a label for the legend.
epoch_losses = [sum(batch_losses) / len(batch_losses) for batch_losses in loss_data if batch_losses]
plt.plot(range(1, len(epoch_losses) + 1), epoch_losses, label='Mean training loss')
plt.title('Loss Data')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.show()
criterion = nn.MSELoss()

# Evaluate the model
# The model may live on the GPU while X_test / y_test are CPU tensors: move the
# inputs to the model's device and bring the predictions back to the CPU.
with torch.no_grad():
    predictions = model(q_device, X_test.to(device)).cpu()
    test_loss = criterion(predictions, y_test)
    print(f'Test Loss: {test_loss.item()}')

# Generate predictions and save the model
predictions_unscaled = scaler_y.inverse_transform(predictions.detach().numpy())
timestamp = datetime.datetime.now().strftime("%Y%m%d%H%M%S")
model_file = f"model_{timestamp}.pth"
torch.save(model.state_dict(), model_file)
print(f"Model saved to {model_file}")

# Load the model from disk for future use
# (map_location keeps the reloaded copy on the CPU even if it was saved from a GPU)
loaded_model = QModel(n_wires=args.n_wires, n_blocks=args.n_blocks)
loaded_model.load_state_dict(torch.load(model_file, map_location="cpu"))
loaded_model.eval()

# Make a prediction for a new data point
new_probs = [0.3, 0.2, 0.05]
print(f"Querying model with probabilities: {new_probs}")
new_probs_scaled = scaler_X.transform([new_probs])
new_probs_tensor = torch.tensor(new_probs_scaled, dtype=torch.float32)
with torch.no_grad():  # inference only, no gradients needed
    new_prediction = loaded_model(q_device, new_probs_tensor)
new_prediction_unscaled = scaler_y.inverse_transform(new_prediction.detach().numpy())
print("Answer (unscaled):")
print(new_prediction_unscaled)

print(f"To achieve the following connection probabilities: {new_probs}, the backoff times should be: {new_prediction_unscaled[0].tolist()}")

In [None]:
# Query the saved model for several target connection-probability triples.

# Rebuild the network and restore the weights written by the evaluation cell
loaded_model = QModel(n_wires=args.n_wires, n_blocks=args.n_blocks)
loaded_model.load_state_dict(torch.load(model_file))
loaded_model.eval()

# Target connection probabilities to sweep over, one triple per query
probs = [
    [0.3, 0.2, 0.05],
    [0.4, 0.3, 0.1],
    [0.2, 0.2, 0.2],
    [0.6, 0.3, 0.1],
]

# For every triple: scale with the fitted feature scaler, run the model, and
# map the output back through the target scaler
predictions = []
for new_probs in probs:
    new_probs_scaled = scaler_X.transform([new_probs])
    new_probs_tensor = torch.tensor(new_probs_scaled, dtype=torch.float32)
    new_prediction = loaded_model(q_device, new_probs_tensor)
    new_prediction_unscaled = scaler_y.inverse_transform(new_prediction.detach().numpy())
    print(f"For probabilities {new_probs} Answer (unscaled) is: {new_prediction_unscaled}")
    predictions.append(new_prediction_unscaled)
