In [None]:
import os
from dotenv import load_dotenv
import json
import random
import matplotlib.pyplot as plt
import numpy as np
import asyncio
import numpy as np
from sqlalchemy.orm import joinedload

from benchmarklib import BenchmarkDatabase
from rbf import RandomBooleanFunctionTrial, RandomBooleanFunction
from benchmarklib.compilers import CompileType, XAGCompiler

from qiskit_ibm_runtime import QiskitRuntimeService, RuntimeJobNotFound

import logging
from typing import Iterable, List, Tuple, Dict, Any, Union, Optional
import qiskit
from qiskit.providers import Backend
from qiskit import QuantumCircuit, transpile
import random

from tweedledum.bool_function_compiler import circuit_input, QuantumCircuitFunction
from tweedledum import BitVec

from sqlalchemy import select, func
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import mean_squared_error

from benchmarklib import CompileType, BenchmarkDatabase
from benchmarklib import BatchQueue
from benchmarklib.compilers import SynthesisCompiler

In [None]:
# Read the .env file first so the legacy credentials below are available via os.getenv.
load_dotenv()
API_TOKEN_OLD = os.getenv("API_TOKEN_OLD")
API_INSTANCE_OLD = os.getenv("API_INSTANCE_OLD")

# Two runtime clients: `service` is built with no arguments (the new credentials),
# while `service_old` is bound explicitly to the old token/instance.
service = QiskitRuntimeService()
service_old = QiskitRuntimeService(
    instance=API_INSTANCE_OLD,
    token=API_TOKEN_OLD,
    channel="ibm_quantum_platform",
)

# Backend handle and benchmark database shared by the cells below.
backend = service.backend("ibm_rensselaer")
benchmark_db = BenchmarkDatabase(
    "rbf.db",
    RandomBooleanFunction,
    RandomBooleanFunctionTrial,
)

In [None]:
from qiskit_ibm_runtime import RuntimeJobFailureError

# Async Job Management
def get_missing_circuit_job_ids(db_manager) -> List[str]:
    """Return up to 1000 distinct job IDs whose trials still lack circuit metrics.

    A trial counts as pending when it has a job ID, has no ``circuit_depth``
    stored, and is not flagged as failed.

    Args:
        db_manager: Object exposing ``session()`` and ``trial_class``.

    Returns:
        A list of the matching job IDs (at most 1000 per call).
    """
    with db_manager.session() as session:
        trial_cls = db_manager.trial_class
        # These comparisons build SQL expressions for `select`, so the
        # `== None` / `== False` spelling is intentional here.
        pending_conditions = (
            trial_cls.job_id != None,
            trial_cls.circuit_depth == None,
            trial_cls.is_failed == False,
        )
        stmt = select(trial_cls.job_id).where(*pending_conditions)
        stmt = stmt.distinct().limit(1000)
        job_ids = session.execute(stmt).scalars().all()
        return list(job_ids)

async def update_job_results(db_manager, job_id: str) -> None:
    """
    Fetch one IBM Quantum job and store circuit metrics for its pending trials.

    The job is looked up in the notebook-level runtime services, ``service``
    first and then ``service_old``; the first service that returns the job is
    used. Every trial of that job that has no ``circuit_depth`` yet and is not
    marked failed gets its metrics loaded from the submitted circuit and is
    merged back into the database.

    Args:
        db_manager: Object exposing ``find_trials(...)`` and ``session()``.
        job_id: IBM Quantum job ID
    """
    job = None
    for svc in (service, service_old):  # try each service in order until job is found
        try:
            # svc.job runs in a worker thread so the lookup does not block the event loop.
            job = await asyncio.to_thread(svc.job, job_id)
            break
        except RuntimeJobNotFound:
            continue

    if job is None:
        # No service knows this job. Report the job ID: no trial has been
        # loaded at this point, so there is no trial to name in the message.
        print(f"Job not found in any service for job {job_id}")
        return

    # Update all trials for this job
    trials = db_manager.find_trials(job_id=job_id, circuit_depth=None, is_failed=False)
    updated_count = 0

    with db_manager.session() as session:
        for trial in trials:
            # Each trial records which pub of the job it was submitted as;
            # the first element of that pub is the circuit.
            circuit = job.inputs['pubs'][trial.job_pub_idx][0]
            trial.load_circuit_metrics(circuit=circuit)
            session.merge(trial)
            updated_count += 1
        session.commit()

        print(f"Updated {updated_count} trials for job {job_id}")

async def update_all_pending_results(db_manager, batch_size: int = 5) -> None:
    """
    Update all pending job results asynchronously.

    Pending job IDs are re-queried after every pass. Each job ID is attempted
    at most once per call, so a job that cannot be found or keeps failing no
    longer makes this loop run forever.

    Args:
        db_manager: Passed through to ``get_missing_circuit_job_ids`` and
            ``update_job_results``.
        batch_size: Number of concurrent job fetches

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError(f"batch_size must be >= 1, got {batch_size}")

    attempted = set()  # job IDs already tried during this call
    while True:
        # NOTE: the pending query returns at most 1000 IDs per call; if all of
        # them were already attempted, the loop stops here.
        pending_jobs = [
            job_id
            for job_id in get_missing_circuit_job_ids(db_manager)
            if job_id not in attempted
        ]
        if len(pending_jobs) == 0:
            break

        print(f"Updating circuits from {len(pending_jobs)} jobs")
        total_batches = (len(pending_jobs) + batch_size - 1) // batch_size

        # Process jobs in batches to avoid overwhelming the API
        for i in range(0, len(pending_jobs), batch_size):
            batch = pending_jobs[i : i + batch_size]
            tasks = [update_job_results(db_manager, job_id) for job_id in batch]

            batch_num = i // batch_size + 1
            print(f"Processing batch {batch_num}/{total_batches}")

            # Keep going when a single job fails, but report the failure
            # instead of dropping it silently.
            results = await asyncio.gather(*tasks, return_exceptions=True)
            for job_id, result in zip(batch, results):
                if isinstance(result, BaseException):
                    print(f"Failed to update job {job_id}: {result!r}")

        attempted.update(pending_jobs)

await update_all_pending_results(benchmark_db, batch_size=5)

In [None]:
# multi-threaded fetching circuits from IBM Quantum jobs
async def load_and_save_circuit_metrics(trial: RandomBooleanFunctionTrial):
    """Fetch the circuit of a trial lacking ``circuit_depth`` and persist its metrics.

    Trials that already have a ``circuit_depth`` are left untouched. Otherwise
    the circuit is requested from ``service`` and then ``service_old``; once a
    service returns it, the metrics are loaded onto the trial and the trial is
    saved through ``benchmark_db``.
    """
    if trial.circuit_depth is not None:
        return

    fetched_circuit = None
    for runtime_service in (service, service_old):
        try:
            fetched_circuit = await trial.get_ibm_circuit(runtime_service)
            trial.load_circuit_metrics(circuit=fetched_circuit)
        except RuntimeJobNotFound:
            # This service does not know the job; fall through to the next one.
            continue
        else:
            break

    if fetched_circuit is None:
        # Neither service produced a circuit, so there is nothing to save.
        print(f"Job not found in any service for trial {trial}")
        return

    benchmark_db.save_trial(trial)

for num_vars in range(2, 31):
    for complexity in range(1, 21):
        print(num_vars, complexity)
        batch_size = max(5, 50 - (num_vars * complexity))
        missing_trial_count = benchmark_db.query(
            select(func.count(RandomBooleanFunctionTrial.id))
            .select_from(RandomBooleanFunctionTrial).join(RandomBooleanFunction)
            .where(RandomBooleanFunctionTrial.circuit_depth == None, RandomBooleanFunction.num_vars == num_vars, RandomBooleanFunction.complexity == complexity, RandomBooleanFunctionTrial.is_failed == False)
        )[0]
        if missing_trial_count == 0:
            continue
        for _ in range(missing_trial_count // batch_size + 1):
            tasks = [load_and_save_circuit_metrics(trial) for trial in benchmark_db.query(
                select(RandomBooleanFunctionTrial)
                .join(RandomBooleanFunction)
                .where(RandomBooleanFunctionTrial.circuit_depth == None, RandomBooleanFunctionTrial.is_failed == False)
                .limit(batch_size)
            )]
            asyncio.run(asyncio.wait(tasks))
            print(f"Completed {(_+1)*batch_size}/{missing_trial_count} trials")

In [None]:
# multi-threaded fetching circuits from IBM Quantum jobs
async def load_and_save_circuit_metrics(trial: RandomBooleanFunctionTrial):
    """Load and store circuit metrics for a trial that has no ``circuit_depth`` yet.

    The circuit is requested from ``service`` first and ``service_old`` second.
    When a service returns it, the metrics are loaded onto the trial and the
    trial is written back with ``benchmark_db.save_trial``. A trial that
    already has a ``circuit_depth`` is skipped.

    NOTE(review): this repeats the definition from the preceding cell;
    consider defining it once.
    """
    needs_metrics = trial.circuit_depth is None
    if not needs_metrics:
        return

    ibm_circuit = None
    for candidate_service in (service, service_old):
        try:
            ibm_circuit = await trial.get_ibm_circuit(candidate_service)
            trial.load_circuit_metrics(circuit=ibm_circuit)
        except RuntimeJobNotFound:
            # Job unknown to this service; try the next one.
            continue
        break

    if ibm_circuit is None:
        # No circuit was obtained from either service.
        print(f"Job not found in any service for trial {trial}")
        return

    benchmark_db.save_trial(trial)

BATCH_SIZE = 10
target_trial_count = 4000
current_trial_count= benchmark_db.query(
    select(func.count(RandomBooleanFunctionTrial.id))
    .where(RandomBooleanFunctionTrial.circuit_depth != None)
)[0]
for _ in range((target_trial_count - current_trial_count) // BATCH_SIZE + 1):
    tasks = [load_and_save_circuit_metrics(trial) for trial in benchmark_db.query(
        select(RandomBooleanFunctionTrial)
        .join(RandomBooleanFunction)
        .where(RandomBooleanFunctionTrial.circuit_depth == None, RandomBooleanFunctionTrial.is_failed == False)
        .order_by(func.random())
        .limit(BATCH_SIZE)
    )]
    asyncio.run(asyncio.wait(tasks))
    print(f"Completed {current_trial_count + _*BATCH_SIZE}/{target_trial_count} trials")


In [None]:
# create training and test sets
# Every feature row has the same layout for BOTH sets:
#   [bias (constant 1), num qubits, circuit depth, ecr, rz, sx, x]
# (the test rows used to be written one column to the left of the train rows).
N_TRAIN = 1000
N_TEST = 100
FEATURE_GATES = ('ecr', 'rz', 'sx', 'x')
N_FEATURES = 3 + len(FEATURE_GATES)  # bias + num qubits + depth + one count per gate


def _trial_feature_row(trial):
    """Return the feature row for one trial in the column layout described above."""
    op_counts = trial.circuit_op_counts
    return [1.0, trial.circuit_num_qubits, trial.circuit_depth] + [
        op_counts.get(gate, 0) for gate in FEATURE_GATES
    ]


train_rows, train_labels, train_ids = [], [], []
test_rows, test_labels, test_ids = [], [], []
for i, trial in enumerate(benchmark_db.query(
    select(RandomBooleanFunctionTrial)
    .where(RandomBooleanFunctionTrial.circuit_depth != None)
    .order_by(func.random())
    .limit(N_TRAIN + N_TEST)
    .options(joinedload(RandomBooleanFunctionTrial.problem))
)):
    # The first N_TRAIN sampled trials form the training set, the rest the test set.
    if i < N_TRAIN:
        rows, labels, ids = train_rows, train_labels, train_ids
    else:
        rows, labels, ids = test_rows, test_labels, test_ids
    rows.append(_trial_feature_row(trial))
    labels.append(trial.calculate_success_rate())
    ids.append(trial.id)

# Arrays are sized to the trials actually returned, so no placeholder rows
# are left behind when the database holds fewer trials than requested.
X = np.array(train_rows, dtype=float).reshape(-1, N_FEATURES)
Y = np.array(train_labels, dtype=float).reshape(-1, 1)
X_test = np.array(test_rows, dtype=float).reshape(-1, N_FEATURES)
Y_test = np.array(test_labels, dtype=float).reshape(-1, 1)

if len(train_ids) + len(test_ids) < N_TRAIN + N_TEST:
    print(
        f"Warning: only {len(train_ids) + len(test_ids)} trials with circuit metrics "
        f"were available (requested {N_TRAIN + N_TEST})"
    )

# save datasets to disk for reproducibility
np.save("train_features.npy", X)
np.save("train_labels.npy", Y)
np.save("test_features.npy", X_test)
np.save("test_labels.npy", Y_test)

with open("train_ids.json", "w") as f:
    json.dump(train_ids, f)
with open("test_ids.json", "w") as f:
    json.dump(test_ids, f)

In [None]:
# Reload the saved datasets so this cell does not depend on the cell that built them.
X = np.load("train_features.npy")
Y = np.load("train_labels.npy")
X_test = np.load("test_features.npy")
Y_test = np.load("test_labels.npy")

# use logistic regression to predict success rate
# The success rates are rounded to the nearest integer to obtain class labels.
# max_iter was 10, below scikit-learn's default of 100; the features are raw,
# unscaled counts, so the solver gets a larger iteration budget to converge.
# NOTE(review): column 0 of X is a constant 1 and LogisticRegression also fits
# its own intercept by default, so the bias is represented twice -- confirm
# whether fit_intercept=False was intended.
model = LogisticRegression(max_iter=1000)
model.fit(X, np.round(Y).ravel())

In [None]:
from sklearn.metrics import confusion_matrix, log_loss

# Class labels are the success rates rounded to the nearest integer,
# matching what the model was fitted on.
Y_train_classes = np.round(Y).ravel()
Y_test_classes = np.round(Y_test).ravel()

# In-sample evaluation.
# log_loss already returns the mean loss per sample, so it must not be divided
# by the number of samples again. `labels` is passed so the call also works
# when the evaluated labels happen to contain a single class.
Y_pred = model.predict(X)
confusion_matrix_in = confusion_matrix(Y_train_classes, Y_pred, labels=model.classes_)
cross_entropy_in = log_loss(Y_train_classes, model.predict_proba(X), labels=model.classes_)
print("In-sample Confusion Matrix:")
print(confusion_matrix_in)
print(f"In-sample Error (Cross Entropy): {cross_entropy_in}")

# Out-of-sample evaluation on the held-out set.
Y_pred = model.predict(X_test)
confusion_matrix_out = confusion_matrix(Y_test_classes, Y_pred, labels=model.classes_)
cross_entropy_out = log_loss(Y_test_classes, model.predict_proba(X_test), labels=model.classes_)
print("Out-of-sample Confusion Matrix:")
print(confusion_matrix_out)
print(f"Out-of-sample Error (Cross Entropy): {cross_entropy_out}")

In [None]:
# Number of held-out samples per class, after rounding the labels to integers.
test_class_labels = np.round(Y_test).ravel().astype(int)
print(np.bincount(test_class_labels))

In [None]:
# One learned weight per feature column, in the column order of X.
feature_names = ["Bias", "Num Qubits", "Circuit Depth", "ECR Count", "RZ Count", "SX Count", "X Count"]
for name, coef in zip(feature_names, model.coef_[0]):
    print(f"{name}: {coef:.4f}")

# "Bias" above is only the weight of the constant-1 column of X. The model's
# own intercept term is stored separately, so show it as well to give the
# complete offset.
print(f"Intercept: {model.intercept_[0]:.4f}")

In [None]:
# Summarise the reported "gate_error" values for each basis gate of the backend.
properties = backend.properties()
error_rates = {basis_gate: [] for basis_gate in backend.configuration().basis_gates}

# Single pass over the reported gates: keep entries whose gate is a basis gate.
for gate_info in properties.gates:
    gate_entry = gate_info.to_dict()
    gate_name = gate_entry["gate"]
    if gate_name not in error_rates:
        continue
    error_rates[gate_name].extend(
        param["value"]
        for param in gate_entry["parameters"]
        if param["name"] == "gate_error"
    )

# Print mean / min / max per basis gate, skipping gates with no reported error.
for gate_name, rates in error_rates.items():
    if not rates:
        continue
    print(f"{gate_name}: avg error = {np.mean(rates):.2e} (min={np.min(rates):.2e}, max={np.max(rates):.2e})")

In [None]:
import matplotlib.pyplot as plt
# Distribution of the training success rates, rounded to one decimal place.
# The previous call paired 10 x-positions with one y-value per training row,
# which plt.scatter rejects because x and y differ in size.
# NOTE(review): plotting the count per rounded rate is an assumption about the
# intended figure -- confirm.
rate_values, rate_counts = np.unique(np.round(Y, 1), return_counts=True)
fig, ax = plt.subplots()
ax.scatter(rate_values, rate_counts)
ax.set(
    xlabel="Success rate (rounded to 0.1)",
    ylabel="Number of training trials",
    title="Distribution of training success rates",
)
plt.show()