### Collect circuit metrics

In [None]:
from qiskit_ibm_runtime import RuntimeJobFailureError
from collections import defaultdict
from sqlalchemy import update

# Async Job Management
def get_missing_circuit_job_ids(db_manager) -> List[str]:
    """Return up to 1000 distinct job IDs whose trials still lack circuit metrics.

    A trial is considered pending when it has a job ID, its ``circuit_depth``
    is still NULL, and it is not flagged as failed. Rows are ordered by the
    database's ``random()`` before the limit is applied.
    """
    with db_manager.session() as session:
        trial_cls = db_manager.trial_class
        pending_filters = (
            trial_cls.job_id != None,  # noqa: E711 - SQLAlchemy column comparison, not a Python None test
            trial_cls.circuit_depth == None,  # noqa: E711 - SQLAlchemy column comparison
            trial_cls.is_failed == False,  # noqa: E712 - SQLAlchemy column comparison
        )
        stmt = (
            select(trial_cls.job_id)
            .where(*pending_filters)
            .distinct()
            .order_by(func.random())
            .limit(1000)
        )
        return list(session.execute(stmt).scalars().all())

async def update_job_results(db_manager, job_id: str, save_circuits: Optional[bool] = False) -> None:
    """
    Fetch a job and store circuit metrics on every pending trial that belongs to it.

    The job is looked up in the notebook-level ``service`` first and then in
    ``service_old``; the first service that knows the job wins. If neither
    does, a message is printed and nothing is written.

    Args:
        db_manager: Database manager exposing ``find_trials`` and ``session``.
        job_id: IBM Quantum job ID
        save_circuits: When truthy, also store the circuit itself on each trial.
    """
    job = None
    for svc in [service, service_old]:  # try each service in order until job is found
        try:
            # Run the lookup in a worker thread so the event loop is not blocked.
            job = await asyncio.to_thread(svc.job, job_id)
            break
        except RuntimeJobNotFound:
            continue

    if job is None:
        # No trial is in scope here; report the job ID that could not be resolved.
        print(f"Job not found in any service: {job_id}")
        return

    # Update all trials for this job
    print(f"Fetching trials for job {job_id}")
    trials = db_manager.find_trials(job_id=job_id, circuit_depth=None, is_failed=False)
    updated_count = 0
    print(f"Saving results from {job_id}")
    with db_manager.session() as session:
        for trial in trials:
            # Each pub is indexed by the trial's position in the job; element 0 is the circuit.
            circuit = job.inputs['pubs'][trial.job_pub_idx][0]
            trial.load_circuit_metrics(circuit=circuit)
            trial.created_at = job.creation_date
            if save_circuits:
                trial.circuit = circuit
            session.merge(trial)
            updated_count += 1
        # Single commit for the whole job.
        session.commit()

        print(f"Updated {updated_count} trials for job {job_id}")

async def _update_job_results(db_manager, job_id: str, save_circuits: Optional[bool] = False) -> None:
    """
    Fetch a job and write circuit metrics for its pending trials via UPDATE statements.

    Unlike loading full trial objects, this variant selects only
    ``(id, job_pub_idx)`` for the pending trials, computes the metrics from
    the job's circuits, and issues one UPDATE per trial inside a single
    commit.

    Args:
        db_manager: Database manager exposing ``trial_class`` and ``session``.
        job_id: IBM Quantum job ID
        save_circuits: Accepted for signature parity with ``update_job_results``;
            currently not used by this implementation.
    """
    job = None
    for svc in [service, service_old]:  # try each service in order until job is found
        try:
            # Run the lookup in a worker thread so the event loop is not blocked.
            job = await asyncio.to_thread(svc.job, job_id)
            break
        except RuntimeJobNotFound:
            continue

    if job is None:
        print(f"Job not found in any service: {job_id}")
        return

    print(f"Processing job {job_id}")

    trial_cls = db_manager.trial_class
    with db_manager.session() as session:
        query = (
            select(trial_cls.id, trial_cls.job_pub_idx)
            .where(
                trial_cls.job_id == job_id,
                trial_cls.circuit_depth == None,  # noqa: E711 - SQLAlchemy column comparison
                trial_cls.is_failed == False,  # noqa: E712 - SQLAlchemy column comparison
            )
        )
        trial_data = session.execute(query).all()

    if not trial_data:
        print(f"No trials to update for job {job_id}")
        return

    # Compute metrics for all circuits upfront
    metrics_by_id = {}
    for trial_id, job_pub_idx in trial_data:
        circuit = job.inputs['pubs'][job_pub_idx][0]

        # One pass over the instructions: tally instructions by the number of
        # qubits they act on and collect every qubit that is actually used.
        # Uses the named ``qubits`` attribute rather than legacy tuple
        # unpacking of the instruction.
        instructions_by_arity = defaultdict(int)
        used_qubits = set()
        for inst in circuit.data:
            instructions_by_arity[len(inst.qubits)] += 1
            used_qubits.update(inst.qubits)

        op_counts = circuit.count_ops()
        metrics_by_id[trial_id] = {
            'circuit_depth': circuit.depth(),
            'circuit_op_counts': op_counts,
            'circuit_num_single_qubit_gates': instructions_by_arity[1],
            # Total instruction count with measurements excluded.
            'circuit_num_gates': circuit.size() - op_counts.get("measure", 0),
            'circuit_num_qubits': len(used_qubits),
        }

    # Bulk update using a single session and commit
    print(f"Saving {len(metrics_by_id)} trial metrics for job {job_id}")
    with db_manager.session() as session:
        for trial_id, metrics in metrics_by_id.items():
            stmt = (
                update(trial_cls)
                .where(trial_cls.id == trial_id)
                .values(**metrics)
            )
            session.execute(stmt)
        session.commit()

    print(f"Updated {len(metrics_by_id)} trials for job {job_id}")

async def update_all_pending_results(db_manager, batch_size: int = 5, save_circuits: Optional[bool] = False) -> None:
    """
    Update all pending job results asynchronously.

    Pending job IDs are fetched repeatedly and processed in batches until a
    fetch returns no job that has not already been attempted during this
    call. Each job is attempted at most once per call, so jobs that cannot be
    found or that raise do not keep the loop running forever.

    Args:
        db_manager: Database manager passed through to the per-job update.
        batch_size: Number of concurrent job fetches
        save_circuits: Forwarded to ``update_job_results``.
    """
    # Job IDs already tried in this call. A job that stays pending after an
    # attempt (not found, or failed) would otherwise be returned again on
    # every pass.
    # NOTE(review): pending IDs come back as a limited random sample, so this
    # can stop early in the unlikely case that a sample holds only
    # already-attempted IDs while untried ones remain.
    attempted = set()

    while True:
        pending_jobs = [
            job_id
            for job_id in get_missing_circuit_job_ids(db_manager)
            if job_id not in attempted
        ]
        if not pending_jobs:
            break

        print(f"Updating circuits from {len(pending_jobs)} jobs")
        total_batches = (len(pending_jobs) + batch_size - 1) // batch_size

        # Process jobs in batches to avoid overwhelming the API
        for i in range(0, len(pending_jobs), batch_size):
            batch = pending_jobs[i : i + batch_size]
            attempted.update(batch)
            tasks = [update_job_results(db_manager, job_id, save_circuits=save_circuits) for job_id in batch]

            batch_num = i // batch_size + 1
            print(f"Processing batch {batch_num}/{total_batches}")

            # One failing job must not abort the rest of the batch, but the
            # failure should still be visible.
            outcomes = await asyncio.gather(*tasks, return_exceptions=True)
            for job_id, outcome in zip(batch, outcomes):
                if isinstance(outcome, BaseException):
                    print(f"Failed to update job {job_id}: {outcome!r}")

# Top-level ``await``: needs a context that supports it (e.g. a notebook cell
# with a running event loop). batch_size=1 handles one job at a time;
# save_circuits=True is forwarded to the per-job update.
await update_all_pending_results(benchmark_db, batch_size=1, save_circuits=True)

In [None]:
# multi-threaded fetching circuits from IBM Quantum jobs
async def load_and_save_circuit_metrics(trial: RandomBooleanFunctionTrial):
    """Fetch the trial's circuit, load its metrics, and persist the trial.

    Does nothing when ``trial.circuit_depth`` is already set. Otherwise each
    service (``service``, then ``service_old``) is tried in turn until one
    does not raise ``RuntimeJobNotFound``. If no circuit was obtained, a
    message is printed and the trial is left unsaved.
    """
    if trial.circuit_depth is not None:
        return

    fetched_circuit = None
    for candidate_service in (service, service_old):
        try:
            fetched_circuit = await trial.get_ibm_circuit(candidate_service)
            trial.load_circuit_metrics(circuit=fetched_circuit)
        except RuntimeJobNotFound:
            # This service does not know the job; fall through to the next one.
            continue
        break

    if fetched_circuit is None:
        print(f"Job not found in any service for trial {trial}")
        return

    benchmark_db.save_trial(trial)

async def _run_metric_batch(coros):
    """Run the coroutines concurrently; exceptions are returned, not raised."""
    # asyncio.wait() no longer accepts bare coroutines (Python 3.11+), so the
    # batch is driven through gather inside a real coroutine for asyncio.run.
    return await asyncio.gather(*coros, return_exceptions=True)


# Walk every (num_vars, complexity) bucket and fill in missing circuit metrics.
for num_vars in range(2, 31):
    for complexity in range(1, 21):
        print(num_vars, complexity)
        # Smaller batches as num_vars * complexity grows, never below 5.
        batch_size = max(5, 50 - (num_vars * complexity))

        # The same filters drive both the count and the batch query, so the
        # batches really belong to the bucket that was counted.
        bucket_filters = (
            RandomBooleanFunctionTrial.circuit_depth == None,  # noqa: E711 - SQLAlchemy column comparison
            RandomBooleanFunction.num_vars == num_vars,
            RandomBooleanFunction.complexity == complexity,
            RandomBooleanFunctionTrial.is_failed == False,  # noqa: E712 - SQLAlchemy column comparison
        )
        missing_trial_count = benchmark_db.query(
            select(func.count(RandomBooleanFunctionTrial.id))
            .select_from(RandomBooleanFunctionTrial).join(RandomBooleanFunction)
            .where(*bucket_filters)
        )[0]
        if missing_trial_count == 0:
            continue

        # Ceiling division: no extra (possibly empty) batch when the count is
        # an exact multiple of batch_size.
        num_batches = -(-missing_trial_count // batch_size)
        for batch_idx in range(num_batches):
            # NOTE(review): trials whose job cannot be found keep a NULL
            # circuit_depth and will be selected again by later batches.
            trials = list(benchmark_db.query(
                select(RandomBooleanFunctionTrial)
                .join(RandomBooleanFunction)
                .where(*bucket_filters)
                .limit(batch_size)
            ))
            if not trials:
                break

            outcomes = asyncio.run(
                _run_metric_batch([load_and_save_circuit_metrics(trial) for trial in trials])
            )
            for trial, outcome in zip(trials, outcomes):
                if isinstance(outcome, BaseException):
                    print(f"Failed to load circuit metrics for trial {trial}: {outcome!r}")

            completed = min((batch_idx + 1) * batch_size, missing_trial_count)
            print(f"Completed {completed}/{missing_trial_count} trials")

In [None]:
# multi-threaded fetching circuits from IBM Quantum jobs
async def load_and_save_circuit_metrics(trial: RandomBooleanFunctionTrial):
    """Load circuit metrics for a trial that has none yet and save the trial.

    Trials with a non-None ``circuit_depth`` are skipped. The circuit is
    requested from ``service`` and then ``service_old``; a
    ``RuntimeJobNotFound`` from one service moves on to the next. When no
    circuit could be obtained the trial is reported and not saved.
    """
    if trial.circuit_depth is not None:
        return

    ibm_circuit = None
    for runtime_service in (service, service_old):
        try:
            ibm_circuit = await trial.get_ibm_circuit(runtime_service)
            trial.load_circuit_metrics(circuit=ibm_circuit)
        except RuntimeJobNotFound:
            # Job unknown to this service; try the next one.
            continue
        break

    if ibm_circuit is None:
        print(f"Job not found in any service for trial {trial}")
        return

    benchmark_db.save_trial(trial)

async def _run_metric_batch(coros):
    """Run the coroutines concurrently; exceptions are returned, not raised."""
    # asyncio.wait() no longer accepts bare coroutines (Python 3.11+), so the
    # batch is driven through gather inside a real coroutine for asyncio.run.
    return await asyncio.gather(*coros, return_exceptions=True)


BATCH_SIZE = 10
target_trial_count = 4000

# Number of trials that already have circuit metrics.
current_trial_count = benchmark_db.query(
    select(func.count(RandomBooleanFunctionTrial.id))
    .where(RandomBooleanFunctionTrial.circuit_depth != None)  # noqa: E711 - SQLAlchemy column comparison
)[0]

# Ceiling division over the remaining gap; zero batches once the target is met.
remaining_trial_count = max(0, target_trial_count - current_trial_count)
processed_trial_count = 0
for _batch_idx in range(-(-remaining_trial_count // BATCH_SIZE)):
    # Random sample of trials that still lack metrics and are not failed.
    trials = list(benchmark_db.query(
        select(RandomBooleanFunctionTrial)
        .join(RandomBooleanFunction)
        .where(RandomBooleanFunctionTrial.circuit_depth == None, RandomBooleanFunctionTrial.is_failed == False)  # noqa: E711,E712
        .order_by(func.random())
        .limit(BATCH_SIZE)
    ))
    if not trials:
        # Nothing left to fetch; stop instead of running empty batches.
        break

    outcomes = asyncio.run(
        _run_metric_batch([load_and_save_circuit_metrics(trial) for trial in trials])
    )
    for trial, outcome in zip(trials, outcomes):
        if isinstance(outcome, BaseException):
            print(f"Failed to load circuit metrics for trial {trial}: {outcome!r}")

    # Report progress after the batch that just finished, using the number of
    # trials actually processed.
    processed_trial_count += len(trials)
    print(f"Completed {current_trial_count + processed_trial_count}/{target_trial_count} trials")


### Refresh created_at for data

In [None]:
async def refresh_created_at(trial: RandomBooleanFunctionTrial):
    """Set ``trial.created_at`` from its job's creation date and save the trial.

    The job is looked up by ``trial.job_id`` in ``service`` and then
    ``service_old``. If no job is obtained the trial is left untouched.
    """
    fetched_job = None
    for runtime_service in (service, service_old):
        try:
            # The lookup runs in a worker thread via asyncio.to_thread.
            fetched_job = await asyncio.to_thread(runtime_service.job, trial.job_id)
        except RuntimeJobNotFound:
            continue
        break

    if not fetched_job:
        return

    trial.created_at = fetched_job.creation_date
    benchmark_db.save_trial(trial)

def get_trials_by_ids(db: BenchmarkDatabase, ids: List[int]) -> List[RandomBooleanFunctionTrial]:
    """Query the trials whose ``id`` is in ``ids``, joined-loading each trial's ``problem``."""
    id_filter = RandomBooleanFunctionTrial.id.in_(ids)
    eager_problem = joinedload(RandomBooleanFunctionTrial.problem)
    stmt = select(RandomBooleanFunctionTrial).where(id_filter).options(eager_problem)
    return db.query(stmt)

async def _run_refresh_batch(coros):
    """Run the coroutines concurrently; exceptions are returned, not raised."""
    # asyncio.wait() no longer accepts bare coroutines (Python 3.11+), so the
    # batch is driven through gather inside a real coroutine for asyncio.run.
    return await asyncio.gather(*coros, return_exceptions=True)


# Trial IDs of the train and test splits, as stored next to the notebook.
with open("train_ids.json", "r", encoding="utf-8") as f:
    train_ids = json.load(f)
with open("test_ids.json", "r", encoding="utf-8") as f:
    test_ids = json.load(f)

# Number of leading trials to skip.
# NOTE(review): presumably the trials already refreshed by earlier partial
# runs; this relies on the query returning rows in a stable order - confirm.
RESUME_OFFSET = 310 + 185 + 104 + 16 + 6
all_trials = get_trials_by_ids(benchmark_db, train_ids + test_ids)[RESUME_OFFSET:]

BATCH_SIZE = 2
for i in range(0, len(all_trials), BATCH_SIZE):
    batch = all_trials[i : i + BATCH_SIZE]
    outcomes = asyncio.run(_run_refresh_batch([refresh_created_at(trial) for trial in batch]))
    for trial, outcome in zip(batch, outcomes):
        if isinstance(outcome, BaseException):
            print(f"Failed to refresh created_at for trial {trial}: {outcome!r}")
    print(f"Refreshed created_at for trials {i} to {i + len(batch)}")
    
