# Training Data Generation
This section generates the data for training the machine learning model.

Our goal is to train a model that predicts the best combination of 3 optimization passes from a pool of 20+ passes provided by Qiskit.

We set the limit to 3, because larger circuits would need a lot of time to run many or all applicable optimizers. Setting the budget of optimization passes to 3 keeps it in a reasonable timeframe to optimize a circuit.

For each circuit and for each combination of 3 optimization passes for a circuit we calculate:
1. The explanatory variables of each circuit, including:
    - Circuit depth
    - Circuit width
    - Circuit size
    - CNOT gate count
    - T gate count
    - Two-qubit gate count
    - Three-qubit gate count

    We decided on those variables because they are commonly used to classify circuits.

2. The quality characteristics of the circuit after it was transpiled and optimized with the combination of 3 optimization passes, including:
    - Circuit depth
    - Circuit width
    - CNOT gate count

    We chose these characteristics because they are most relevant for improving the circuit runtime performance in the NISQ-era.


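Based on the quality characteristics, the best-performing combinations of optimization passes can be determined for each circuit and used as training targets for the model.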

In [1]:
import numpy as np
import os
import csv
import time
from collections import defaultdict
from itertools import combinations
from qiskit import QuantumCircuit, qasm2, transpile
from qiskit.providers.fake_provider import GenericBackendV2
from qiskit.circuit import library as lib
from qiskit.visualization import pass_manager_drawer, staged_pass_manager_drawer
from qiskit.passmanager.base_tasks import Task
from qiskit.transpiler import PassManager, StagedPassManager, generate_preset_pass_manager
from qiskit.transpiler.preset_passmanagers.plugin import list_stage_plugins
from qiskit.transpiler.passes import (
    ALAPScheduleAnalysis,
    InverseCancellation,
    PadDynamicalDecoupling,
    UnitarySynthesis,
    Unroll3qOrMore,
    Collect2qBlocks,
    ConsolidateBlocks,
    Optimize1qGates,
    Collect1qRuns,
    HoareOptimizer,
    Optimize1qGates,
    Optimize1qGatesDecomposition,
    Collect1qRuns,
    Collect2qBlocks,
    CollectMultiQBlocks,
    CollectAndCollapse,
    CollectLinearFunctions,
    CollectCliffords,
    ConsolidateBlocks,
    InverseCancellation,
    CommutationAnalysis,
    CommutativeCancellation,
    CommutativeInverseCancellation,
    Optimize1qGatesSimpleCommutation,
    RemoveDiagonalGatesBeforeMeasure,
    RemoveResetInZeroState,
    RemoveFinalReset,
    HoareOptimizer,
    TemplateOptimization,
    ResetAfterMeasureSimplification,
    OptimizeCliffords,
    ElidePermutations,
    OptimizeAnnotated,
    Split2QUnitaries,
    RemoveIdentityEquivalent,
    ContractIdleWiresInControlFlow,
    OptimizeCliffordT,
)

In [None]:
# Define the list of optimizations to be applied

# Pool of candidate pass classes from which combinations are drawn.
# The order of this list matters: get_configuration_vector() enumerates it,
# so position i here is position i in every configuration vector written to
# the output data. Append new passes at the end to keep existing data aligned.
# generate_optimizer_combinations() instantiates every class without arguments.
OPTIMIZER_CLASSES = [
    Optimize1qGates,
    Optimize1qGatesDecomposition,
    Collect1qRuns,
    Collect2qBlocks,
    CollectMultiQBlocks,
    CollectLinearFunctions,
    CollectCliffords,
    CommutationAnalysis,
    CommutativeCancellation,
    RemoveDiagonalGatesBeforeMeasure,
    RemoveResetInZeroState,
    RemoveFinalReset,
    HoareOptimizer,
    ResetAfterMeasureSimplification,
    OptimizeCliffords,
    ElidePermutations,
    OptimizeAnnotated,
    Split2QUnitaries,
    RemoveIdentityEquivalent,
    ContractIdleWiresInControlFlow,
    OptimizeCliffordT,
]

In [None]:
# Functions to handle combinations of optimizer passes

def generate_optimizer_combinations(min_size=1, max_size=3) -> list[list[Task]]:
    """
    Build every unordered selection of optimizer passes whose size lies
    between min_size and max_size (both inclusive).

    The pass classes are taken from OPTIMIZER_CLASSES and constructed without
    arguments. The result is a list in which each element is one selection,
    given as a list of freshly created pass instances. Selections are ordered
    by size first, then in the order produced by itertools.combinations.
    """
    return [
        [pass_cls() for pass_cls in selection]
        for size in range(min_size, max_size + 1)
        for selection in combinations(OPTIMIZER_CLASSES, size)
    ]

def get_configuration_vector(optimizers: list[Task]) -> list[int]:
    """
    Encode a combination of optimizer passes as a 0/1 vector.

    The vector has one entry per class in OPTIMIZER_CLASSES, in the same
    order. An entry is 1 when a pass of exactly that class is contained in
    ``optimizers`` and 0 otherwise.

    Matching is done on the class of each supplied pass, so no throwaway pass
    instances have to be constructed and the result does not depend on how
    pass instances compare for equality.
    """
    # Collect the classes once so each membership test below is O(1)
    present_classes = {type(optimizer) for optimizer in optimizers}
    return [int(optimizer_class in present_classes) for optimizer_class in OPTIMIZER_CLASSES]

In [None]:
# Functions to get variables for model training

def get_explanatory_variables(circuit: QuantumCircuit) -> list[int]:
    """
    Collect the feature values that describe a circuit before optimization.

    The returned list contains, in this order: number of qubits, depth,
    width, size, count of 'cx' operations, count of 't' operations, number
    of instructions acting on exactly two qubits, and number of instructions
    acting on exactly three qubits.
    """
    op_counts = circuit.count_ops()

    # Histogram: number of qubits an instruction touches -> how many such instructions
    instructions_by_arity = {}
    for instruction in circuit.data:
        arity = len(instruction.qubits)
        instructions_by_arity[arity] = instructions_by_arity.get(arity, 0) + 1

    features = [
        circuit.num_qubits,
        circuit.depth(),
        circuit.width(),
        circuit.size(),
    ]
    features.append(op_counts.get('cx', 0))
    features.append(op_counts.get('t', 0))
    features.append(instructions_by_arity.get(2, 0))
    features.append(instructions_by_arity.get(3, 0))
    return features

def get_quality_data(circuit: QuantumCircuit) -> list[int]:
    """
    Collect the metrics recorded for a circuit after it has been optimized.

    The returned list starts with depth, width, size, the count of 'cx'
    operations and the count of 't' operations, followed by a series of
    further counters read directly from the circuit object.
    """
    op_counts = circuit.count_ops()

    size_metrics = [circuit.depth(), circuit.width(), circuit.size()]
    gate_metrics = [op_counts.get('cx', 0), op_counts.get('t', 0)]

    # Only part of the remaining counters is consumed at the moment
    additional_metrics = [
        circuit.num_ancillas,
        circuit.num_captured_stretches,
        circuit.num_captured_vars,
        circuit.num_declared_stretches,
        circuit.num_clbits,
        circuit.num_identifiers,
        circuit.num_input_vars,
        circuit.num_stretches,
        circuit.num_vars,
        circuit.num_unitary_factors(),
        circuit.num_tensor_factors(),
        circuit.num_connected_components(),
    ]
    return size_metrics + gate_metrics + additional_metrics


## Generation Process

The following code runs all the combinations of optimization passes on all given circuits from the circuit dataset.
**The generation takes a very long time, so it is not advised to run it here in the Jupyter Notebook.** You can alternatively view our pre-computed output data.

This code produces the following output data:

- `./transpiled_data/` contains .csv files (one per circuit, grouped in one sub-directory per circuit class) with the quality characteristics for each circuit transpiled with each combination of optimization passes.
- `./circuit_data/` contains .csv files (one per circuit class) with the explanatory variables.

Furthermore, it's possible to adjust the circuit dataset, the number of optimization passes in each combination, and the maximum circuit size to consider in the training data.

In [None]:
circuit_path = "circuits_qiskit_opt0" # Path to the directory containing circuit files
size_of_each_combination = 3  # Amount of optimization passes to use for each optimization stage - we chose 3 as previously explained
max_circuit_size = 25  # Maximum circuit size to process

# The per-class CSV files are opened below, so their directory has to exist first
os.makedirs('./circuit_data', exist_ok=True)

# Sorted listings make the processing order (and thus the row order) reproducible
for circuit_class_dir in sorted(os.listdir(circuit_path)):
    class_dir_path = os.path.join(circuit_path, circuit_class_dir)
    if not os.path.isdir(class_dir_path):
        continue

    with open(f'./circuit_data/{circuit_class_dir}.csv', mode='a', newline='') as circuit_file:
        circuit_writer = csv.writer(circuit_file)

        for filename in sorted(os.listdir(class_dir_path)):
            if not filename.endswith('.qasm'):
                continue

            transpiled_data_file = f'./transpiled_data/{circuit_class_dir}/{filename}.csv'
            # Skip if already processed. The file only appears under this name
            # once all combinations were written (see os.replace below).
            if os.path.exists(transpiled_data_file):
                continue

            # Extract the qubit count from the filename (the part between the last "_" and ".qasm")
            qubit_count = filename.split("_")[-1][:-5]
            circuit_class = filename[:-(5 + len(qubit_count) + 1)]

            # Skip big circuits because they take too long to process for the hackathon
            if int(qubit_count) > max_circuit_size:
                continue

            print(f"Processing circuit: {filename} with count {qubit_count} of class {circuit_class}")

            # Separate name for the file path so the directory path above is not overwritten
            circuit_file_path = os.path.join(class_dir_path, filename)

            # Interpret the OpenQASM file as a Qiskit QuantumCircuit
            circuit = qasm2.load(filename=circuit_file_path, custom_instructions=qasm2.LEGACY_CUSTOM_INSTRUCTIONS)

            # Fresh pass instances for every circuit
            optimizer_combinations = generate_optimizer_combinations(size_of_each_combination, size_of_each_combination)

            # Do passes until optimization to get explanatory variables from the circuit we use to start the optimization
            # NOTE(review): no per-stage pass managers are supplied here, so this run
            # presumably leaves the circuit unchanged - confirm that the input
            # circuits are already translated.
            pass_manager = StagedPassManager(stages=["init", "layout", "routing", "translation"])
            translated = pass_manager.run(circuit)
            x_variables = get_explanatory_variables(translated)

            os.makedirs(f'./transpiled_data/{circuit_class_dir}', exist_ok=True)

            # Write to a temporary file first: an interrupted run must not leave a
            # partial CSV behind that the "already processed" check would accept.
            partial_data_file = transpiled_data_file + '.part'
            with open(partial_data_file, mode='w', newline='') as transpiled_file:

                transpiled_writer = csv.writer(transpiled_file)
                opt_start_time = time.perf_counter()

                for optimizations in optimizer_combinations:
                    # Level-0 preset pipeline whose optimization stage is replaced by the current combination
                    optimize_pass_manager_level0 : StagedPassManager = generate_preset_pass_manager(optimization_level=0)
                    optimize_pass_manager_level0.optimization = PassManager(optimizations)

                    # 0/1 vector marking which optimizer classes are part of this combination
                    vec = get_configuration_vector(optimizations)

                    # Transpile it by calling the run method of the pass manager
                    start_time = time.perf_counter()
                    transpiled_level0 = optimize_pass_manager_level0.run(translated)
                    elapsed_time = time.perf_counter() - start_time

                    # Row layout: configuration vector, quality metrics, transpile time in seconds
                    transpiled_writer.writerow([vec] + get_quality_data(transpiled_level0) + [elapsed_time])

                opt_elapsed_time = time.perf_counter() - opt_start_time
                print(f"Optimizations took {opt_elapsed_time:.2f} seconds for {filename} with {len(optimizer_combinations)} combinations.")

            # Record the features only once the quality data is complete, then
            # publish the quality data under its final name.
            circuit_writer.writerow([filename] + x_variables)
            circuit_file.flush()
            os.replace(partial_data_file, transpiled_data_file)

