# CUDA-Q circuits backend

> [CUDA-Q](https://github.com/NVIDIA/cuda-quantum) based quantum circuit backend.

In [None]:
#| default_exp platform.backends.circuits_cudaq

In [None]:
#| export
from genQC.imports import *
from genQC.platform.backends.base_backend import BaseBackend
from genQC.platform.circuits_instructions import CircuitInstructions

import cudaq

## Utils

In [None]:
#| export
@dataclass
class ParametrizedCudaqKernel:
    """Bundle of a compiled `cudaq.kernel` and the gate angles it is called with."""
    # Kernel built by `CircuitsCudaqBackend._construct_kernel`; invoked as `kernel(input_state, thetas)`.
    kernel: cudaq.kernel
    # One angle per gate, in gate order; passed to the kernel as `thetas`.
    params: list[float] # currently only support 1 angle per gate

## Backend

In [None]:
#| export
class CircuitsCudaqBackend(BaseBackend):
    """Backend that converts genQC `CircuitInstructions` into CUDA-Q kernels.

    The generated kernels take an initial state vector and a list of angles
    (one per gate) and are wrapped in a `ParametrizedCudaqKernel`.
    """

    BASIC_BACKEND_TYPE = type[cudaq.kernel]

    def __init__(self, target: str = "qpp-cpu") -> None:
        """Reset CUDA-Q and select the simulation `target` (e.g. "qpp-cpu" or "nvidia")."""
        cudaq.reset_target()
        cudaq.set_target(target) # 'nvidia'
    
    def backend_to_genqc(self):
        """Not supported: conversion from a cudaq kernel back to genQC always raises `NotImplementedError`."""
        raise NotImplementedError("Not implemeted cudaq to genQC.")

    # Gate name -> integer id. Has to match the `gate == <id>` branches inside
    # `place_gate_kernel` below (cudaq kernels cannot take strings).
    KERNEL_VOCABULARY = {"h":1, 
                         "cx":2, 
                         "z":3, 
                         "x":4, 
                         "y":5, 
                         "ccx":6, 
                         "swap":7,
                         "rx":8,
                         "ry":9,
                         "rz":10,
                         "cp":11,} 

    def _construct_kernel(self,
                          gate_list: List[str],
                          target_1_nodes_list: List[int],
                          target_2_nodes_list: List[int],
                          control_1_nodes_list: List[int],
                          control_2_nodes_list: List[int]
                         ) -> cudaq.kernel:
        """Construct a `cudaq.kernel` from the provided parameters.

        All lists are indexed by gate position. `gate_list` holds gate names
        (keys of `KERNEL_VOCABULARY`); the node lists hold the qubit indices
        used by the corresponding gate. Entries a gate does not use are never
        read by the kernel.

        Returns a kernel with signature `kernel(input_state, thetas)`, where
        `thetas[i]` is the angle handed to gate `i`.

        Raises `KeyError` if a gate name is not in `KERNEL_VOCABULARY`.
        """
  
        # NOTE: `num_gates` and the (re-bound) `gate_list` are captured by the kernels
        # below, so these names must stay in sync with the kernel source.
        num_gates = len(gate_list)
        gate_list = [self.KERNEL_VOCABULARY[g] for g in gate_list]

        # Note: `@cudaq.kernel` decorator has a overhead of 20ms, regardless of the for-loop inside
        
        @cudaq.kernel
        def place_gate_kernel(gate: int, 
                              qvector: cudaq.qview,
                              target_1: int, 
                              target_2: int, 
                              control_1: int, 
                              control_2: int,
                              theta: float):       
            
            if   gate == 1: h(qvector[target_1])
            elif gate == 2: cx(qvector[control_1], qvector[target_1])
            elif gate == 3: z(qvector[target_1])
            elif gate == 4: x(qvector[target_1])
            elif gate == 5: y(qvector[target_1])
            elif gate == 6: x.ctrl(qvector[control_1], qvector[control_2], qvector[target_1])
            elif gate == 7: swap(qvector[target_1], qvector[target_2])
                
            elif gate == 8:  rx(theta, qvector[target_1])
            elif gate == 9:  ry(theta, qvector[target_1])
            elif gate == 10: rz(theta, qvector[target_1])
                
            elif gate == 11: 
                # R1 applies the unitary transformation; i.e. it is a phase gate
                # R1(λ) = | 1     0    |
                #         | 0  exp(iλ) |
                r1.ctrl(theta, qvector[target_1], qvector[target_2])
                
      
        @cudaq.kernel  
        def kernel(input_state: list[complex], thetas: list[float]):
            qvector = cudaq.qvector(input_state)
            for i in range(num_gates):
                place_gate_kernel(gate_list[i], qvector, target_1_nodes_list[i], target_2_nodes_list[i], control_1_nodes_list[i], control_2_nodes_list[i], thetas[i])
    
        return kernel

    def check_error_circuit(self, 
                            gate: str, 
                            num_target_nodes: int, 
                            num_control_nodes: int) -> bool:
        """Check number of connections of given gate. Used to check for error circuits.

        Returns `True` if the gate has exactly the expected number of target
        and control nodes, `False` otherwise.

        Raises `NotImplementedError` if `gate` is not in `KERNEL_VOCABULARY`,
        or if it is in the vocabulary but has no arity rule here.
        """

        if gate not in self.KERNEL_VOCABULARY:
            raise NotImplementedError(f"Unknown gate {gate}, not in `self.KERNEL_VOCABULARY`.")
            
        # Single-qubit gates: one target, no control.
        if gate in ["h", "z", "x", "y", "rx", "ry", "rz"]:
            return num_target_nodes == 1 and num_control_nodes == 0

        # Singly-controlled gate: one target, one control.
        if gate in ["cx"]:
            return num_target_nodes == 1 and num_control_nodes == 1

        # Doubly-controlled gate: one target, two controls.
        if gate in ["ccx"]:
            return num_target_nodes == 1 and num_control_nodes == 2

        # Gates encoded with two targets and no control.
        if gate in ["swap", "cp"]:
            return num_target_nodes == 2 and num_control_nodes == 0

        # In the vocabulary but without an arity rule above: the tables are out of sync.
        raise NotImplementedError(f"Unknown gate {gate}, implemetation is faulty!")

    @staticmethod
    def _extract_thetas(instructions: CircuitInstructions) -> List[float]:
        """Return one angle per instruction; `nan` for instructions without parameters."""
        thetas = []
        for instruction in instructions.data:
            params = instruction.params
            if not params:
                # The kernel never reads the angle of a non-parametrized gate.
                thetas.append(float("nan"))
            elif len(params) == 1:
                thetas.append(float(params[0]))
            else:
                # only support nP=1 for now
                raise NotImplementedError(f"Only one parameter per gate is supported, got {len(params)} for gate {instruction.name}.")
        return thetas
  
    def genqc_to_backend(self, 
                         instructions: CircuitInstructions,
                         **kwargs) -> "ParametrizedCudaqKernel | None":
        """Convert given genQC `CircuitInstructions` to a `ParametrizedCudaqKernel`.

        Returns `None` if any instruction has the wrong number of target or
        control nodes for its gate (an "error circuit").

        Raises `NotImplementedError` for gates outside `KERNEL_VOCABULARY` and
        for instructions carrying more than one parameter.
        """

        # Flat list of angles; stays a list for single-gate and empty circuits,
        # and tolerates a mix of parametrized and non-parametrized instructions.
        thetas = self._extract_thetas(instructions)

        #--------------------
        
        # num_qubits = instructions.num_qubits
        num_gates  = instructions.length

        # @cudaq.kernel can only take list[int] and no str directly
        # -> we have to map everything to list[int]        
        # set default value to 9999 so an error will be raised if we have a faulty tensor encoding
        
        gate_list = []
        target_1_nodes_list  = [9999] * num_gates
        target_2_nodes_list  = [9999] * num_gates
        control_1_nodes_list = [9999] * num_gates
        control_2_nodes_list = [9999] * num_gates

        for i, instruction in enumerate(instructions.data):

            gate          = instruction.name.lower()
            control_nodes = instruction.control_nodes
            target_nodes  = instruction.target_nodes
            
            num_target_nodes  = len(target_nodes)
            num_control_nodes = len(control_nodes)
            
            if not self.check_error_circuit(gate, num_target_nodes, num_control_nodes):
                return None
            
            gate_list.append(gate)
  
            # Fill only the slots this gate actually uses; the rest keep the sentinel.
            if num_target_nodes > 0:
                target_1_nodes_list[i] = target_nodes[0]
            if num_target_nodes > 1: 
                target_2_nodes_list[i] = target_nodes[1]      
            
            if num_control_nodes > 0:
                control_1_nodes_list[i] = control_nodes[0]  
            if num_control_nodes > 1: 
                control_2_nodes_list[i] = control_nodes[1]  
                    
        #--------------------
        _kernel = self._construct_kernel(gate_list, target_1_nodes_list, target_2_nodes_list, control_1_nodes_list, control_2_nodes_list)

        return ParametrizedCudaqKernel(kernel=_kernel, params=thetas)
    
    def get_unitary(self, parametrizedCudaqKernel: ParametrizedCudaqKernel, num_qubits: int) -> np.ndarray:
        """Return the unitary matrix of a `cudaq.kernel`. Currently relies on simulation, could change in future releases of cudaq.

        The kernel is simulated once per computational basis state; the
        resulting state vector becomes the corresponding column of the
        returned `(2**num_qubits, 2**num_qubits)` complex matrix.
        """

        kernel, thetas = parametrizedCudaqKernel.kernel, parametrizedCudaqKernel.params
        
        N = 2**num_qubits
        U = np.zeros((N, N), dtype=np.complex128)
        
        for j in range(N): 
            # j-th computational basis state as input
            state_j    = np.zeros((N), dtype=np.complex128) 
            state_j[j] = 1
            
            # `np.asarray` avoids a copy when possible but, unlike `np.array(..., copy=False)`
            # under NumPy 2, does not raise when a copy is required.
            U[:, j] = np.asarray(cudaq.get_state(kernel, state_j, thetas))
            
        return U

    def draw(self, kernel: cudaq.kernel, num_qubits: int, **kwargs) -> None:
        """Draw the given `cudaq.kernel` using cudaq.

        `kernel` may also be a `ParametrizedCudaqKernel` as returned by
        `genqc_to_backend`; its angles are then forwarded to the kernel.
        A plain kernel is called with the initial state only.
        """ 
        # Initial state |0...0> as a state vector
        c    = [0] * (2**num_qubits)
        c[0] = 1

        if isinstance(kernel, ParametrizedCudaqKernel):
            # Kernels built by this backend take `(input_state, thetas)`.
            print(cudaq.draw(kernel.kernel, c, kernel.params))
        else:
            print(cudaq.draw(kernel, c))

## Test 

In [None]:
from genQC.platform.tokenizer.circuits_tokenizer import CircuitTokenizer

### genqc <-> backend

In [None]:
# Token tensor with one row per qubit (3) and one column per gate (6).
# Judging from the decoded instructions printed below: |value| is the gate's index in
# `vocabulary`, a positive entry marks a target qubit, a negative entry a control qubit,
# and 0 means the gate does not touch that qubit.
tensor = torch.tensor([
                [1, 0, -2, 0, 0, 5],
                [0, 0,  2, 3, 4, 5],
                [0, 6, -2, 3, 0, 0],
            ], dtype=torch.int32)

# One parameter per gate column.
# NOTE(review): the decoded params below equal the raw angles (0.1, 2.3, 0.7), so the tokenizer
# presumably inverts this `/(2*np.pi) - 1` normalization; confirm against `CircuitTokenizer.decode`.
params_tensor = torch.tensor([[0, 0.1, 0, 0, 2.3, 0.7]])/(2*np.pi) - 1

vocabulary   = {"h":1, "ccx":2, "swap":3, "rx":4, "cp": 5, "ry":6}
tokenizer    = CircuitTokenizer(vocabulary)
instructions = tokenizer.decode(tensor, params_tensor)

instructions.print()

CircuitInstruction(name='h', control_nodes=[], target_nodes=[0], params=[0.0])
CircuitInstruction(name='ry', control_nodes=[], target_nodes=[2], params=[0.10000012069940567])
CircuitInstruction(name='ccx', control_nodes=[0, 2], target_nodes=[1], params=[0.0])
CircuitInstruction(name='swap', control_nodes=[], target_nodes=[1, 2], params=[0.0])
CircuitInstruction(name='rx', control_nodes=[], target_nodes=[1], params=[2.299999713897705])
CircuitInstruction(name='cp', control_nodes=[], target_nodes=[0, 1], params=[0.7000001072883606])


In [None]:
# Dimension of the state vector
N = 2**instructions.num_qubits

# Convert the decoded instructions into a CUDA-Q kernel plus its per-gate angles
backend = CircuitsCudaqBackend()
parametrizedCudaqKernel = backend.genqc_to_backend(instructions)

kernel, thetas = parametrizedCudaqKernel.kernel, parametrizedCudaqKernel.params

# Initial state |0...0> as a state vector
c    = [0] * N
c[0] = 1

# The kernel takes `(input_state, thetas)`, so both are forwarded to draw/sample
print(cudaq.draw(kernel, c, thetas))

results = cudaq.sample(kernel, c, thetas)
print("Measurement distribution:" + str(results))

        ╭───╮                                 
q0 : ───┤ h ├─────●─────────────────────●─────
        ╰───╯   ╭─┴─╮   ╭─────────╮╭────┴────╮
q1 : ───────────┤ x ├─╳─┤ rx(2.3) ├┤ r1(0.7) ├
     ╭─────────╮╰─┬─╯ │ ╰─────────╯╰─────────╯
q2 : ┤ ry(0.1) ├──●───╳───────────────────────
     ╰─────────╯                              

Measurement distribution:{ 000:85 010:401 100:85 110:429 }



In [None]:
# Build the full unitary by simulating the kernel on every computational basis state
U = backend.get_unitary(parametrizedCudaqKernel, instructions.num_qubits)
print(np.round(U, 2))

[[ 0.29-0.03j  0.29-0.03j  0.  +0.j    0.  +0.j   -0.01-0.64j -0.01-0.64j  0.  +0.j    0.  +0.j  ]
 [ 0.29+0.j   -0.29+0.j    0.  -0.03j  0.  +0.03j -0.01+0.j    0.01+0.j    0.  -0.64j  0.  +0.64j]
 [ 0.01-0.64j  0.01-0.64j  0.  +0.j    0.  +0.j    0.29+0.03j  0.29+0.03j  0.  +0.j    0.  +0.j  ]
 [ 0.42-0.49j -0.42+0.49j  0.01+0.01j -0.01-0.01j -0.02+0.02j  0.02-0.02j  0.22+0.19j -0.22-0.19j]
 [ 0.  +0.j    0.  +0.j    0.29-0.03j  0.29-0.03j  0.  +0.j    0.  +0.j   -0.01-0.64j -0.01-0.64j]
 [ 0.  -0.03j  0.  +0.03j  0.29+0.j   -0.29+0.j    0.  -0.64j  0.  +0.64j -0.01+0.j    0.01+0.j  ]
 [ 0.  +0.j    0.  +0.j    0.01-0.64j  0.01-0.64j  0.  +0.j    0.  +0.j    0.29+0.03j  0.29+0.03j]
 [ 0.01+0.01j -0.01-0.01j  0.42-0.49j -0.42+0.49j  0.22+0.19j -0.22-0.19j -0.02+0.02j  0.02-0.02j]]


In [None]:
# Unitarity check: U^dagger U = U U^dagger = identity.
# Uses plain ndarray operations instead of the discouraged `np.matrix` class.
U_dagger = U.conj().T
assert np.allclose(U_dagger @ U, np.eye(N)) and np.allclose(U @ U_dagger, np.eye(N))

## Time targets

In [None]:
def time_target(target):
    if cudaq.has_target(target):
        cudaq.reset_target()
        cudaq.set_target(target)
        res = %timeit -o -q backend.get_unitary(parametrizedCudaqKernel, instructions.num_qubits)
        print(f"Timeit {target=}: {str(res)}")

In [None]:
# Compare the CPU simulator with the GPU one; `time_target` skips targets that are not available.
targets = ["qpp-cpu", "nvidia"]
for target in targets:
    time_target(target)

Timeit target='qpp-cpu': 1.08 ms ± 58.9 μs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)
Timeit target='nvidia': 13.5 ms ± 3.14 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


# Export -

In [None]:
#| hide
# Run the nbdev export for this notebook's `#| export` cells.
import nbdev; nbdev.nbdev_export()