# Circuits tokenizer

>  Class to tokenize quantum circuits. Encode and decode quantum circuits into and from tensor representations.

In [None]:
#| default_exp platform.tokenizer.circuits_tokenizer

In [None]:
#| export
from genQC.imports import *
from genQC.platform.tokenizer.base_tokenizer import BaseTokenizer, Vocabulary
from genQC.platform.circuits_instructions import CircuitInstructions

In [None]:
#| export
class CircuitTokenizer(BaseTokenizer):
    """Encode `CircuitInstructions` into integer tensors and decode them back.

    A circuit is represented as a 2D token tensor with one row per qubit and
    one column per time step; every time step holds at most one gate.
    The value 0 is the background token (qubit not acted on at that time step).
    A qubit touched by a gate stores the gate's vocabulary id multiplied by a
    sign label: by default -1 for control nodes and +1 for target nodes.
    """

    def __init__(self, vocabulary: Vocabulary, sign_labels: Optional[dict[str, int]] = None) -> None:
        """Create a tokenizer for the given gate vocabulary.

        Args:
            vocabulary: Mapping from gate name to integer token. If any token
                equals 0 (reserved for background), all tokens are shifted up
                by one and a warning is printed.
            sign_labels: Multipliers applied to the gate token for
                `"control_nodes"` and `"target_nodes"`. Defaults to
                `{"control_nodes": -1, "target_nodes": +1}`.
        """
        if 0 in vocabulary.values():
            print("[WARNING]: The value 0 is reserved for background tokens, i.e. qubit time position which are not effected by gates.")
            print("[WARNING]: Automatically incrementing all vocabulary values by one ...")
            vocabulary = {k: v + 1 for k, v in vocabulary.items()}
            # Shifting can only re-introduce 0 if a token was -1 before.
            assert 0 not in vocabulary.values()

        super().__init__(vocabulary)
        self.sign_labels = default(sign_labels, {"control_nodes": -1, "target_nodes": +1})

    def tokenize(self, instructions: CircuitInstructions) -> torch.Tensor | Tuple[torch.Tensor, torch.Tensor]:
        """Convert given instructions to a tensor. Identical to `CircuitTokenizer.encode`."""
        return self.encode(instructions=instructions)

    def encode(self,
               instructions: CircuitInstructions,
               max_gates: Optional[int] = None,
               return_params_tensor: bool = True,
               params_4pi_normalization: bool = True,
               randomize_params: bool = False) -> torch.Tensor | Tuple[torch.Tensor, torch.Tensor]:
        """Convert given `CircuitInstructions` to a `torch.Tensor`.

        Args:
            instructions: Circuit to encode. Its `tensor_shape` must have two
                entries `(num_of_qubits, time)`; its `data` is iterated in
                order, one instruction per time step.
            max_gates: Number of time columns of the output. Defaults to the
                `time` entry of `instructions.tensor_shape`. Instructions
                beyond `max_gates` are silently dropped.
            return_params_tensor: If true, also return the parameter tensor.
            params_4pi_normalization: If true (and `randomize_params` is
                false), wrap parameters into `[0, 4pi)` and rescale them
                linearly to `[-1, 1)`.
            randomize_params: If true, replace every parameter with a uniform
                random value in `[-1, 1)`; takes precedence over
                `params_4pi_normalization`.

        Returns:
            The `int32` token tensor of shape `(num_of_qubits, max_gates)`,
            or, if `return_params_tensor` is true, a tuple of that tensor and
            a `float32` tensor of shape `(max_params, max_gates)` where
            `max_params` is the largest parameter count of any encoded gate.
            Unused parameter slots stay 0.

        Raises:
            Warning: If an instruction name is not in the vocabulary.
        """
        assert len(instructions.tensor_shape) == 2
        num_of_qubits, time = instructions.tensor_shape
        max_gates = default(max_gates, time)

        tensor = torch.zeros((num_of_qubits, max_gates), dtype=torch.int32)
        params = []

        # zip with range(max_gates) limits the number of gates even if there are more instructions
        for t, instruction in zip(range(max_gates), instructions.data):

            if instruction.name not in self.vocabulary:
                raise Warning(f"`{instruction.name}` not in vocabulary.")

            params.append(instruction.params)

            gate_id = self.vocabulary[instruction.name]

            for bit in instruction.control_nodes:
                tensor[bit, t] = gate_id * self.sign_labels["control_nodes"]

            for bit in instruction.target_nodes:
                tensor[bit, t] = gate_id * self.sign_labels["target_nodes"]

        if not return_params_tensor:
            return tensor

        num_of_max_params = max((len(para) for para in params), default=0)
        params_tensor = torch.zeros((num_of_max_params, max_gates), dtype=torch.float32)

        for t, para in enumerate(params):
            para = torch.tensor(para)

            # Integer-valued parameters would yield an integer tensor, on which
            # `torch.rand_like` is not defined; promote them to float first.
            if not para.is_floating_point():
                para = para.to(params_tensor.dtype)

            if randomize_params:
                para = 2.0*torch.rand_like(para) - 1.0   # rnd [-1, 1)

            elif params_4pi_normalization:
                para = para % (4.0*np.pi)              # limit to [0, 4pi)
                para = (para-2.0*np.pi) / (2.0*np.pi)  # [0, 4pi) to [-1, +1)

            params_tensor[:len(para), t] = para

        return tensor, params_tensor

    def decode(self,
               tensor: torch.Tensor,
               params_tensor: Optional[torch.Tensor] = None,
               params_4pi_normalization: bool = True,
               ignore_errors: bool = False,
               place_error_placeholders: bool = False) -> CircuitInstructions:
        """Convert a given `torch.Tensor` to `CircuitInstructions`.

        Args:
            tensor: 2D token tensor of shape `(num_of_qubits, time)`.
            params_tensor: Optional parameter tensor indexed as `[:, t]`. When
                given, the full column of time step `t` (including any
                padding entries) is attached to the gate decoded at `t`.
            params_4pi_normalization: If true, map parameters from `[-1, 1]`
                back to `[0, 4pi]` before attaching them.
            ignore_errors: If false, a time step in which a gate has control
                nodes but no target nodes raises an error.
            place_error_placeholders: If true, every time step where no gate
                could be decoded gets an `"h"` instruction without qubits,
                which marks the step as erroneous.

        Returns:
            The decoded `CircuitInstructions`, created with
            `tensor_shape=tensor.shape`.

        Raises:
            RuntimeError: On a control-only gate while `ignore_errors` is false.
        """
        assert tensor.dim() == 2, f"{tensor.shape=}"
        _, time = tensor.shape

        instructions = CircuitInstructions(tensor_shape=tensor.shape)

        for t in range(time):
            enc_time_slice = tensor[:, t]  # contains all bits at time t

            _gate_placed = False

            for gate_index, gate in self.vocabulary_inverse.items():

                target_nodes  = (enc_time_slice == (self.sign_labels["target_nodes"]  * gate_index)).nonzero(as_tuple=True)[0]
                control_nodes = (enc_time_slice == (self.sign_labels["control_nodes"] * gate_index)).nonzero(as_tuple=True)[0]

                if target_nodes.nelement() > 0:
                    params = []
                    if exists(params_tensor):
                        params = params_tensor[:, t]
                        if params_4pi_normalization:
                            params = (params+1.0) * 2.0*np.pi    # [-1, 1] to [0, 4pi]
                        params = params.tolist()

                    instructions.add_instruction(gate, control_nodes.tolist(), target_nodes.tolist(), params)
                    _gate_placed = True

                    break  # break on first hit, per def only one gate allowed per t

                elif control_nodes.nelement() > 0:  # no target but control means error
                    if not ignore_errors:
                        raise RuntimeError("target_nodes.nelement() <= 0 but control_nodes.nelement() > 0")

            if not _gate_placed and place_error_placeholders:
                # note we place a h gate with no qubits, so this is always an error
                instructions.add_instruction("h", [], [], [])

            # else: we are fine with tensors that have time steps with no action!

        return instructions

    @staticmethod
    def get_parametrized_tokens(vocabulary: Vocabulary) -> List[int]:
        """Return the tokens of all parametrized gates in `vocabulary`.

        Tokens are returned in the iteration order of `vocabulary`.

        Raises:
            NotImplementedError: If a gate name is in neither the known
                parametrized nor the known non-parametrized list.
        """
        parametrized_names     = set("rx ry rz phase cp crx cry crz u u2 u3".split())
        non_parametrized_names = set("x y z h cx cy cz ch ccx swap s sdg t tdg".split())

        parametrized_tokens = []
        for name, token in vocabulary.items():

            if name in parametrized_names:
                parametrized_tokens.append(token)
            elif name not in non_parametrized_names:
                raise NotImplementedError(f"Unknown gate {name}! Please add it to the known list.")

        return parametrized_tokens

## Test

In [None]:
# Decode fixture: a 3-qubit circuit with three time steps.
tokenizer = CircuitTokenizer({"u2": 1, "ccx": 2})

# Token grid, rows = qubits, columns = time steps.
# With the default sign labels a negative entry is a control node
# and a positive entry is a target node of the gate with that id.
tensor = torch.tensor(
    [[1, 0, -2],
     [0, 1,  2],
     [0, 0, -2]],
    dtype=torch.int32,
)

# Normalized gate parameters, laid out as [max_params, time].
params_tensor = torch.tensor(
    [[-0.9,  0.9, 0],
     [ 0.1, -0.7, 0]],
)

instructions = tokenizer.decode(tensor, params_tensor)

instructions.print()
print(instructions.instruction_names_set)

CircuitInstruction(name='u2', control_nodes=[], target_nodes=[0], params=[0.628318727016449, 6.91150426864624])
CircuitInstruction(name='u2', control_nodes=[], target_nodes=[1], params=[11.9380521774292, 1.8849557638168335])
CircuitInstruction(name='ccx', control_nodes=[0, 2], target_nodes=[1], params=[6.2831854820251465, 6.2831854820251465])
{'u2', 'ccx'}


In [None]:
# Re-encode the decoded instructions; the trailing expression displays both tensors.
enc_tensor, enc_params_tensor = tokenizer.encode(instructions)
enc_tensor, enc_params_tensor

(tensor([[ 1,  0, -2],
         [ 0,  1,  2],
         [ 0,  0, -2]], dtype=torch.int32),
 tensor([[-0.9000,  0.9000,  0.0000],
         [ 0.1000, -0.7000,  0.0000]]))

In [None]:
# Round trip check: decode followed by encode must reproduce the input tensors.
assert torch.allclose(tensor, enc_tensor)
assert torch.allclose(params_tensor, enc_params_tensor)

In [None]:
# A vocabulary that does not use the value 0 is expected to be stored unchanged.
tokenizer = CircuitTokenizer({"u2":1, "ccx":2})
assert tokenizer.vocabulary == {'u2': 1, 'ccx': 2}

In [None]:
# Test background token checking: 0 is reserved for background,
# so a vocabulary containing 0 gets every value incremented by one.
tokenizer = CircuitTokenizer({"u2":0, "ccx":1, "h":2, "ry":3})
assert tokenizer.vocabulary == {"u2":1, "ccx":2, "h":3, "ry":4}



In [None]:
# Of the gates in the shifted vocabulary above, only "u2" (1) and "ry" (4) are parametrized.
print(CircuitTokenizer.get_parametrized_tokens(tokenizer.vocabulary))
assert CircuitTokenizer.get_parametrized_tokens(tokenizer.vocabulary) == [1, 4]

[1, 4]


# Export -

In [None]:
#| hide
# Run nbdev's export step for this notebook's `#| export` cells.
import nbdev; nbdev.nbdev_export()