<a href="https://colab.research.google.com/github/SoftLocked/QuGPT/blob/main/QuGPT_Model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
!pip install qiskit

Collecting qiskit
  Downloading qiskit-2.3.0-cp310-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (12 kB)
Collecting rustworkx>=0.15.0 (from qiskit)
  Downloading rustworkx-0.17.1-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (10 kB)
Collecting stevedore>=3.0.0 (from qiskit)
  Downloading stevedore-5.6.0-py3-none-any.whl.metadata (2.3 kB)
Downloading qiskit-2.3.0-cp310-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (8.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.9/8.9 MB[0m [31m19.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading rustworkx-0.17.1-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.2/2.2 MB[0m [31m40.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading stevedore-5.6.0-py3-none-any.whl (54 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m54.4/54.4 kB[0m [31m2.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling coll

In [4]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import math
import random
from dataclasses import dataclass
from typing import List, Tuple, Dict, Set
from torch.utils.data import Dataset, DataLoader
from torch.nn.utils.rnn import pad_sequence

# Token vocabulary (25 tokens)

In [5]:
@dataclass
class GateInfo:
  '''
  One entry of the token vocabulary.

  Attributes:
    name: the token's name (e.g. "h_q0")
    qasm_name: the token's name in qasm 2.0 (e.g. "h")
    qubits: which qubits the token affects (e.g. (0,) or (0, 1))
    is_t_gate: True for T and Tdg
    description: human-readable explanation of the token
  '''
  name: str
  qasm_name: str
  qubits: tuple
  is_t_gate: bool
  description: str

In [7]:
def build_vocabulary() -> Tuple[Dict[int, GateInfo], Dict[str, int], int, set]:
  '''
  Assemble the Clifford+T token vocabulary for 2-qubit circuits.

  Token IDs follow list order: special tokens first, then 1-qubit Clifford
  gates, 2-qubit Clifford gates and finally the T gates. Each 1-qubit gate
  gets one token per qubit; each 2-qubit gate gets one token per ordering
  of the two qubits.

  Returns:
    token_to_gate: dict[int, GateInfo], token ID -> gate record
    gate_to_token: dict[str, int], token name -> token ID
    vocab_size: int, total number of tokens
    t_gate_token_ids: set of token IDs whose gate is flagged as a T gate
  '''

  single_qubit_targets = (0, 1)
  two_qubit_orderings = ((0, 1), (1, 0))

  # Each spec below is a (name, description) pair.
  special_specs = (
      ("<pad>", "Padding"),
      ("<start>", "Start of sequence"),
      ("<end>", "End of sequence"),
  )

  clifford_1q_specs = (
      ("h",   "Hadamard — maps X↔Z, creates superposition"),
      ("s",   "S = √Z — phase gate, quarter-turn around Z"),
      ("sdg", "S† — inverse of S"),
      ("x",   "Pauli X — bit flip"),
      ("y",   "Pauli Y — bit + phase flip"),
      ("z",   "Pauli Z — phase flip"),
  )

  clifford_2q_specs = (
      ("cx",   "CNOT — flips target if control is |1⟩"),
      ("cz",   "CZ — applies Z to target if control is |1⟩"),
      ("swap", "SWAP — exchanges the two qubits"),
  )

  t_specs = (
      ("t", "T = √S"),
      ("tdg", "T† = inverse of T"),
  )

  def one_qubit_tokens(specs, is_t):
    '''Expand 1-qubit gate specs into one GateInfo per (gate, qubit) pair.'''
    return [
        GateInfo(
            name=f"{qasm}_q{qubit}",
            qasm_name=qasm,
            qubits=(qubit,),
            is_t_gate=is_t,
            description=f"{text} on q{qubit}",
        )
        for qasm, text in specs
        for qubit in single_qubit_targets
    ]

  # Special tokens act on no qubits and reuse their name as the qasm name
  vocabulary: List[GateInfo] = [
      GateInfo(label, label, (), False, text) for label, text in special_specs
  ]

  # 1-qubit Clifford tokens
  vocabulary.extend(one_qubit_tokens(clifford_1q_specs, False))

  # 2-qubit Clifford tokens, one per ordering of the qubit pair
  vocabulary.extend(
      GateInfo(
          name=f"{qasm}_q{first}_q{second}",
          qasm_name=qasm,
          qubits=(first, second),
          is_t_gate=False,
          description=f"{text}: q{first}→q{second}",
      )
      for qasm, text in clifford_2q_specs
      for first, second in two_qubit_orderings
  )

  # T tokens
  vocabulary.extend(one_qubit_tokens(t_specs, True))

  # Lookup tables: a token's ID is its position in the list
  token_to_gate = dict(enumerate(vocabulary))
  gate_to_token = {
      gate.name: token_id for token_id, gate in token_to_gate.items()
  }
  vocab_size = len(vocabulary)

  # Token IDs of the T gates (to penalize their use)
  t_gate_token_ids = {
      token_id for token_id, gate in token_to_gate.items() if gate.is_t_gate
  }

  return token_to_gate, gate_to_token, vocab_size, t_gate_token_ids

# Notebook-wide vocabulary tables, plus the IDs of the three special tokens
TOKEN_TO_GATE, GATE_TO_TOKEN, VOCAB_SIZE, T_GATE_TOKENS = build_vocabulary()
PAD_TOKEN, START_TOKEN, END_TOKEN = (
    GATE_TO_TOKEN[special] for special in ('<pad>', '<start>', '<end>')
)


In [9]:
# Dump the whole vocabulary, one token per line, so the IDs can be checked by eye
print(f"Vocabulary: {VOCAB_SIZE} tokens")
print(f"T-gate token IDs: {T_GATE_TOKENS}\n")
for i in range(VOCAB_SIZE):
    g = TOKEN_TO_GATE[i]
    # Flag T gates in the listing
    if g.is_t_gate:
        t_marker = " ← T GATE (expensive!)"
    else:
        t_marker = ""
    print(f"  [{i:2d}] {g.name:18s}  qubits={str(g.qubits):10s}  {g.description}{t_marker}")


Vocabulary: 25 tokens
T-gate token IDs: {24, 21, 22, 23}

  [ 0] <pad>               qubits=()          Padding
  [ 1] <start>             qubits=()          Start of sequence
  [ 2] <end>               qubits=()          End of sequence
  [ 3] h_q0                qubits=(0,)        Hadamard — maps X↔Z, creates superposition on q0
  [ 4] h_q1                qubits=(1,)        Hadamard — maps X↔Z, creates superposition on q1
  [ 5] s_q0                qubits=(0,)        S = √Z — phase gate, quarter-turn around Z on q0
  [ 6] s_q1                qubits=(1,)        S = √Z — phase gate, quarter-turn around Z on q1
  [ 7] sdg_q0              qubits=(0,)        S† — inverse of S on q0
  [ 8] sdg_q1              qubits=(1,)        S† — inverse of S on q1
  [ 9] x_q0                qubits=(0,)        Pauli X — bit flip on q0
  [10] x_q1                qubits=(1,)        Pauli X — bit flip on q1
  [11] y_q0                qubits=(0,)        Pauli Y — bit + phase flip on q0
  [12] y_q1          

# Input Representation

In [12]:
def unitary_to_tensor(U: np.ndarray) -> torch.Tensor:
  '''
  Flatten a complex matrix into a 1-D real float32 tensor.

  Entries are visited in row-major order and each contributes two
  consecutive values, (real, imag), so a 4x4 unitary becomes 32 values.

  Args:
    U: complex matrix, e.g. a 4x4 unitary. Anything np.asarray accepts
       is allowed (ndarray, nested lists, objects exposing __array__).

  Returns:
    torch.float32 tensor of shape (2 * number of entries in U,).
  '''
  # Coerce first so array-likes without .real/.imag attributes (e.g. nested
  # lists) work too; an ndarray input is passed through unchanged.
  U = np.asarray(U)
  real = torch.tensor(U.real, dtype=torch.float32)
  imag = torch.tensor(U.imag, dtype=torch.float32)
  # Stacking on a new last axis pairs each entry's (real, imag) before flattening
  return torch.stack([real, imag], dim=-1).flatten()

In [13]:
# Smoke-test the converter on a 4x4 complex matrix whose k-th entry
# (row-major, k = 1..16) is k + kj
test_unitary = np.arange(1, 17).reshape(4, 4) * (1 + 1j)
test_tensor = unitary_to_tensor(test_unitary)
print(test_tensor)

tensor([ 1.,  1.,  2.,  2.,  3.,  3.,  4.,  4.,  5.,  5.,  6.,  6.,  7.,  7.,
         8.,  8.,  9.,  9., 10., 10., 11., 11., 12., 12., 13., 13., 14., 14.,
        15., 15., 16., 16.])
