In [25]:
# Environment setup
import discopy

# Patch missing .is_mixed (for lambeq + newer discopy)
if not hasattr(getattr(discopy, "monoidal", None).Diagram, "is_mixed"):
    discopy.monoidal.Diagram.is_mixed = property(lambda self: False)

print("✅ Environment ready (patched is_mixed if needed)")

from lambeq import AtomicType

N = AtomicType.NOUN
S = AtomicType.SENTENCE
P = AtomicType.PREPOSITIONAL_PHRASE
CONJ = AtomicType.CONJUNCTION
PUNCT = AtomicType.PUNCTUATION


✅ Environment ready (patched is_mixed if needed)


In [26]:
from lambeq import AtomicType
from lambeq import IQPAnsatz

# Example: noun → 1 qubit, sentence → 1 qubit
ob_map = dict({
    N: 2,
    S: 1,
    P: 0,
    CONJ: 1,
    PUNCT: 0
})

ansatz = IQPAnsatz(ob_map, n_layers=2)


In [27]:
#load data
import pandas as pd
def load_data(csv_file, sample_fraction=1.0):
    """Loads Question Pairs from a CSV file

    Args:
        csv_file (str): Path to csv_file
        sample_fraction (float): Fraction of data to sample, default is 1.0
    Returns:
        tuple: A tuple containing supervised data pairs
        returns [],[] on error
    """
    sentences1 = []
    sentences2 = []
    is_duplicate = []
    
    try:
        df = pd.read_csv(csv_file, encoding='utf-8')
        #print("Column names:", df.columns)
        
        if sample_fraction < 1.0:
            df = df.sample(frac=sample_fraction, random_state=42).reset_index(drop=True)
        
        sentence1_series = df['question1']
        sentence2_series = df['question2']
        is_duplicate_series = df['is_duplicate']
        
        sentences1 = sentence1_series.tolist()
        sentences2 = sentence2_series.tolist()
        is_duplicate = is_duplicate_series.tolist()
        
        if len(sentences1) != len(sentences2):
            raise ValueError("The number of sentences in question1 and question2 do not match.")
        else:
            print(f"Loaded {len(sentences1)} sentences.")
        return sentences1, sentences2, is_duplicate
        
    except FileNotFoundError:
        print("Wrong Path")
        return [],[],[]
    
    except Exception as e:
        print(f"An {e} Error Occurred")
        return [],[],[]

DATA_PATH = r'C:/Users/Jash\Documents/Research\Semantic Equivilance\SemanticEquivilance/question_pairs/questions.csv'
sentences1, sentences2, value = load_data(DATA_PATH, sample_fraction=0.001)
data_pairs = list(zip(sentences1, sentences2, value))


Loaded 404 sentences.


In [None]:
import pennylane as qml
import numpy as np
from lambeq import AtomicType, BobcatParser, Rewriter, IQPAnsatz, SpacyTokeniser


def swap_test(state1_vec, state2_vec, num_qubits, initial_state=0):
    """
    Performs a Quantum Swap Test between two quantum state vectors.

    Args:
        state1_vec (np.ndarray): The first state vector.
        state2_vec (np.ndarray): The second state vector.

    Returns:
        float: The estimated squared overlap (fidelity) between the two states.
    """
    SHOT_COUNT = 1000000 #expected less than 1% error only done on swap test, expectation values calculated with different amount of shots
    if 2**num_qubits != len(state1_vec):
        raise ValueError("State vectors must have a length that is a power of 2.")

    total_qubits = 1 + 2 * num_qubits #1 Ancilla qubit + 2 state qubits

    dev = qml.device("lightning.qubit", wires=total_qubits, shots=SHOT_COUNT)

    @qml.qnode(dev)
    def circuit(): #|0 , psi, phi>
        # Step 1: Prepare the ancilla qubit in a superposition
        qml.Hadamard(wires=0)
        print(f"Comparing: {state1_vec} and {state2_vec}")
        # Step 2: Prepare the two input states
        #basis for protocol 1
        qml.StatePrep(state1_vec, wires=range(1, 1 + num_qubits), normalize=True)
        #basis for protocol 1
        qml.StatePrep(state2_vec, wires=range(1 + num_qubits, 1 + 2 * num_qubits))

        # Step 3: Apply controlled-SWAP gates
        for i in range(num_qubits):
            qml.CSWAP(wires=[0, 1 + i, 1 + num_qubits + i]) #selects every register of phi and psi for swap

        # Step 4: Apply Hadamard to ancilla
        qml.Hadamard(wires=0)
        # Step 5: Measure the ancilla qubit
        return qml.sample(wires=0)

    measurement_results = circuit()
    squared_overlap = 1 - 2/len(measurement_results) * np.sum(measurement_results)
    
    return abs(squared_overlap)

In [33]:
from lambeq.backend.pennylane import to_pennylane as to_qml
def lambeq_sentence_to_circuit(sentence, Tokeniser, ansatz, parser, rewriter, return_type='state', include_debug_prints=False, expval = qml.PauliZ(0), target_qubits=None):
    """
    Converts a natural language sentence into a quantum state vector
    using Lambeq's BobcatParser and IQPAnsatz, handling parameterization
    via PennyLaneModel.

    Args:
        sentence (str): The input sentence.
        ansatz (lambeq.ansatz.Ansatz): The quantum ansatz to apply.
        parser (lambeq.parser.Parser): The parser to convert sentence to diagram. This is a GPT
        rewriter (lambeq.rewrite.Rewriter): The rewriter to simplify the diagram. This is a GPT
        include_debug_prints (bool): Whether to include detailed debug prints.
        start_basis_state (int): The basis state to initialize the qubits. Default is |0>.
        seed (int, optional): Seed for random number generation for reproducibility.
        return_type (str): 'state' or 'expval' to specify the return type.
        expval (qml.Observable): The observable for expval value calculation.

    Returns:
        tuple: A tuple containing the state vector (np.ndarray) and
               the number of qubits (int).
    """
    try:
        if include_debug_prints:
            print(f"\n--- Debugging: Sentence '{sentence}' ---")
        # Step 0: Tokenize the sentence
        tokens = Tokeniser.tokenise_sentence(sentence) # Not optional for non-clean inputs
        
        # Step 1: Convert sentence to a DisCoPy diagram
        diagram = parser.sentence2diagram(tokens, tokenised=True)
        if include_debug_prints:
            print("Step 1: Sentence parsed to diagram.")

        # Step 2: Rewrite the diagram
        rewritten_diagram = rewriter(diagram)
        if include_debug_prints:
            print("Step 2: Diagram rewritten.")

        # Step 3: Normalize the diagram
        normalized_diagram = rewritten_diagram.normal_form()
        if include_debug_prints:
            print("Step 3: Diagram normalized.")
            
        # Step 4: Apply the ansatz to the normalized diagram to get a DisCoPy circuit
        circuit = ansatz(normalized_diagram)
        if include_debug_prints:
            print("Step 4: Ansatz applied to create DisCoPy circuit.")

        # Step 5: Convert the DisCoPy circuit to a PennyLane circuit object
        temp_qml_circuit = to_qml(circuit)
        
        num_qubits = temp_qml_circuit._n_qubits
        param_structure = temp_qml_circuit._params
        device_qubits = target_qubits if target_qubits is not None and target_qubits >= num_qubits else num_qubits
        
        if include_debug_prints:
            print("Step 5: DisCoPy circuit converted to PennyLane object.")
            print(f"Parameter structure: {param_structure}")
            print(f"Number of qubits: {num_qubits}, Device Wires: {device_qubits}")
        
        # Build parameters in the exact same structure as _params
        structured_params = []
            
        if include_debug_prints:
            print("Step 6: Structured parameters generated.")
            
        dev = qml.device("lightning.qubit", wires=device_qubits)
        
        @qml.qnode(dev)
        def qnode_circuit(structured_params):
            circuit_func = temp_qml_circuit.make_circuit()
            circuit_func(structured_params)
            if return_type == 'expval':
                return qml.expval(expval)
            else:
                return qml.state()  # Return the state vector after applying the circuit
        
        return qnode_circuit, param_structure, device_qubits
    except Exception as e:
        print(f"CRITICAL ERROR: Failed to process circuit: {e}")
        return None, None, None

In [12]:
import random
def initialize_structured_params(param_structure, sentence_seed=None):
    """
    Initializes structured parameters based on the given parameter structure.

    Args:
        param_structure (list): The structure of parameters as a list of lists.
        seed (int): Seed for random number generation for reproducibility.

    Returns:
        list: A list of structured parameters with random values.
    """
    if sentence_seed is not None:
        random.seed(sentence_seed)
    structured_params = []
    for param_group in param_structure:
        if isinstance(param_group, list) and len(param_group) > 0:
            # This parameter group has parameters - create random values for each
            group_values = [random.uniform(0.1, 2 * np.pi - 0.1) for _ in param_group]
            structured_params.append(group_values)
        else:
            # Empty parameter group
            structured_params.append([])
    
    return structured_params

In [13]:
import math
def FischerInformation(Fidelity): #Statisitcal Information and DISTANCE metric Fubini-Study Metric/ Wooters Distance
    rootFidelity = math.sqrt(Fidelity) #Square root fidelity term
    return math.acos(rootFidelity)

In [14]:
def loss_function(
    overlap, 
    target_fidelity,  # <--- THIS IS YOUR GROUND TRUTH (is_duplicate: 0 or 1)
    structural_disparity, 
    lambda_penalty
):
    """
    Combines Fidelity MSE loss (driven by ground truth) with Structural Disparity Penalty.
    """
    
    # 1. Fidelity MSE Loss (Ground Truth Term)
    # The (target_fidelity - overlap)**2 term is where the ground truth is applied.
    # It drives the measured overlap toward the ground-truth label.
    fidelity_loss = (target_fidelity - overlap)**2
    
    # 2. Structural Penalty Term
    structural_penalty = lambda_penalty * structural_disparity
    
    # 3. Total Loss
    return fidelity_loss + structural_penalty

In [15]:
def calculate_overlap(qnode_func, params1, params2):
    """
    Calculates the overlap between two quantum states prepared by the same QNode
    but with different parameters. This is a shortcut that only works with quantum simulators, for real quantum hardware, use the swap test function.

    Args:
        qnode_func (qml.QNode): The quantum circuit QNode.
        params1 (list): Structured parameters for the first state.
        params2 (list): Structured parameters for the second state.

    Returns:
        float: The estimated squared overlap (fidelity) between the two states.
    """
    # Get the state vectors for both sets of parameters
    state1 = qnode_func(params1)
    state2 = qnode_func(params2)
    
    # Calculate the overlap
    overlap = np.abs(np.vdot(state1, state2))**2
    
    return overlap

In [16]:
def calculate_pair_gradients(qnode_func, params, s1, s2, target_fidelity, structural_disparity):
    """
    Calculates gradients for a pair of sentences with respect to the shared parameters.
    Returns a dictionary of gradients indexed by (group, element) tuple.
    """
    gradients = {idx: 0.0 for idx in [(g, e) for g in range(len(params)) for e in range(len(params[g]))]}

    # We need to compute the gradient for each parameter
    for group_idx, group in enumerate(params):
        for elem_idx in range(len(group)):
            param_index = (group_idx, elem_idx)

            # Shift the parameters for both sentences in the pair
            params_plus_s1 = params.copy()
            params_plus_s1[group_idx][elem_idx] += np.pi / 2
            
            params_minus_s1 = params.copy()
            params_minus_s1[group_idx][elem_idx] -= np.pi / 2

            # Overlap for shifted s1
            overlap_plus = calculate_overlap(qnode_func, params_plus_s1, params)
            overlap_minus = calculate_overlap(qnode_func, params_minus_s1, params)

            # Gradient contribution from S1
            grad_s1 = (loss_function(overlap_plus, target_fidelity, structural_disparity, 0) - loss_function(overlap_minus, target_fidelity, structural_disparity, 0)) / 2

            # Now, shift the parameters for the second sentence (S2)
            params_plus_s2 = params.copy()
            params_plus_s2[group_idx][elem_idx] += np.pi / 2
            
            params_minus_s2 = params.copy()
            params_minus_s2[group_idx][elem_idx] -= np.pi / 2

            # Overlap for shifted S2
            overlap_plus_2 = calculate_overlap(qnode_func, params, params_plus_s2)
            overlap_minus_2 = calculate_overlap(qnode_func, params, params_minus_s2)

            # Gradient contribution from S2
            grad_s2 = (loss_function(overlap_plus_2, target_fidelity, structural_disparity, 0) - loss_function(overlap_minus_2, target_fidelity, structural_disparity, 0)) / 2
            
            # The total gradient is the sum of contributions from both states
            gradients[param_index] = grad_s1 + grad_s2

    return gradients

In [None]:
def get_max_qubits_for_dataset(data_pairs, Tokeniser, ansatz, parser, rewriter):
    """Calculates the maximum number of qubits required by any sentence in the dataset."""
    print("Determining maximum qubit count for the entire dataset...")
    max_qubits = 0
    # Use the original (unmodified) lambeq_sentence_to_circuit to get the *required* qubits
    for s1, s2, _ in data_pairs:
        # Note: We call a temporary version of the circuit builder that is not padded.
        # It's an internal detail, but for simplicity, we assume a helper can get the required wires.
        # Here, we will just use the unpadded version's output for the required number of qubits.
        _, _, required_qubits_s1 = lambeq_sentence_to_circuit(
            s1, Tokeniser, ansatz, parser, rewriter, return_type='state'
        )
        _, _, required_qubits_s2 = lambeq_sentence_to_circuit(
            s2, Tokeniser, ansatz, parser, rewriter, return_type='state'
        )
        
        if required_qubits_s1 is not None and required_qubits_s1 > max_qubits:
            max_qubits = required_qubits_s1
        if required_qubits_s2 is not None and required_qubits_s2 > max_qubits:
            max_qubits = required_qubits_s2
            
    print(f"Maximum required qubits (N_max) across the dataset is: {max_qubits}")
    return max_qubits
history = [] #for plots
#Hyperparameters
BASE_LEARNING_RATE = 0.01
LAMDA_PENALTY = 0.1  # Weight for the structural disparity penalty

def main_workflow(data_pairs, parser, rewriter, ansatz, Tokeniser, base_learning_rate=BASE_LEARNING_RATE, lambda_penalty=LAMDA_PENALTY):
    
    # 1. Preprocessing: Determine the global maximum qubit count
    N_MAX = get_max_qubits_for_dataset(data_pairs, Tokeniser, ansatz, parser, rewriter)
    if N_MAX == 0:
        print("No valid sentences found to train on. Exiting.")
        return {}
        
        
    #Store relevent metadata (target-fidelity, structural disparity) for each pair
    processed_pairs = []
    for s1, s2, is_duplicate in data_pairs:
        _, _, N1 = lambeq_sentence_to_circuit(s1,Tokeniser,ansatz,parser,rewriter,return_type='state')
        _, _, N2 = lambeq_sentence_to_circuit(s2,Tokeniser,ansatz,parser,rewriter,return_type='state')
        
        #calculate structural disparity (absolute qubit difference)
        structural_disparity = abs(N1 - N2) if N1 is not None and N2 is not None else N_MAX
        processed_pairs.append((s1, s2, is_duplicate, structural_disparity))
        
    print(f"\n--- Starting Training on Dimension N_MAX = {N_MAX} ---")
    
    # 2. Model Initialization
    rep_sentence = data_pairs[0][0] # Use first sentence to get parameter structure
    
    # The QNode will now be set to size N_MAX for the entire training
    qnode_func, param_structure, device_qubits = lambeq_sentence_to_circuit(
        rep_sentence, Tokeniser, ansatz, parser, rewriter, 
        return_type='state', target_qubits=N_MAX # <--- Use N_MAX for device
    )
    
    # Check if circuit generation succeeded
    if qnode_func is None or device_qubits != N_MAX:
        print("Failed to generate representative circuit. Exiting.")
        return {}
        
    # 3. Initialize a single set of parameters for the N_MAX dimension
    params = initialize_structured_params(param_structure)
    
    # 4. Training Loop with Dynamic Feedback
    for s1, s2, target_fidelity, structural_disparity in processed_pairs:
        #overlap
        overlap = calculate_overlap(qnode_func, params, params)
        qangle = FischerInformation(overlap)
        normalized_qangle = qangle / (math.pi / 2)
        if target_fidelity == 1:
            eta_dynamic = base_learning_rate * normalized_qangle
        else:
            eta_dynamic = base_learning_rate * (1 - normalized_qangle)
        # Calculate gradients using the shared QNode (qnode_func is already set to N_MAX)
        gradients = calculate_pair_gradients(qnode_func, params, s1, s2, target_fidelity, structural_disparity)
        
        # Apply the update
        for group_idx, group in enumerate(params):
            for elem_idx in range(len(group)):
                param_index = (group_idx, elem_idx)
                params[group_idx][elem_idx] -= eta_dynamic * gradients[param_index]
        
        # Logging for monitoring
        total_loss = loss_function(overlap, target_fidelity, structural_disparity, lambda_penalty)
        history.append({
            'step': len(history) + 1, # Use +1 since len(history) is the index of the next item
            's1': s1,
            's2': s2,
            'target_fidelity': target_fidelity,
            'structural_disparity': structural_disparity,
            'fidelity': overlap,
            'q_angle': qangle,
            'eta_dyn': eta_dynamic,
            'total_loss': total_loss
        })
    trained_params = {N_MAX: params} # Store the single trained model
    
    print("\n--- Training Complete ---")
    
    # 5. Final Evaluation with Swap Test
    print("\n--- Performing Final Swap Tests with Trained Parameters ---")
    # Sample a few pairs for evaluation
    sample_pairs = data_pairs[:5] if len(processed_pairs) > 5 else processed_pairs
    
    for s1, s2, target_fidelity, structural_disparity in sample_pairs:
        print(f"\nEvaluating Pair: '{s1}' vs. '{s2}' (Label: {target_fidelity}, Disparity: {structural_disparity}")
        
        optimized_params = trained_params[N_MAX]
        
        # Generate State 1 and State 2 vectors, both will be padded to N_MAX
        qnode1, _, _ = lambeq_sentence_to_circuit(s1, Tokeniser, ansatz, parser, rewriter, return_type='state', target_qubits=N_MAX)
        qnode2, _, _ = lambeq_sentence_to_circuit(s2, Tokeniser, ansatz, parser, rewriter, return_type='state', target_qubits=N_MAX)
        
        vec1 = qnode1(optimized_params)
        vec2 = qnode2(optimized_params)

        # Call swap_test, passing N_MAX as the number of qubits for *one* register
        overlap = swap_test(vec1, vec2, N_MAX) 
        Qangle = FischerInformation(overlap)
        normalized_qangle = Qangle / (math.pi / 2)
        
        #Calculate final loss
        final_loss = loss_function(overlap, target_fidelity, structural_disparity, lambda_penalty)
        #Determine dynamic learning rate based on Qangle
        print(f"Final Overlap (Fidelity): {overlap:.4f}")
        print(f"Estimated Quantum Angle (Normalized): {normalized_qangle:.4f}")
        print(f"Total Penalized Loss: {final_loss:.4f}")
    return trained_params, history

In [None]:
#plots
import matplotlib.pyplot as plt

def plot_training_history(history):
    """Generates two key plots showing VQC training convergence and feedback."""
    
    if not history:
        print("History list is empty. Cannot plot.")
        return

    # Extracting data from history list
    steps = np.array([d['step'] for d in history])
    losses = np.array([d['total_loss'] for d in history])
    q_angles = np.array([d['q_angle'] for d in history])
    eta_dyn = np.array([d['eta_dyn'] for d in history])
    
    # -------------------------------------------------------------
    # PLOT 1: Loss Convergence and Dynamic Learning Rate
    # -------------------------------------------------------------
    fig, ax1 = plt.subplots(figsize=(12, 6))

    # Plot Loss (Left Y-Axis)
    color = 'tab:blue'
    ax1.set_xlabel('Training Step (Pair)')
    ax1.set_ylabel('Total Penalized Loss', color=color)
    ax1.plot(steps, losses, color=color, label='Total Loss')
    ax1.tick_params(axis='y', labelcolor=color)
    ax1.set_yscale('log') # Use log scale for loss for better visualization

    # Plot Dynamic Learning Rate (Right Y-Axis)
    ax2 = ax1.twinx()  
    color = 'tab:red'
    ax2.set_ylabel(f'Dynamic Learning Rate (Max={BASE_LEARNING_RATE:.2f})', color=color)  
    ax2.plot(steps, eta_dyn, color=color, linestyle='--', alpha=0.6, label='$\eta_{dyn}$')
    ax2.tick_params(axis='y', labelcolor=color)
    
    plt.title('VQC Training Convergence and Dynamic Feedback')
    fig.tight_layout() 
    plt.grid(True, which="both", ls="--", linewidth=0.5)
    plt.savefig('vqc_convergence_and_eta.png')
    plt.close(fig)
    print("Saved plot: vqc_convergence_and_eta.png")
    
    # -------------------------------------------------------------
    # PLOT 2: Quantum Angle Evolution (Geometric Feedback)
    # -------------------------------------------------------------
    fig, ax = plt.subplots(figsize=(12, 6))
    
    ax.set_xlabel('Training Step (Pair)')
    ax.set_ylabel('Quantum Angle $\Theta$ (Fubini-Study Distance)')
    
    # The Quantum Angle is in [0, pi/2] radians
    ax.plot(steps, q_angles, color='tab:green', label='Quantum Angle')
    
    # Add target lines (This is a simplified view)
    ax.axhline(0, color='red', linestyle=':', linewidth=1.5, label='Target Angle (Duplicate)')
    ax.axhline(np.pi/2, color='orange', linestyle=':', linewidth=1.5, label='Target Angle (Non-Duplicate)')
    
    plt.title('Evolution of Quantum Angle (Geometric Distance)')
    plt.legend()
    fig.tight_layout()
    plt.grid(True, which="both", ls="--", linewidth=0.5)
    plt.savefig('quantum_angle_evolution.png')
    plt.close(fig)
    print("Saved plot: quantum_angle_evolution.png")

In [None]:
#OLD FUNC DONT USE (FOR REFERENCE ONLY)
import spacy
if __name__ == "__main__":
    spacy.load('en_core_web_sm')
    Tokeniser = SpacyTokeniser()
    parser = BobcatParser()
    rewriter = Rewriter(['curry', 'prepositional_phrase', 'determiner'])
    # Increase ansatz parameters to get more complex quantum states
    # ansatz = StronglyEntanglingAnsatz(
    # {AtomicType.NOUN: 2, AtomicType.SENTENCE: 1}, 
    # n_layers=1
    ansatz = IQPAnsatz({AtomicType.NOUN: 2, AtomicType.SENTENCE: 1, AtomicType.CONJUNCTION: 1}, n_layers=1
)

    print("--- Generating states from sentences ---")

    sentence1 = "Alice loves the dog that Bob purchased."
    sentence2 = "Bob loves the dog that Alice sold."
    sentence3 = "The big cat sleeps peacefully."
    sentence4 = "The small bird sings loudly."
    sentence5 = "The lizard basks in the sun."
    sentence6 = "The sun shines on the lizard"
    
    sentences = [sentence1, sentence2, sentence3, sentence4, sentence5, sentence6]
    state_data = {}
    for s_idx, sentence in enumerate(sentences):
        try:
            state_vec, num_qubits = lambeq_sentence_to_circuit(sentence, Tokeniser, ansatz, parser, rewriter)
            #state_vec, num_qubits = lambeq_sentence_to_state_vector(sentence, ansatz, parser, rewriter, include_debug_prints=False)
            state_data[sentence] = (state_vec, num_qubits)
            print(f"Sentence {s_idx+1}: '{sentence}'")
            print(f"Generated state with {num_qubits} qubits")
        except Exception as e:
            print(f"Error processing sentence '{sentence}': {e}")
            state_data[sentence] = (None, None)

    print("\n--- Performing Swap Tests ---")
    
    # Filter out sentences that did not produce valid states
    valid_sentences = [s for s in sentences if state_data[s][0] is not None and state_data[s][1] is not None and state_data[s][1] > 0]

    if not valid_sentences:
        print("No valid multi-qubit states generated. Cannot perform Swap Tests meaningfully.")
    else:
        first_num_qubits = state_data[valid_sentences[0]][1]
        all_same_qubits = all(state_data[s][1] == first_num_qubits for s in valid_sentences)

        if not all_same_qubits:
            print("\nWarning: Not all valid sentences resulted in circuits with the same number of qubits.")
            print("Swap Test requires states to have the same number of qubits.")
            print("Pairs with different qubit counts will be skipped.")
            for s in valid_sentences:
                print(f"  '{s}': {state_data[s][1]} qubits")

        for i in range(len(valid_sentences)):
            for j in range(i, len(valid_sentences)):
                s1 = valid_sentences[i]
                s2 = valid_sentences[j]

                vec1, nq1 = state_data[s1]
                vec2, nq2 = state_data[s2]

                if nq1 == nq2:
                    print(f"\nSwap Test between '{s1}' and '{s2}':")
                    # Fix: Use nq1 (or nq2, they're equal)
                    overlap = swap_test(vec1, vec2, nq1)
                    print(overlap)
                    Qangle = FischerInformation(overlap) # 0 - pi/2
                    
                    print(f"Estimated Quantum Angle: {Qangle:.4f}")
                    if s1 == s2:
                        print(" (Expected to be close to 1.0 for identical states)")
                else:
                    print(f"\nSkipping Swap Test between '{s1}' ({nq1} qubits) and '{s2}' ({nq2} qubits) due to different qubit counts.")

--- Generating states from sentences ---
Error processing sentence 'Alice loves the dog that Bob purchased.': too many values to unpack (expected 2)
Error processing sentence 'Bob loves the dog that Alice sold.': too many values to unpack (expected 2)
Error processing sentence 'The big cat sleeps peacefully.': too many values to unpack (expected 2)
Error processing sentence 'The small bird sings loudly.': too many values to unpack (expected 2)
Error processing sentence 'The lizard basks in the sun.': too many values to unpack (expected 2)
Error processing sentence 'The sun shines on the lizard': too many values to unpack (expected 2)

--- Performing Swap Tests ---
No valid multi-qubit states generated. Cannot perform Swap Tests meaningfully.


In [None]:
if __name__ == "__main__":
    import spacy
    spacy.load('en_core_web_sm')
    Tokeniser = SpacyTokeniser()
    parser = BobcatParser()
    rewriter = Rewriter(['curry', 'prepositional_phrase', 'determiner'])
    # ansatz = StronglyEntanglingAnsatz(
    # {AtomicType.NOUN: 2, AtomicType.SENTENCE: 1}, 
    # n_layers=1 
    trained_params, history = main_workflow(data_pairs, parser, rewriter, ansatz, Tokeniser)
    plot_training_history(history)

Determining maximum qubit count for the entire dataset...
CRITICAL ERROR: Failed to process circuit: Bobcat failed to parse 'A Rs . 5000 item can be insured for its total value by paying a premium of Rs . N. If the probability of theft in a given year is estimated to be .01 , what premium should the insurance company charge if it wants the expected gain to be equal to Rs . 1000 ?'.
CRITICAL ERROR: Failed to process circuit: Bobcat failed to parse 'Is there a St. Marie Adolphine Dierks ? How did she become a saint and what was her contribution to the faith ?'.
CRITICAL ERROR: Failed to process circuit: Bobcat failed to parse 'Is there a St. Marie Trezelle ? How did she become a saint and what was her contribution to the faith ?'.
CRITICAL ERROR: Failed to process circuit: Bobcat failed to parse 'What is it like to be homeless in Algeria ?'.
CRITICAL ERROR: Failed to process circuit: Bobcat failed to parse 'Which is the best smartphone I can buy under Rs.6000 ?'.
CRITICAL ERROR: Failed t