In [245]:
#load data
import pandas as pd
def load_data(csv_file, sample_fraction=1.0):
    """Loads Question Pairs from a CSV file

    Args:
        csv_file (str): Path to csv_file
        sample_fraction (float): Fraction of data to sample, default is 1.0
    Returns:
        tuple: A tuple containing supervised data pairs
        returns [],[] on error
    """
    sentences1 = []
    sentences2 = []
    is_duplicate = []
    
    try:
        df = pd.read_csv(csv_file, encoding='utf-8')
        #print("Column names:", df.columns)
        
        if sample_fraction < 1.0:
            df = df.sample(frac=sample_fraction, random_state=42).reset_index(drop=True)
        
        sentence1_series = df['question1']
        sentence2_series = df['question2']
        is_duplicate_series = df['is_duplicate']
        
        sentences1 = sentence1_series.tolist()
        sentences2 = sentence2_series.tolist()
        is_duplicate = is_duplicate_series.tolist()
        
        if len(sentences1) != len(sentences2):
            raise ValueError("The number of sentences in question1 and question2 do not match.")
        else:
            print(f"Loaded {len(sentences1)} sentences.")
        return sentences1, sentences2, is_duplicate
        
    except FileNotFoundError:
        print("Wrong Path")
        return [],[],[]
    
    except Exception as e:
        print(f"An {e} Error Occurred")
        return [],[],[]

DATA_PATH = r'C:/Users/Jash\Documents/Research\Semantic Equivilance\SemanticEquivilance/question_pairs/questions.csv'
sentences1, sentences2, value = load_data(DATA_PATH, sample_fraction=0.001)


Loaded 404 sentences.


In [354]:
import pennylane as qml
import numpy as np
from lambeq import AtomicType, BobcatParser, IQPAnsatz, Rewriter
from lambeq.backend.pennylane import to_pennylane as to_qml
import random

def get_circuit_state_vector(qml_qnode_func, num_wires):
    """
    Executes a PennyLane QNode function and returns the state vector.
    The qml_qnode_func should return a qml.QNode object.
    """
    dev = qml.device("default.qubit", wires=num_wires)

    @qml.qnode(dev)
    def state_vector_wrapper():
        qml_qnode_func() # Execute the actual circuit operations
        return qml.state()

    return state_vector_wrapper()

def swap_test(state1_vec, state2_vec, num_qubits):
    """
    Performs a Quantum Swap Test between two quantum state vectors.

    Args:
        state1_vec (np.ndarray): The first state vector.
        state2_vec (np.ndarray): The second state vector.

    Returns:
        float: The estimated squared overlap (fidelity) between the two states.
    """
    if 2**num_qubits != len(state1_vec):
        raise ValueError("State vectors must have a length that is a power of 2.")

    total_qubits = 1 + 2 * num_qubits #1 Ancilla qubit + 2 state qubits

    dev = qml.device("default.qubit", wires=total_qubits, shots=1000) # N = 1000 runs of the circuit for statistical significance

    @qml.qnode(dev)
    def circuit(): #|0 , psi, phi>
        # Step 1: Prepare the ancilla qubit in a superposition
        qml.Hadamard(wires=0)
        print(f"Comparing: {state1_vec} and {state2_vec}")
        # Step 2: Prepare the two input states
        qml.StatePrep(state1_vec, wires=range(1, 1 + num_qubits), normalize=True) #take another look at initial state prep

        qml.StatePrep(state2_vec, wires=range(1 + num_qubits, 1 + 2 * num_qubits))

        # Step 3: Apply controlled-SWAP gates
        for i in range(num_qubits):
            qml.CSWAP(wires=[0, 1 + i, 1 + num_qubits + i]) #selects every register of phi and psi for swap

        # Step 4: Apply Hadamard to ancilla
        qml.Hadamard(wires=0)
        # Step 5: Measure the ancilla qubit
        return qml.sample(wires=0)

    measurement_results = circuit()
    prob_0 = np.sum(measurement_results == 0) / len(measurement_results)
    squared_overlap = 2 * prob_0 - 1

    return squared_overlap

In [351]:
# Add this diagnostic function to inspect what's happening
def diagnose_circuit_issue(sentence, ansatz, parser, rewriter):
    """
    Diagnostic function to understand why circuits aren't working
    """
    print(f"\n=== DIAGNOSING: '{sentence}' ===")
    
    # Step 1-5: Same as before
    diagram = parser.sentence2diagram(sentence)
    rewritten_diagram = rewriter(diagram)
    normalized_diagram = rewritten_diagram.normal_form()
    circuit = ansatz(normalized_diagram)
    temp_qml_circuit = to_qml(circuit)
    
    num_qubits = temp_qml_circuit._n_qubits
    param_structure = temp_qml_circuit._params
    
    print(f"Number of qubits: {num_qubits}")
    print(f"Parameter structure length: {len(param_structure)}")
    
    # Create test parameters
    structured_params = []
    for param_group in param_structure:
        if isinstance(param_group, list) and len(param_group) > 0:
            group_values = [np.pi/4 for _ in param_group]  # Use π/4 for all params
            structured_params.append(group_values)
        else:
            structured_params.append([])
    
    total_params = sum(len(group) for group in structured_params)
    print(f"Total parameters: {total_params}")
    
    # Test 1: What does make_circuit() return?
    circuit_func = temp_qml_circuit.make_circuit()
    print(f"Circuit function type: {type(circuit_func)}")
    
    # Test 2: Try to inspect the circuit
    dev = qml.device("default.qubit", wires=num_qubits)
    
    @qml.qnode(dev)
    def inspect_circuit():
        print("About to call circuit_func...")
        try:
            if total_params > 0:
                result = circuit_func(structured_params)
                print(f"Circuit_func returned: {result}")
            else:
                result = circuit_func([])
                print(f"Circuit_func (no params) returned: {result}")
        except Exception as e:
            print(f"Error calling circuit_func: {e}")
            import traceback
            traceback.print_exc()
        
        return qml.state()
    
    # Test 3: Try a minimal circuit with manual gates
    @qml.qnode(dev)
    def manual_test():
        # Apply a simple rotation to first qubit
        qml.RY(np.pi/3, wires=0)
        return qml.state()
    
    print("\n--- Testing manual circuit ---")
    manual_state = manual_test()
    print(f"Manual state (should be different): {manual_state[:4]}")
    
    print("\n--- Testing lambeq circuit ---")
    lambeq_state = inspect_circuit()
    print(f"Lambeq state: {lambeq_state[:8]}")
    
    return lambeq_state

# Usage example:
diagnose_circuit_issue("Alice loves Bob.", ansatz, parser, rewriter)


=== DIAGNOSING: 'Alice loves Bob.' ===
Number of qubits: 2
Parameter structure length: 22
Total parameters: 18
Circuit function type: <class 'pennylane.workflow.qnode.QNode'>

--- Testing manual circuit ---
Manual state (should be different): [0.8660254+0.j 0.       +0.j 0.5      +0.j 0.       +0.j]

--- Testing lambeq circuit ---
About to call circuit_func...
Circuit_func returned: tensor([-0.1872+0.5766j, -0.3785-0.4152j,  0.1917-0.0902j, -0.2224-0.4717j],
       dtype=torch.complex128)
Lambeq state: [1.+0.j 0.+0.j 0.+0.j 0.+0.j]


array([1.+0.j, 0.+0.j, 0.+0.j, 0.+0.j])

In [352]:
def lambeq_sentence_to_state_vector(sentence, ansatz, parser, rewriter, include_debug_prints=False):
    """
    Converts a natural language sentence into a quantum state vector
    using Lambeq's BobcatParser and IQPAnsatz, handling parameterization
    via PennyLaneModel.

    Args:
        sentence (str): The input sentence.
        ansatz (lambeq.ansatz.Ansatz): The quantum ansatz to apply.
        parser (lambeq.parser.Parser): The parser to convert sentence to diagram.

    Returns:
        tuple: A tuple containing the state vector (np.ndarray) and
               the number of qubits (int).
    """
    try:
        if include_debug_prints:
            print(f"\n--- Debugging: Sentence '{sentence}' ---")

        # Step 1: Convert sentence to a DisCoPy diagram
        diagram = parser.sentence2diagram(sentence)
        if include_debug_prints:
            print("Step 1: Sentence parsed to diagram.")

        # Step 2: Rewrite the diagram
        rewritten_diagram = rewriter(diagram)
        if include_debug_prints:
            print("Step 2: Diagram rewritten.")

        # Step 3: Normalize the diagram
        normalized_diagram = rewritten_diagram.normal_form()
        if include_debug_prints:
            print("Step 3: Diagram normalized.")
            
        # Step 4: Apply the ansatz to the normalized diagram to get a DisCoPy circuit
        circuit = ansatz(normalized_diagram)
        if include_debug_prints:
            print("Step 4: Ansatz applied to create DisCoPy circuit.")

        # Step 5: Convert the DisCoPy circuit to a PennyLane circuit object
        temp_qml_circuit = to_qml(circuit)
        if include_debug_prints:
            print("Step 5: DisCoPy circuit converted to PennyLane object.")

        num_qubits = temp_qml_circuit._n_qubits
        param_structure = temp_qml_circuit._params
        
        if include_debug_prints:
            print(f"Parameter structure: {param_structure}")
            print(f"Number of qubits: {num_qubits}, Parameter groups: {len(param_structure)}")

        # Step 6: Create parameters matching the exact structure expected by Lambeq
        sentence_hash = abs(hash(sentence))
        sentence_seed = (sentence_hash * 17 + len(sentence) * 23) % 10000
        random.seed(sentence_seed)
        
        # Build parameters in the exact same structure as _params
        structured_params = []
        all_param_values = []
        
        for param_group in param_structure:
            if isinstance(param_group, list) and len(param_group) > 0:
                # This parameter group has parameters - create random values for each
                group_values = [random.uniform(0.1, 2 * np.pi - 0.1) for _ in param_group]
                structured_params.append(group_values)
                all_param_values.extend(group_values)
            else:
                # Empty parameter group
                structured_params.append([])
            
        if include_debug_prints:
            print("Step 6: Structured parameters generated.")
            print(f"Sentence hash: {sentence_hash}")
            print(f"Sentence seed: {sentence_seed}")
            print(f"Structured params lengths: {[len(group) for group in structured_params]}")
            print(f"Total parameters: {len(all_param_values)}")
            print(f"Sample parameter values: {[round(p, 3) for p in all_param_values[:10]] if all_param_values else 'None'}")
            print(f"Parameter range: {round(min(all_param_values), 3) if all_param_values else 'N/A'} to {round(max(all_param_values), 3) if all_param_values else 'N/A'}")
            
        # Step 7: Execute the circuit - FIX: Don't wrap in another QNode!
        circuit_func = temp_qml_circuit.make_circuit()
        
        if include_debug_prints:
            print(f"About to execute circuit with {len(all_param_values)} total parameters")
        
        # FIXED: Direct execution without wrapping in another QNode
        if any(len(group) > 0 for group in structured_params):
            state_vector = circuit_func(structured_params)
        else:
            state_vector = circuit_func([])
            
        # Convert to numpy if it's a tensor
        if hasattr(state_vector, 'numpy'):
            state_vector = state_vector.numpy()
        elif hasattr(state_vector, 'detach'):
            state_vector = state_vector.detach().numpy()
        
        if include_debug_prints:
            print("Step 7: Circuit executed successfully.")
            print(f"State vector shape: {state_vector.shape}")
            print(f"State vector (first few elements): {state_vector[:min(8, len(state_vector))]}")
            
        return state_vector, num_qubits

    except Exception as e:
        print(f"CRITICAL ERROR: Failed to process circuit: {e}")
        import traceback
        traceback.print_exc()
        print("Returning a default normalized |0> state to allow program continuation.")
        default_state = np.zeros(2, dtype=np.complex128)
        default_state[0] = 1.0
        return default_state, 1

In [None]:
if __name__ == "__main__":
    parser = BobcatParser()
    rewriter = Rewriter(['curry', 'prepositional_phrase', 'determiner'])
    # Increase the complexity to get more interesting quantum states
    #ansatz = IQPAnsatz({AtomicType.NOUN: 1, AtomicType.SENTENCE: 1}, n_layers=2, n_single_qubit_params=3)
    from lambeq import StronglyEntanglingAnsatz

# Test with a different ansatz
    ansatz = StronglyEntanglingAnsatz(
    {AtomicType.NOUN: 2, AtomicType.SENTENCE: 1}, 
    n_layers=2
)

    print("--- Generating states from sentences ---")

    sentence1 = "Alice loves the dog that Bob purchased."
    sentence2 = "Bob loves the dog that Alice sold."
    sentence3 = "The big cat sleeps peacefully."
    sentence4 = "The small bird sings loudly."

    sentences = [sentence1, sentence2, sentence3, sentence4]
    state_data = {}

    for s_idx, sentence in enumerate(sentences):
        try:
            #state_vec, num_qubits = lambeq_sentence_to_state_vector(sentence, ansatz, parser, rewriter, include_debug_prints=True)
            state_vec, num_qubits = lambeq_sentence_to_state_vector(sentence, ansatz, parser, rewriter)
            # Fix: Store the actual num_qubits, not hardcoded 1
            state_data[sentence] = (state_vec, num_qubits)
            print(f"Sentence {s_idx+1}: '{sentence}'")
            print(f"Generated state with {num_qubits} qubits")
        except Exception as e:
            print(f"Error processing sentence '{sentence}': {e}")
            state_data[sentence] = (None, None)

    print("\n--- Performing Swap Tests ---")
    
    # Filter out sentences that did not produce valid states
    valid_sentences = [s for s in sentences if state_data[s][0] is not None and state_data[s][1] is not None and state_data[s][1] > 0]

    if not valid_sentences:
        print("No valid multi-qubit states generated. Cannot perform Swap Tests meaningfully.")
    else:
        first_num_qubits = state_data[valid_sentences[0]][1]
        all_same_qubits = all(state_data[s][1] == first_num_qubits for s in valid_sentences)

        if not all_same_qubits:
            print("\nWarning: Not all valid sentences resulted in circuits with the same number of qubits.")
            print("Swap Test requires states to have the same number of qubits.")
            print("Pairs with different qubit counts will be skipped.")
            for s in valid_sentences:
                print(f"  '{s}': {state_data[s][1]} qubits")

        for i in range(len(valid_sentences)):
            for j in range(i, len(valid_sentences)):
                s1 = valid_sentences[i]
                s2 = valid_sentences[j]

                vec1, nq1 = state_data[s1]
                vec2, nq2 = state_data[s2]

                if nq1 == nq2:
                    print(f"\nSwap Test between '{s1}' and '{s2}':")
                    # Fix: Use nq1 (or nq2, they're equal) instead of undefined num_qubits
                    overlap = swap_test(vec1, vec2, nq1)
                    print(f"Estimated squared overlap: {overlap:.4f}")
                    if s1 == s2:
                        print(" (Expected to be close to 1.0 for identical states)")
                else:
                    print(f"\nSkipping Swap Test between '{s1}' ({nq1} qubits) and '{s2}' ({nq2} qubits) due to different qubit counts.")

--- Generating states from sentences ---
Sentence 1: 'Alice loves the dog that Bob purchased.'
Generated state with 15 qubits
Sentence 2: 'Bob loves the dog that Alice sold.'
Generated state with 15 qubits
Sentence 3: 'The big cat sleeps peacefully.'
Generated state with 9 qubits
Sentence 4: 'The small bird sings loudly.'
Generated state with 9 qubits

--- Performing Swap Tests ---

Swap Test requires states to have the same number of qubits.
Pairs with different qubit counts will be skipped.
  'Alice loves the dog that Bob purchased.': 15 qubits
  'Bob loves the dog that Alice sold.': 15 qubits
  'The big cat sleeps peacefully.': 9 qubits
  'The small bird sings loudly.': 9 qubits

Swap Test between 'Alice loves the dog that Bob purchased.' and 'Alice loves the dog that Bob purchased.':
Comparing: [-0.00667589-0.00140003j  0.00011435-0.00154177j  0.00010081-0.00033317j
 ...  0.0043543 -0.00048211j  0.00641298-0.00140458j
  0.00663726-0.00498617j] and [-0.00667589-0.00140003j  0.000114

MemoryError: Unable to allocate 32.0 GiB for an array with shape (2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2) and data type complex128