In [245]:
#load data
import pandas as pd
def load_data(csv_file, sample_fraction=1.0):
    """Loads Question Pairs from a CSV file

    Args:
        csv_file (str): Path to csv_file
        sample_fraction (float): Fraction of data to sample, default is 1.0
    Returns:
        tuple: A tuple containing supervised data pairs
        returns [],[] on error
    """
    sentences1 = []
    sentences2 = []
    is_duplicate = []
    
    try:
        df = pd.read_csv(csv_file, encoding='utf-8')
        #print("Column names:", df.columns)
        
        if sample_fraction < 1.0:
            df = df.sample(frac=sample_fraction, random_state=42).reset_index(drop=True)
        
        sentence1_series = df['question1']
        sentence2_series = df['question2']
        is_duplicate_series = df['is_duplicate']
        
        sentences1 = sentence1_series.tolist()
        sentences2 = sentence2_series.tolist()
        is_duplicate = is_duplicate_series.tolist()
        
        if len(sentences1) != len(sentences2):
            raise ValueError("The number of sentences in question1 and question2 do not match.")
        else:
            print(f"Loaded {len(sentences1)} sentences.")
        return sentences1, sentences2, is_duplicate
        
    except FileNotFoundError:
        print("Wrong Path")
        return [],[],[]
    
    except Exception as e:
        print(f"An {e} Error Occurred")
        return [],[],[]

DATA_PATH = r'C:/Users/Jash\Documents/Research\Semantic Equivilance\SemanticEquivilance/question_pairs/questions.csv'
sentences1, sentences2, value = load_data(DATA_PATH, sample_fraction=0.001)


Loaded 404 sentences.


In [None]:
import pennylane as qml
import numpy as np
from lambeq import AtomicType, BobcatParser, IQPAnsatz, Rewriter
from lambeq.backend.pennylane import to_pennylane as to_qml
import random

def get_circuit_state_vector(qml_qnode_func, num_wires):
    """
    Executes a PennyLane QNode function and returns the state vector.
    The qml_qnode_func should return a qml.QNode object.
    """
    dev = qml.device("default.qubit", wires=num_wires)

    @qml.qnode(dev)
    def state_vector_wrapper():
        qml_qnode_func() # Execute the actual circuit operations
        return qml.state()

    return state_vector_wrapper()

def swap_test(state1_vec, state2_vec, num_qubits):
    """
    Performs a Quantum Swap Test between two quantum state vectors.

    Args:
        state1_vec (np.ndarray): The first state vector.
        state2_vec (np.ndarray): The second state vector.

    Returns:
        float: The estimated squared overlap (fidelity) between the two states.
    """
    if 2**num_qubits != len(state1_vec):
        raise ValueError("State vectors must have a length that is a power of 2.")

    total_qubits = 1 + 2 * num_qubits #1 Ancilla qubit + 2 state qubits

    dev = qml.device("default.qubit", wires=total_qubits, shots=1000) # N = 1000 runs of the circuit for statistical significance

    @qml.qnode(dev)
    def circuit(): #|0 , psi, phi>
        # Step 1: Prepare the ancilla qubit in a superposition
        qml.Hadamard(wires=0)
        print(f"Comparing: {state1_vec} and {state2_vec}")
        # Step 2: Prepare the two input states
        qml.StatePrep(state1_vec, wires=range(1, 1 + num_qubits), normalize=True) #take another look at initial state prep

        qml.StatePrep(state2_vec, wires=range(1 + num_qubits, 1 + 2 * num_qubits))

        # Step 3: Apply controlled-SWAP gates
        for i in range(num_qubits):
            qml.CSWAP(wires=[0, 1 + i, 1 + num_qubits + i]) #selects every register of phi and psi for swap

        # Step 4: Apply Hadamard to ancilla
        qml.Hadamard(wires=0)
        # Step 5: Measure the ancilla qubit
        return qml.sample(wires=0)

    measurement_results = circuit()
    prob_0 = np.sum(measurement_results == 0) / len(measurement_results)
    squared_overlap = 2 * prob_0 - 1

    return squared_overlap

def lambeq_sentence_to_state_vector(sentence, ansatz, parser, rewriter, include_debug_prints=False):
    """
    Converts a natural language sentence into a quantum state vector
    using Lambeq's BobcatParser and IQPAnsatz, handling parameterization
    via PennyLaneModel.

    Args:
        sentence (str): The input sentence.
        ansatz (lambeq.ansatz.Ansatz): The quantum ansatz to apply.
        parser (lambeq.parser.Parser): The parser to convert sentence to diagram.

    Returns:
        tuple: A tuple containing the state vector (np.ndarray) and
               the number of qubits (int).
    """
    try:
        
        diagram = parser.sentence2diagram(sentence)
        rewritten_diagram = rewriter(diagram) # type: ignore[reportArgumentType]
        normalized_diagram = rewritten_diagram.normal_form() # Normalize the diagram
        if include_debug_prints:
            print(f"\n--- Debugging: Sentence '{sentence}' ---")
            #print(f"DisCoPy Diagram (before ansatz):\n{diagram}")

        circuit = ansatz(normalized_diagram) #IQPansatz
        #if include_debug_prints:
            #print(f"DisCoPy Circuit (after ansatz):\n{circuit}")
            #circuit.draw(figsize=(20, 10), fontsize=6, textpad=0.3)#, draw_type_labels=False, fontsize=8,foliated=True) # Uncomment to visualize the circuit
                
        #if include_debug_prints:
            #print("--- Step 6: Defining PennyLane device and QNode ---")
        
        temp_circuit = to_qml(circuit) #object to pennylane circuit object
        num_qubits = temp_circuit._n_qubits #get number of qubits from circuit object
        
        num_parameters = len(temp_circuit._params) #get number of parameters from circuit object
        
        print("work2")
        initial_params = np.array([]) # Initialize as empty
        if num_parameters > 0:
            random.seed(42) # For reproducibility during debugging
            initial_params = np.array([random.uniform(0, 2 * np.pi) for _ in range(num_parameters)])
        dev = qml.device("default.qubit", wires=num_qubits) # No shots needed for state vector retrieval
        # Define a QNode to execute the circuit and retrieve the state vector
        @qml.qnode(dev)
        def get_vector(params):
            temp_circuit.make_circuit()(params) # Execute the circuit with the parameters
            return qml.state() # This function will be used to get the state vector from the circuit
        state_vector = get_vector(initial_params) # Execute the circuit to get the state vector
        return state_vector, num_qubits# Convert the state to a NumPy array
        
        if include_debug_prints:
            print("--- Step 7: Executing QNode to get state vector ---")
        state_vector = get_quantum_state_vector(temp_circuit)
        if include_debug_prints:print("Step 7.7: Successfully evaluated DisCoPy circuit to get state vector")
        print(state_vector)
        #state = np.array(state, dtype=np.complex128)
        
        qml.StatePrep(state_vector, wires=range(16), pad_with=True) # Set the state for the circuit to our resultant state vector
        if include_debug_prints:
            print("--- Step 8: Successfully generated state vector ---")
            print(f"State vector shape: {state_vector.shape}")
            print(f"State vector (first 5 elements): {state_vector[:5]}")  # Print first 5 elements for debugging
        return state_vector, num_qubits

    except Exception as e:
            print(f"CRITICAL ERROR: Failed to process circuit {e}")
            print("Returning a default normalized |0> state to allow program continuation.")
            default_state = np.zeros(2, dtype=np.complex128) #|0>
            default_state[0] = 1.0 # then set the first element to 1 to represent |0>
            return default_state, 1 #1 is default number of qubits for |0> state
        

In [314]:
if __name__ == "__main__":
    parser = BobcatParser()
    rewriter = Rewriter(['curry', 'prepositional_phrase', 'determiner'])
    ansatz = IQPAnsatz({AtomicType.NOUN: 1, AtomicType.SENTENCE: 1}, n_layers=1, n_single_qubit_params=3) #sentence number determines number of output qubits

    print("--- Generating states from sentences ---")

    sentence1 = "Alice loves the dog that Bob purchased."
    sentence2 = "Bob loves the dog that Alice sold."
    sentence3 = "The big cat sleeps peacefully."
    sentence4 = "The small bird sings loudly."

    sentences = [sentence1, sentence2, sentence3, sentence4]
    state_data = {}

    for s_idx, sentence in enumerate(sentences):
        try:
            state_vec, num_qubits = lambeq_sentence_to_state_vector(sentence, ansatz, parser, rewriter,include_debug_prints=True)
            state_data[sentence] = (state_vec, 1)
            print(f"Sentence {s_idx+1}: '{sentence}'")
            #if state_vec is not None:
                #print(f"State vector shape: {state_vec.shape}")
            #else:
                #print("State vector is None, indicating a failure to generate a valid state.")
        except Exception as e:
            print(f"Error processing sentence '{sentence}': {e}")
            state_data[sentence] = (None, None) #Failed

    print("\n--- Performing Swap Tests ---")
    
    # Filter out sentences that did not produce valid states

    valid_sentences = [s for s in sentences if state_data[s][0] is not None and state_data[s][1] is not None and state_data[s][1] > 0]

    if not valid_sentences:
        print("No valid multi-qubit states generated. Cannot perform Swap Tests meaningfully.")
    else:
        first_num_qubits = state_data[valid_sentences[0]][1]
        all_same_qubits = all(state_data[s][1] == first_num_qubits for s in valid_sentences)

        if not all_same_qubits:
            print("\nWarning: Not all valid sentences resulted in circuits with the same number of qubits.")
            print("Swap Test requires states to have the same number of qubits.")
            print("Pairs with different qubit counts will be skipped.")
            for s in valid_sentences:
                print(f"  '{s}': {state_data[s][1]} qubits")

        for i in range(len(valid_sentences)):
            for j in range(i, len(valid_sentences)):
                s1 = valid_sentences[i]
                s2 = valid_sentences[j]

                vec1, nq1 = state_data[s1]
                vec2, nq2 = state_data[s2]

                if nq1 == nq2:
                    print(f"\nSwap Test between '{s1}' and '{s2}':")
                    overlap = swap_test(vec1, vec2,num_qubits)
                    print(f"Estimated squared overlap: {overlap:.4f}")
                    #if s1 == s2:
                        #print(" (Expected to be close to 1.0 for identical states)")
                else:
                    print(f"\nSkipping Swap Test between '{s1}' ({nq1} qubits) and '{s2}' ({nq2} qubits) due to different qubit counts.")

--- Generating states from sentences ---

--- Debugging: Sentence 'Alice loves the dog that Bob purchased.' ---
work2
work
CRITICAL ERROR: Failed to process circuit 'numpy.float64' object is not iterable
Returning a default normalized |0> state to allow program continuation.
Error processing sentence 'Alice loves the dog that Bob purchased.': 'Diagram' object is not callable

--- Debugging: Sentence 'Bob loves the dog that Alice sold.' ---
work2
work
CRITICAL ERROR: Failed to process circuit 'numpy.float64' object is not iterable
Returning a default normalized |0> state to allow program continuation.
Error processing sentence 'Bob loves the dog that Alice sold.': 'Diagram' object is not callable

--- Debugging: Sentence 'The big cat sleeps peacefully.' ---
work2
work
CRITICAL ERROR: Failed to process circuit 'numpy.float64' object is not iterable
Returning a default normalized |0> state to allow program continuation.
Error processing sentence 'The big cat sleeps peacefully.': 'Diagram'