In [245]:
#load data
import pandas as pd
def load_data(csv_file, sample_fraction=1.0):
    """Load labelled question pairs from a CSV file.

    The CSV must provide the columns ``question1``, ``question2`` and
    ``is_duplicate``. Rows in which either question is missing are dropped so
    that every returned question is a usable piece of text.

    Args:
        csv_file (str): Path to the CSV file.
        sample_fraction (float): Fraction of rows to keep. Values below 1.0
            draw a random sample with a fixed ``random_state`` of 42; any
            other value keeps every row. Default is 1.0.

    Returns:
        tuple: ``(sentences1, sentences2, is_duplicate)`` -- three lists of
        equal length. On any error a message is printed and
        ``([], [], [])`` is returned.
    """
    question_columns = ['question1', 'question2']

    try:
        df = pd.read_csv(csv_file, encoding='utf-8')

        if sample_fraction < 1.0:
            df = df.sample(frac=sample_fraction, random_state=42).reset_index(drop=True)

        # Select the needed columns up front; a missing column raises KeyError,
        # which is reported by the generic handler below.
        pairs = df[question_columns + ['is_duplicate']]

        # Empty CSV cells are read as NaN (a float), which is not a sentence.
        complete_pairs = pairs.dropna(subset=question_columns)
        dropped = len(pairs) - len(complete_pairs)
        if dropped:
            print(f"Dropped {dropped} rows with a missing question.")

        sentences1 = complete_pairs['question1'].tolist()
        sentences2 = complete_pairs['question2'].tolist()
        is_duplicate = complete_pairs['is_duplicate'].tolist()

        print(f"Loaded {len(sentences1)} sentences.")
        return sentences1, sentences2, is_duplicate

    except FileNotFoundError:
        print("Wrong Path")
        return [], [], []

    except Exception as e:
        print(f"An {e} Error Occurred")
        return [], [], []

# NOTE(review): absolute, machine-specific path that mixes '/' and '\' separators.
# The raw-string prefix keeps the backslashes literal. Point this at your own
# copy of the question-pairs CSV before running.
DATA_PATH = r'C:/Users/Jash\Documents/Research\Semantic Equivilance\SemanticEquivilance/question_pairs/questions.csv'
# Load a 0.1% sample of the rows; the third returned list (the is_duplicate
# labels) is bound to `value`.
sentences1, sentences2, value = load_data(DATA_PATH, sample_fraction=0.001)


Loaded 404 sentences.


In [None]:
import pennylane as qml
import numpy as np
from lambeq import AtomicType, BobcatParser, Rewriter
from lambeq.backend.pennylane import to_pennylane as to_qml
import random

def swap_test(state1_vec, state2_vec, num_qubits, protocol2=False, shots=1000):
    """Estimate the squared overlap of two states with a quantum swap test.

    The circuit uses one ancilla (wire 0) plus two registers of
    ``num_qubits`` wires each. Both input vectors are loaded with
    ``qml.StatePrep(..., normalize=True)``, so they do not need to be
    normalized beforehand.

    Args:
        state1_vec (np.ndarray): Amplitudes of the first state; must have
            length ``2**num_qubits``.
        state2_vec (np.ndarray): Amplitudes of the second state; must have
            length ``2**num_qubits``.
        num_qubits (int): Number of qubits in each state register.
        protocol2 (bool): Currently unused; kept so existing callers that
            pass it keep working.
        shots (int): Number of circuit executions used for the estimate.

    Returns:
        float: The estimated squared overlap ``|<psi|phi>|^2``, clamped at 0.

    Raises:
        ValueError: If either vector does not have length ``2**num_qubits``.
    """
    expected_dim = 2 ** num_qubits
    for label, vec in (("state1_vec", state1_vec), ("state2_vec", state2_vec)):
        if len(vec) != expected_dim:
            raise ValueError(
                f"{label} has length {len(vec)}, but num_qubits={num_qubits} "
                f"requires length {expected_dim}."
            )

    total_qubits = 1 + 2 * num_qubits  # 1 ancilla qubit + 2 state registers
    register1 = range(1, 1 + num_qubits)
    register2 = range(1 + num_qubits, total_qubits)

    dev = qml.device("default.qubit", wires=total_qubits, shots=shots)

    # Emitted outside the quantum function so the circuit body only holds gates.
    print(f"Comparing: {state1_vec} and {state2_vec}")

    @qml.qnode(dev)
    def circuit():  # |0, psi, phi>
        # Step 1: put the ancilla in superposition.
        qml.Hadamard(wires=0)

        # Step 2: load both input states, normalizing each the same way.
        qml.StatePrep(state1_vec, wires=register1, normalize=True)
        qml.StatePrep(state2_vec, wires=register2, normalize=True)

        # Step 3: controlled-SWAP each pair of corresponding register qubits.
        for wire1, wire2 in zip(register1, register2):
            qml.CSWAP(wires=[0, wire1, wire2])

        # Step 4: interfere, then Step 5: sample the ancilla.
        qml.Hadamard(wires=0)
        return qml.sample(wires=0)

    measurement_results = np.asarray(circuit())
    prob_0 = np.mean(measurement_results == 0)

    # P(ancilla = 0) = 1/2 + |<psi|phi>|^2 / 2, hence the estimator below.
    squared_overlap = 2 * prob_0 - 1

    # Shot noise can push the estimate slightly below zero for near-orthogonal
    # states; clamp instead of taking abs() so noise is not reported as overlap.
    return float(max(0.0, squared_overlap))

In [None]:
def lambeq_sentence_to_state_vector(sentence, ansatz, parser, rewriter, include_debug_prints=False):
    """Turn a sentence into a state vector via a lambeq -> PennyLane circuit.

    Pipeline: parse the sentence to a diagram, rewrite it, take its normal
    form, apply the ansatz, convert the result with ``to_qml`` and execute the
    circuit returned by ``make_circuit()``. Circuit parameters are
    pseudo-random values in ``[0.1, 2*pi - 0.1]`` drawn from a generator
    seeded from the sentence text, so the same sentence always gets the same
    parameters, including across interpreter restarts.

    Args:
        sentence (str): The input sentence.
        ansatz: Callable mapping the normalized diagram to a circuit.
        parser: Object exposing ``sentence2diagram(sentence)``.
        rewriter: Callable applied to the parsed diagram.
        include_debug_prints (bool): Print a trace of every step when True.

    Returns:
        tuple: ``(state_vector, num_qubits)``. If any step fails, the error
        and traceback are printed and a single-qubit ``|0>`` state
        ``(array([1, 0]), 1)`` is returned instead.
    """
    import hashlib
    import traceback

    try:
        if include_debug_prints:
            print(f"\n--- Debugging: Sentence '{sentence}' ---")

        # Step 1: Convert sentence to a diagram
        diagram = parser.sentence2diagram(sentence)
        if include_debug_prints:
            print("Step 1: Sentence parsed to diagram.")

        # Step 2: Rewrite the diagram
        rewritten_diagram = rewriter(diagram)
        if include_debug_prints:
            print("Step 2: Diagram rewritten.")

        # Step 3: Normalize the diagram
        normalized_diagram = rewritten_diagram.normal_form()
        if include_debug_prints:
            print("Step 3: Diagram normalized.")

        # Step 4: Apply the ansatz to the normalized diagram to get a circuit
        circuit = ansatz(normalized_diagram)
        if include_debug_prints:
            print("Step 4: Ansatz applied to create DisCoPy circuit.")

        # Step 5: Convert the circuit to a PennyLane circuit object
        temp_qml_circuit = to_qml(circuit)
        if include_debug_prints:
            print("Step 5: DisCoPy circuit converted to PennyLane object.")

        # NOTE(review): these are private attributes of the converted circuit
        # and may change between lambeq versions.
        num_qubits = temp_qml_circuit._n_qubits
        param_structure = temp_qml_circuit._params

        if include_debug_prints:
            print(f"Parameter structure: {param_structure}")
            print(f"Number of qubits: {num_qubits}, Parameter groups: {len(param_structure)}")

        # Step 6: Create parameters matching the structure of `_params`.
        # The builtin hash() of a str is salted per process, so a digest of the
        # text is used to keep the seed stable between kernel restarts.
        digest = hashlib.sha256(sentence.encode('utf-8')).digest()
        sentence_hash = int.from_bytes(digest[:8], 'big')
        sentence_seed = (sentence_hash * 17 + len(sentence) * 23) % 10000
        # A private generator avoids reseeding the global `random` module.
        rng = random.Random(sentence_seed)

        structured_params = []
        all_param_values = []

        for param_group in param_structure:
            if isinstance(param_group, list) and len(param_group) > 0:
                # One random angle per symbol in this group
                group_values = [rng.uniform(0.1, 2 * np.pi - 0.1) for _ in param_group]
                structured_params.append(group_values)
                all_param_values.extend(group_values)
            else:
                # Empty parameter group
                structured_params.append([])

        if include_debug_prints:
            print("Step 6: Structured parameters generated.")
            print(f"Sentence hash: {sentence_hash}")
            print(f"Sentence seed: {sentence_seed}")
            print(f"Structured params lengths: {[len(group) for group in structured_params]}")
            print(f"Total parameters: {len(all_param_values)}")
            print(f"Sample parameter values: {[round(p, 3) for p in all_param_values[:10]] if all_param_values else 'None'}")
            print(f"Parameter range: {round(min(all_param_values), 3) if all_param_values else 'N/A'} to {round(max(all_param_values), 3) if all_param_values else 'N/A'}")

        # Step 7: Execute the circuit
        circuit_func = temp_qml_circuit.make_circuit()

        if include_debug_prints:
            print(f"About to execute circuit with {len(all_param_values)} total parameters")

        if any(len(group) > 0 for group in structured_params):
            state_vector = circuit_func(structured_params)
        else:
            state_vector = circuit_func([])

        # Convert to numpy if it's a tensor. `detach` is checked first because
        # a tensor that tracks gradients refuses a direct `.numpy()` call.
        if hasattr(state_vector, 'detach'):
            state_vector = state_vector.detach().numpy()
        elif hasattr(state_vector, 'numpy'):
            state_vector = state_vector.numpy()

        if include_debug_prints:
            print("Step 7: Circuit executed successfully.")
            print(f"State vector shape: {state_vector.shape}")
            print(f"State vector (first few elements): {state_vector[:min(8, len(state_vector))]}")

        return state_vector, num_qubits

    except Exception as e:
        print(f"CRITICAL ERROR: Failed to process circuit: {e}")
        traceback.print_exc()
        print("Returning a default normalized |0> state to allow program continuation.")
        default_state = np.zeros(2, dtype=np.complex128)
        default_state[0] = 1.0
        return default_state, 1

In [None]:
from lambeq import StronglyEntanglingAnsatz
if __name__ == "__main__":
    # Pipeline pieces shared by every sentence in this demo.
    parser = BobcatParser()
    rewriter = Rewriter(['curry', 'prepositional_phrase', 'determiner'])
    ansatz = StronglyEntanglingAnsatz(
        {AtomicType.NOUN: 2, AtomicType.SENTENCE: 1},
        n_layers=1,
    )

    print("--- Generating states from sentences ---")

    sentence1 = "Alice loves the dog that Bob purchased."
    sentence2 = "Bob loves the dog that Alice sold."
    sentence3 = "The big cat sleeps peacefully."
    sentence4 = "The small bird sings loudly."
    sentences = [sentence1, sentence2, sentence3, sentence4]

    # Map each sentence to (state vector, qubit count); (None, None) marks a failure.
    state_data = {}
    for s_idx, sentence in enumerate(sentences):
        try:
            state_vec, num_qubits = lambeq_sentence_to_state_vector(sentence, ansatz, parser, rewriter)
        except Exception as e:
            print(f"Error processing sentence '{sentence}': {e}")
            state_data[sentence] = (None, None)
        else:
            state_data[sentence] = (state_vec, num_qubits)
            print(f"Sentence {s_idx+1}: '{sentence}'")
            print(f"Generated state with {num_qubits} qubits")

    print("\n--- Performing Swap Tests ---")

    # Keep only the sentences that produced a state on at least one qubit.
    valid_sentences = []
    for s in sentences:
        candidate_vec, candidate_nq = state_data[s]
        if candidate_vec is None or candidate_nq is None:
            continue
        if candidate_nq > 0:
            valid_sentences.append(s)

    if valid_sentences:
        first_num_qubits = state_data[valid_sentences[0]][1]
        all_same_qubits = True
        for s in valid_sentences:
            if state_data[s][1] != first_num_qubits:
                all_same_qubits = False
                break

        if not all_same_qubits:
            print("\nWarning: Not all valid sentences resulted in circuits with the same number of qubits.")
            print("Swap Test requires states to have the same number of qubits.")
            print("Pairs with different qubit counts will be skipped.")
            for s in valid_sentences:
                print(f"  '{s}': {state_data[s][1]} qubits")

        # Compare every unordered pair, including each sentence with itself.
        for i, s1 in enumerate(valid_sentences):
            vec1, nq1 = state_data[s1]
            for j in range(i, len(valid_sentences)):
                s2 = valid_sentences[j]
                vec2, nq2 = state_data[s2]

                if nq1 != nq2:
                    print(f"\nSkipping Swap Test between '{s1}' ({nq1} qubits) and '{s2}' ({nq2} qubits) due to different qubit counts.")
                    continue

                print(f"\nSwap Test between '{s1}' and '{s2}':")
                overlap = swap_test(vec1, vec2, nq1)
                print(f"Estimated squared overlap: {overlap:.4f}")
                if s1 == s2:
                    print(" (Expected to be close to 1.0 for identical states)")
    else:
        print("No valid multi-qubit states generated. Cannot perform Swap Tests meaningfully.")

KeyboardInterrupt: 

In [None]:
def swap_test_with_initial_state(state1, state2, n_qubits, initial_state_int=0, shots=1000):
    """Run a swap test on two registers prepared in the same basis state.

    Wires ``0..n_qubits-1`` and ``n_qubits..2*n_qubits-1`` hold the two
    registers and wire ``2*n_qubits`` is the ancilla. Both registers are
    initialised to the computational basis state given by
    ``initial_state_int`` using Pauli-X gates.

    NOTE: ``state1`` and ``state2`` are not applied to the circuit yet (the
    state-preparation step is still a placeholder), so the result currently
    reflects only the two identical basis states.

    Args:
        state1: First quantum state vector (currently unused).
        state2: Second quantum state vector (currently unused).
        n_qubits: Number of qubits in each state register.
        initial_state_int: Integer encoding the initial basis state
            (e.g., 5 -> |101> for 3 qubits); must satisfy
            ``0 <= initial_state_int < 2**n_qubits``.
        shots: Number of measurements for the swap test.

    Returns:
        Estimated squared overlap between the two registers, clamped at 0.

    Raises:
        ValueError: If ``initial_state_int`` does not fit in ``n_qubits`` bits.
    """
    # A value outside the register would format to a longer (or signed) bit
    # string and place X gates on the wrong wires, so reject it early.
    if not 0 <= initial_state_int < 2 ** n_qubits:
        raise ValueError(
            f"initial_state_int={initial_state_int} does not fit in {n_qubits} qubits "
            f"(expected 0 <= value < {2 ** n_qubits})."
        )

    import pennylane as qml

    ancilla = 2 * n_qubits

    # Sampling needs a finite shot count, so `shots` is passed to the device.
    dev = qml.device('default.qubit', wires=2*n_qubits + 1, shots=shots)

    # Most-significant bit first: character k controls wire k of each register.
    binary_string = format(initial_state_int, f'0{n_qubits}b')
    flipped_wires = [idx for idx, bit in enumerate(binary_string) if bit == '1']

    @qml.qnode(dev)
    def swap_test_circuit():
        # Prepare the same basis state on both registers.
        for offset in (0, n_qubits):
            for qubit_idx in flipped_wires:
                qml.PauliX(wires=offset + qubit_idx)

        # Placeholder: the circuits that would create state1 and state2 are
        # not applied here yet.

        # Hadamard, controlled swaps, Hadamard on the ancilla.
        qml.Hadamard(wires=ancilla)
        for i in range(n_qubits):
            qml.CSWAP(wires=[ancilla, i, n_qubits + i])
        qml.Hadamard(wires=ancilla)

        # Measure ancilla qubit
        return qml.sample(wires=ancilla)

    results = np.asarray(swap_test_circuit())

    # P(ancilla = 0) = 1/2 + overlap / 2, hence overlap = 2 * P(0) - 1.
    prob_0 = np.mean(results == 0)
    squared_overlap = 2 * prob_0 - 1

    return max(0, squared_overlap)  # Ensure non-negative due to sampling noise


if __name__ == "__main__":
    # Imported once here rather than on every iteration of the loops below.
    import pennylane as qml
    from lambeq import StronglyEntanglingAnsatz

    parser = BobcatParser()
    rewriter = Rewriter(['curry', 'prepositional_phrase', 'determiner'])

    # Test with a different ansatz
    ansatz = StronglyEntanglingAnsatz(
        {AtomicType.NOUN: 2, AtomicType.SENTENCE: 1},
        n_layers=2
    )

    print("--- Generating states from sentences with varying initial qubit states ---")

    sentence1 = "Alice loves the dog that Bob purchased."
    sentence2 = "Bob loves the dog that Alice sold."
    sentence3 = "The big cat sleeps peacefully."
    sentence4 = "The small bird sings loudly."

    sentences = [sentence1, sentence2, sentence3, sentence4]

    # First, determine the number of qubits by processing one sentence.
    # NOTE(review): this count is reused for every sentence below; other
    # sentences may need a different number of qubits -- confirm.
    sample_sentence = sentences[0]
    try:
        sample_state_vec, sample_num_qubits = lambeq_sentence_to_state_vector(sample_sentence, ansatz, parser, rewriter)
        num_qubits = sample_num_qubits
        print(f"Circuits use {num_qubits} qubits")
    except Exception as e:
        print(f"Error analyzing sample circuit: {e}")
        num_qubits = 4  # Default fallback

    max_initial_states = 2**num_qubits
    print(f"Will test {max_initial_states} different initial states (0 to {max_initial_states-1})")

    # initial state integer -> {sentence: (state vector, qubit count)}
    all_state_data = {}

    # Loop through different initial states using integers
    num_states_to_test = min(8, max_initial_states)  # Limit for demo

    for initial_state_int in range(num_states_to_test):
        # Convert to binary string for display
        binary_string = format(initial_state_int, f'0{num_qubits}b')

        print(f"\n--- Initial State {initial_state_int} -> |{binary_string}⟩ ---")

        combo_states = {}

        # Process each sentence with this initial state
        for s_idx, sentence in enumerate(sentences):
            try:
                # Get the original circuit structure
                diagram = parser.sentence2diagram(sentence)
                rewritten_diagram = rewriter(diagram)
                normalized_diagram = rewritten_diagram.normal_form()
                circuit = ansatz(normalized_diagram)
                temp_qml_circuit = to_qml(circuit)

                param_structure = temp_qml_circuit._params

                # Per-sentence seeded parameters. They are generated for
                # completeness but the placeholder circuit below ignores them.
                sentence_hash = abs(hash(sentence))
                sentence_seed = (sentence_hash * 17 + len(sentence) * 23) % 10000
                random.seed(sentence_seed)

                structured_params = []
                for param_group in param_structure:
                    if isinstance(param_group, list) and len(param_group) > 0:
                        group_values = [random.uniform(0.1, 2 * np.pi - 0.1) for _ in param_group]
                        structured_params.append(group_values)
                    else:
                        structured_params.append([])

                # Create new device and circuit with initial state preparation
                dev = qml.device('default.qubit', wires=num_qubits)

                @qml.qnode(dev)
                def circuit_with_initial_state(*params):
                    # Step 1: Prepare initial state using X gates
                    binary_string = format(initial_state_int, f'0{num_qubits}b')
                    for qubit_idx, bit in enumerate(binary_string):
                        if bit == '1':
                            qml.PauliX(wires=qubit_idx)

                    # Step 2 (placeholder): the sentence circuit from
                    # temp_qml_circuit is NOT applied yet, so the returned
                    # state is just the prepared basis state.

                    return qml.state()

                # Execute circuit with prepared initial state
                if any(len(group) > 0 for group in structured_params):
                    flat_params = [param for group in structured_params for param in group]
                    state_vector = circuit_with_initial_state(*flat_params)
                else:
                    state_vector = circuit_with_initial_state()

                # Convert to numpy if needed
                if hasattr(state_vector, 'numpy'):
                    state_vector = state_vector.numpy()
                elif hasattr(state_vector, 'detach'):
                    state_vector = state_vector.detach().numpy()

                combo_states[sentence] = (state_vector, num_qubits)
                print(f"  Sentence {s_idx+1}: '{sentence[:30]}...' -> processed")

            except Exception as e:
                print(f"  Error processing sentence '{sentence[:30]}...': {e}")
                # Fallback to original method without initial state preparation
                try:
                    fallback_state, fallback_qubits = lambeq_sentence_to_state_vector(sentence, ansatz, parser, rewriter)
                    combo_states[sentence] = (fallback_state, fallback_qubits)
                    print(f"  Fallback: Used default |000...0⟩ initial state")
                except Exception as fallback_err:
                    # Report the failure instead of swallowing it silently
                    # (a bare except would also trap KeyboardInterrupt).
                    print(f"  Fallback failed: {fallback_err}")
                    combo_states[sentence] = (None, None)

        # Store results for this initial state
        all_state_data[initial_state_int] = combo_states

    print("\n--- Performing Swap Tests Across Initial States ---")

    # Compare states from different initial conditions
    initial_state_ints = list(all_state_data.keys())

    # For each sentence, compare across different initial states
    for sentence in sentences:
        print(f"\n=== Analyzing '{sentence[:30]}...' across initial states ===")

        # Get valid states for this sentence across all initial states
        valid_states = []
        for init_int in initial_state_ints:
            if sentence in all_state_data[init_int]:
                state_vec, nq = all_state_data[init_int][sentence]
                if state_vec is not None and nq is not None:
                    valid_states.append((init_int, state_vec, nq))

        if len(valid_states) < 2:
            print("  Not enough valid states for comparison")
            continue

        # Compare pairs of initial states
        for i in range(len(valid_states)):
            for j in range(i+1, min(i+3, len(valid_states))):  # Limit comparisons
                init_int1, vec1, nq1 = valid_states[i]
                init_int2, vec2, nq2 = valid_states[j]

                # Convert to binary for display
                binary1 = format(init_int1, f'0{num_qubits}b')
                binary2 = format(init_int2, f'0{num_qubits}b')

                # Fallback states may live on a different number of qubits;
                # the swap test needs equally sized registers.
                if nq1 != nq2:
                    print(f"  |{binary1}⟩ vs |{binary2}⟩: skipped ({nq1} vs {nq2} qubits)")
                    continue

                # Use regular swap test since states already have initial conditions applied
                overlap = swap_test(vec1, vec2, nq1)

                # Calculate Hamming distance
                hamming_dist = sum(c1 != c2 for c1, c2 in zip(binary1, binary2))

                print(f"  |{binary1}⟩ vs |{binary2}⟩: Hamming={hamming_dist}, Overlap={overlap:.4f}")

    print("\n--- Initial State Impact Analysis ---")

    # Analyze how much each initial state changes the results
    for sentence in sentences:
        print(f"\nSentence: '{sentence[:40]}...'")

        # Compare all states to the |000...0⟩ baseline
        baseline_int = 0
        if baseline_int in all_state_data and sentence in all_state_data[baseline_int]:
            baseline_state = all_state_data[baseline_int][sentence]
            if baseline_state[0] is not None:
                overlaps = []

                for init_int in initial_state_ints[1:]:  # Skip baseline
                    if sentence in all_state_data[init_int]:
                        comp_state = all_state_data[init_int][sentence]
                        if comp_state[0] is not None:
                            binary_str = format(init_int, f'0{num_qubits}b')

                            # Same guard as above: only compare equally sized states.
                            if comp_state[1] != baseline_state[1]:
                                print(f"  |000...0⟩ vs |{binary_str}⟩: skipped ({baseline_state[1]} vs {comp_state[1]} qubits)")
                                continue

                            overlap = swap_test(baseline_state[0], comp_state[0], baseline_state[1])
                            overlaps.append(overlap)

                            # Show individual comparisons
                            print(f"  |000...0⟩ vs |{binary_str}⟩: {overlap:.4f}")

                if overlaps:
                    avg_overlap = np.mean(overlaps)
                    std_overlap = np.std(overlaps)
                    print(f"  Average overlap: {avg_overlap:.4f} ± {std_overlap:.4f}")
                    print(f"  Sensitivity: {'High' if avg_overlap < 0.7 else 'Medium' if avg_overlap < 0.9 else 'Low'}")

    print("\n--- Summary ---")
    print("Analysis shows how different initial qubit states affect quantum sentence representations.")
    print("Lower overlap values indicate higher sensitivity to initial conditions.")

    print("\n--- Analysis Complete ---")