In [7]:
# Environment and Core Libraries
import pandas as pd
import numpy as np
import random
import math
import yaml
import os #multithreading

# PennyLane and PyTorch
import pennylane as qml
import torch
from torch.nn import Module, Parameter
import torch.nn as nn
from torch.optim import Adam
from sklearn.model_selection import train_test_split

# Lambeq
from lambeq.backend.quantum import Diagram as LambeqDiagram
from discopy.quantum import gates
import spacy
import discopy
from lambeq import BobcatParser, Rewriter, IQPAnsatz, SpacyTokeniser, AtomicType
from discopy.rigid import Ty

#data handling and plotting
import seaborn as sns
from sklearn.metrics import confusion_matrix, roc_curve, auc
import matplotlib.pyplot as plt

# Patch for discopy
monoidal_module = getattr(discopy, "monoidal", None)
if monoidal_module:
    diagram_class = getattr(monoidal_module, "Diagram", None)
    if diagram_class and not hasattr(diagram_class, "is_mixed"):
        diagram_class.is_mixed = property(lambda self: False)

# Load spacy model
spacy.load('en_core_web_sm')


<spacy.lang.en.English at 0x2bc6768e060>

In [8]:
#Data Loading Function
def load_data(csv_file, sample_fraction=1.0):
    sentences1, sentences2, is_duplicate = [], [], []
    try:
        df = pd.read_csv(csv_file, encoding='utf-8')
        if sample_fraction < 1.0:
            df = df.sample(frac=sample_fraction, random_state=42).reset_index(drop=True)
        
        sentences1 = df['question1'].astype(str).tolist()
        sentences2 = df['question2'].astype(str).tolist()
        is_duplicate = df['is_duplicate'].tolist()
        
        print(f"Loaded {len(sentences1)} sentence pairs.")
        return sentences1, sentences2, is_duplicate
    except Exception as e:
        print(f"An error occurred: {e}")
        return [], [], []

def create_balanced_training_set(training_data: list) -> list:
    """Creates a balanced training set by undersampling the majority class."""
    positives = [pair for pair in training_data if pair['label'] == 1]
    negatives = [pair for pair in training_data if pair['label'] == 0]
    
    # Undersample the larger class to match the size of the smaller class
    if len(positives) > len(negatives):
        positives = random.sample(positives, len(negatives))
    else:
        negatives = random.sample(negatives, len(positives))
    
    balanced_train_set = positives + negatives
    random.shuffle(balanced_train_set)
    
    print(f"Created a balanced training set with {len(positives)} positive and {len(negatives)} negative pairs.")
    return balanced_train_set
def load_fasttext_embeddings(fasttext_file_path):
    """
    Loads FastText embeddings from a .vec file into a word-to-index dictionary
    and an embedding matrix.
    """
    print(f"Loading FastText embeddings from {fasttext_file_path}...")
    word_to_idx = {}
    embeddings = []
    
    # Add a padding token at index 0 for unknown words
    word_to_idx['<pad>'] = 0
    # The embedding dimension is 300 for this file
    embeddings.append(np.zeros(300)) 
    
    with open(fasttext_file_path, 'r', encoding='utf-8') as f:
        # The first line of a .vec file is a header, skip it
        next(f) 
        for i, line in enumerate(f):
            parts = line.split()
            word = parts[0]
            # Handle potential lines with only a word and no vector
            if len(parts) > 2:
                vector = np.array(parts[1:], dtype=np.float32)
                word_to_idx[word] = i + 1
                embeddings.append(vector)
            
    embeddings_matrix = np.array(embeddings)
    print(f"Loaded {len(word_to_idx)} word vectors.")
    return word_to_idx, embeddings_matrix

In [9]:
#Plotting functions
def plot_training_history(history):
    if not history:
        print("History is empty. Cannot plot.")
        return

    plt.figure(figsize=(10, 6))
    plt.plot(history, label='Average Loss per Epoch')
    plt.title('Training Loss Convergence')
    plt.xlabel('Epoch')
    plt.ylabel('Average Penalized Loss')
    plt.legend()
    plt.grid(True)
    plt.show()
def plot_parameter_history(param_history):
    if not param_history:
        print("Parameter history is empty. Cannot plot.")
        return

    epochs = range(len(param_history))
    means = [d['mean'] for d in param_history]
    stds = [d['std'] for d in param_history]
    mins = [d['min'] for d in param_history]
    maxs = [d['max'] for d in param_history]

    plt.figure(figsize=(10, 6))
    plt.plot(epochs, means, label='Mean Parameter Value')
    plt.fill_between(epochs, np.array(means) - np.array(stds), np.array(means) + np.array(stds), alpha=0.2, label='1 Std. Deviation')
    plt.plot(epochs, mins, linestyle='--', color='gray', label='Min/Max Range')
    plt.plot(epochs, maxs, linestyle='--', color='gray')
    
    plt.title('Evolution of Model Parameters During Training')
    plt.xlabel('Epoch')
    plt.ylabel('Parameter Value')
    plt.legend()
    plt.grid(True)
    plt.show()
def plot_parameter_evolution_polar(param_history):
    if not param_history:
        print("Parameter history is empty. Cannot plot.")
        return
    
    plt.figure(figsize=(8, 8))
    ax = plt.subplot(111, projection='polar')

    epochs = np.array(range(len(param_history)))
    
    # --- THE FIX ---
    # We use modulo 4*pi to correctly represent the parameter's period.
    mean_angles = np.array([d['mean'] for d in param_history]) % (4 * np.pi)
    # ---------------

    ax.plot(mean_angles, epochs, 'o-', label='Mean Parameter Path')

    if len(epochs) > 0:
        ax.plot(mean_angles[0], epochs[0], 'gX', markersize=12, label='Start')
        ax.plot(mean_angles[-1], epochs[-1], 'rX', markersize=12, label='End')

    ax.set_theta_zero_location('N')# pyright: ignore
    ax.set_theta_direction(-1)# pyright: ignore
    ax.set_rlabel_position(0)# pyright: ignore
    ax.set_rlim(0, len(epochs) * 1.05)# pyright: ignore
    ax.set_xlabel("Epoch")
    ax.set_title('Cyclical Evolution of Mean Parameter', pad=20)
    ax.legend()
    plt.show()
def plot_parameter_deltas(param_history):
    if len(param_history) < 2:
        print("Need at least 2 epochs to plot parameter deltas.")
        return

    mean_angles = np.array([d['mean'] for d in param_history])
    
    # Calculate the shortest angle difference between each epoch
    deltas = []
    for i in range(1, len(mean_angles)):
        prev_angle = mean_angles[i-1]
        curr_angle = mean_angles[i]
        delta = np.arctan2(np.sin(curr_angle - prev_angle), np.cos(curr_angle - prev_angle))
        deltas.append(delta)

    plt.figure(figsize=(10, 6))
    # We plot against epochs 1 to N, since the first delta occurs at epoch 1
    plt.plot(range(1, len(mean_angles)), deltas, 'o-', label='Change in Mean Parameter (Delta)')
    
    plt.axhline(0, color='red', linestyle='--', label='No Change')
    plt.title('Epoch-to-Epoch Change in Mean Parameter Value')
    plt.xlabel('Epoch')
    plt.ylabel('Shortest Angle Difference (Radians)')
    plt.legend()
    plt.grid(True)
    plt.xticks(range(1, len(mean_angles)))
    plt.show()


def plot_confusion_matrix(y_true, y_pred, threshold=0.5):
    """
    Computes and plots a confusion matrix.
    
    Args:
        y_true (np.array): The ground-truth labels (0s and 1s).
        y_pred (np.array): The model's raw probability predictions (overlaps from 0 to 1).
        threshold (float): The cutoff for classifying a prediction as 1.
    """
    # Convert probability predictions to binary 0/1 predictions
    binary_preds = (y_pred >= threshold).astype(int)
    
    cm = confusion_matrix(y_true, binary_preds)
    
    plt.figure(figsize=(8, 6))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                xticklabels=['Predicted Non-Duplicate', 'Predicted Duplicate'],
                yticklabels=['Actual Non-Duplicate', 'Actual Duplicate'])
    plt.title('Confusion Matrix')
    plt.ylabel('Actual Label')
    plt.xlabel('Predicted Label')
    plt.show()
def plot_roc_curve(y_true, y_pred):
    """
    Computes and plots the ROC curve and AUC score.
    """
    fpr, tpr, thresholds = roc_curve(y_true, y_pred)
    roc_auc = auc(fpr, tpr)

    plt.figure(figsize=(8, 6))
    plt.plot(fpr, tpr, color='darkorange', lw=2, label=f'ROC curve (AUC = {roc_auc:.2f})')
    plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--', label='Random Guess')
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('Receiver Operating Characteristic (ROC) Curve')
    plt.legend(loc="lower right")
    plt.grid(True)
    plt.show()

In [None]:
# QNLP MODEL AND TRAINING PIPELINE
# 1. THE QNLP MODEL CLASS
# ===============================================================
class QNLPModel(nn.Module):
    def __init__(self, symbols, word_to_idx, embeddings):
        super().__init__()
        
        self.embedding = nn.Embedding.from_pretrained(
            torch.from_numpy(embeddings), freeze=False, padding_idx=0)
        
        self.word_to_idx = word_to_idx
        self.symbols_map = {}
        for s in symbols:
            word = s.name.split('_')[-1]
            if word in self.word_to_idx:
                self.symbols_map[s.name.replace('.', '_')] = self.word_to_idx[word]
            else:
                self.symbols_map[s.name.replace('.', '_')] = 0
        self.padding_theta = Parameter(torch.tensor(0.1)) # For RY gates
        self.padding_phi = Parameter(torch.tensor(0.1))   # For CPHASE gates

    def forward(self, diagram):
        # Filter the diagram's symbols to include only non-nouns
        parameterized_symbols = [
            s for s in diagram.free_symbols if not s.name.lower().startswith('noun')
        ]

        if not parameterized_symbols:
            return torch.tensor([])

        # Get word indices only for the symbols that require parameters
        param_indices = [self.symbols_map.get(s.name.replace('.', '_'), 0) 
                         for s in parameterized_symbols]
        
        indices_tensor = torch.tensor(param_indices, dtype=torch.long)
        
        # Look up the FastText vector for each of these non-noun words
        embedded_vectors = self.embedding(indices_tensor)
        
        # Apply the deterministic mapping to get the "warm start" parameters
        quantum_params = torch.sum(embedded_vectors[:, :10], dim=1) * 0.1
        
        return quantum_params

# ===============================================================
# 2. HELPER FUNCTIONS
# ===============================================================
def calculate_quantum_angle(squared_fidelity: float) -> float:
    """
    Calculates the quantum angle (Fubini-Study distance) between two state vectors.

    This angle represents the geometric distance between two pure states on the
    surface of the generalized Bloch sphere (Hilbert space). It is a key metric for
    quantifying the similarity of quantum states.

    Args:
        squared_fidelity (float): The squared magnitude of the inner product
                                  between the two states, i.e., |<ψ|φ>|². This is
                                  the value typically estimated by a swap test.

    Returns:
        float: The angle between the two state vectors in radians, a value from
               0 (identical states) to π/2 (orthogonal states).
    """
    # Take the square root to get the fidelity |<ψ|φ>|
    fidelity = math.sqrt(abs(squared_fidelity))

    # Clamp the value between -1.0 and 1.0 for numerical stability with acos
    clamped_fidelity = max(-1.0, min(1.0, fidelity))

    # The angle is the arccosine of the fidelity
    return math.acos(clamped_fidelity)

def get_diagram_width(diagram):
    """Calculates the true maximum width of a diagram at any point."""
    if not diagram.boxes:
        return len(diagram.cod)
    # The width is the maximum wire index a box acts on.
    return max(
        [offset + len(box.dom) for box, offset in zip(diagram.boxes, diagram.offsets)]
        + [len(diagram.cod)]
    )
    
def execute_discopy_diagram(current_width, diagram, params, wires, embedding_method='simple_pad', rotation_param=None, entangling_param=None):
    """
    Executes a DisCoPy/lambeq diagram's instructions on a specific set of wires,
    and optionally applies an entangling layer afterwards.
    """
    # Step 1: Execute the original sentence diagram as before
    wire_map = {i: w for i, w in enumerate(wires)}
    param_idx = 0
    for gate, offset in zip(diagram.boxes, diagram.offsets):
        if hasattr(qml, gate.name):
            op = getattr(qml, gate.name)
            gate_params = []
            num_params = len(gate.free_symbols)
            if num_params > 0:
                gate_params = params[param_idx : param_idx + num_params]
                param_idx += num_params
            target_wires = [wire_map[i + offset] for i in range(len(gate.dom))]
            op(*gate_params, wires=target_wires)
    # Step 2: Apply Padding Method
    ancilla_wires = wires[current_width:]
    if embedding_method == 'parameterized':
        # 1. Apply parameterized rotation layer
        if rotation_param is not None:
            for w in ancilla_wires:
                qml.RY(rotation_param, wires=w)
        # 2. Apply parameterized entangling layer (circular CPHASE)
        if entangling_param is not None and len(ancilla_wires) > 1:
            for i in range(len(ancilla_wires)):
                qml.CPHASE(entangling_param, wires=[ancilla_wires[i], ancilla_wires[(i + 1) % len(ancilla_wires)]])

    elif embedding_method == 'superposition':
        # 1. Apply Hadamard gates to all ancilla qubits to create superposition.
        for w in ancilla_wires:
            qml.Hadamard(wires=w)
        # 2. Apply a chain of CNOTs to entangle the ancilla qubits with each other.
        if len(ancilla_wires) > 1:
            for i in range(len(ancilla_wires) - 1):
                qml.CNOT(wires=[ancilla_wires[i], ancilla_wires[i+1]])
            
    elif embedding_method == 'entangle':
        # This remains the CNOT ladder between sentence and ancilla qubits.
        for i in range(min(current_width, len(ancilla_wires))):
            control_wire = wires[i]
            target_wire = ancilla_wires[i]
            qml.CNOT(wires=[control_wire, target_wire])
            
    # If embedding_method is 'simple_pad', we do nothing extra.
# ===============================================================
# 3. PREPROCESSING FUNCTION
# ===============================================================
def preprocess_data_for_model(data_pairs, Tokeniser, ansatz, parser, rewriter, qubit_limit=20):
    print(f"Starting preprocessing with a qubit limit of {qubit_limit}...")
    filtered_pairs, all_symbols, n_max = [], set(), 0
    for s1, s2, is_duplicate in data_pairs:
        try:
            d1 = ansatz(rewriter(parser.sentence2diagram(Tokeniser.tokenise_sentence(s1), tokenised=True)))
            d2 = ansatz(rewriter(parser.sentence2diagram(Tokeniser.tokenise_sentence(s2), tokenised=True)))
            width1 = get_diagram_width(d1)
            width2 = get_diagram_width(d2)
            if width1 <= qubit_limit and width2 <= qubit_limit:
                pair_data = {
                    's1': s1, 's2': s2, 'label': is_duplicate, 'd1': d1, 'd2': d2,
                    'structural_disparity': abs(len(d1.cod) - len(d2.cod)),
                    'width1': width1,
                    'width2': width2
                }
                filtered_pairs.append(pair_data)
                all_symbols.update(d1.free_symbols)
                all_symbols.update(d2.free_symbols)
                n_max = max(n_max, width1, width2)
        except Exception:
            continue
    print(f"Preprocessing complete. Found {len(filtered_pairs)} valid pairs.")
    print(f"Total unique parameters (symbols) found: {len(all_symbols)}")
    print(f"N_Max for the filtered dataset is: {n_max}")
    return filtered_pairs, sorted(list(all_symbols), key=lambda s: s.name), n_max

# ===============================================================
# 4. THE TRAINING FUNCTION (with Adam & Param Tracking)
# ===============================================================
def train_model(model, data, n_max, device_name, base_learning_rate, lambda_penalty, epochs, embedding_method='entangle'):
    optimizer = Adam(model.parameters(), lr=base_learning_rate)
    swap_dev = qml.device(device_name, wires=1 + 2 * n_max)

    loss_history, param_history = [], []
    print("--- Starting training with END-TO-END PyTorch ---")

    for epoch in range(epochs):
        total_loss_epoch, num_trained_pairs = 0, 0
        for i, pair in enumerate(data):
            
            # --- THE FIX: Define the QNode INSIDE the loop ---
            # This allows it to "close over" the non-tensor pair data.
            @qml.qnode(swap_dev, interface="torch")
            def swap_test_qnode(p1, p2, theta, phi):
                qml.Hadamard(wires=0)
                execute_discopy_diagram(
                    pair['width1'], pair['d1'], p1, wires=range(1, 1 + n_max), 
                    embedding_method=embedding_method, 
                    rotation_param=theta, entangling_param=phi)
                execute_discopy_diagram(
                    pair['width2'], pair['d2'], p2, wires=range(1 + n_max, 1 + 2 * n_max), 
                    embedding_method=embedding_method, 
                    rotation_param=theta, entangling_param=phi)
                for j in range(n_max):
                    qml.CSWAP(wires=[0, 1 + j, 1 + n_max + j])
                qml.Hadamard(wires=0)
                return qml.expval(qml.PauliZ(0))
            # ----------------------------------------------------

            optimizer.zero_grad()
            
            params1 = model(pair['d1'])
            params2 = model(pair['d2'])
            
            if params1.nelement() == 0 or params2.nelement() == 0:
                continue
            num_trained_pairs += 1

            # The QNode is now called with only the trainable tensors
            measured_overlap = swap_test_qnode(
                params1, params2, 
                model.padding_theta, model.padding_phi
            )
            
            fidelity_loss = (measured_overlap - pair['label'])**2
            structural_penalty = lambda_penalty * pair['structural_disparity']
            loss = fidelity_loss + structural_penalty

            # This will now work correctly
            loss.backward()
            optimizer.step()
            
            total_loss_epoch += loss.item()
        
        avg_loss = total_loss_epoch / num_trained_pairs if num_trained_pairs > 0 else 0
        loss_history.append(avg_loss)

        all_params = torch.cat([p.data.flatten() for p in model.parameters()]).detach().numpy()
        if all_params.size > 0:
            param_history.append({'mean': np.mean(all_params), 'std': np.std(all_params),
                                  'min': np.min(all_params), 'max': np.max(all_params)})
        
        print(f"Epoch {epoch+1}/{epochs}, Average Penalized Loss: {avg_loss:.4f}")
        
    return model, loss_history, param_history
# ===============================================================
# 5. Inference Function
# ===============================================================
def evaluate_model(model, test_data, n_max, device_name, embedding_method='entangle'):
    """
    Evaluates a trained model. This version is updated to pass the
    pre-calculated diagram widths to the execution function.
    """
    print("\n--- Starting Evaluation on Test Set ---")
    model.eval()
    swap_dev = qml.device(device_name, wires=1 + 2 * n_max)
    
    predictions = []
    true_labels = []
    
    with torch.no_grad():
        for i, pair in enumerate(test_data):
            params1, params2 = model(pair['d1']), model(pair['d2'])
            padding_theta = model.padding_theta
            padding_phi = model.padding_phi
            
            if params1.nelement() == 0 or params2.nelement() == 0:
                continue

            @qml.qnode(swap_dev, interface="torch")
            def swap_test_qnode(p1, p2, theta, phi):
                qml.Hadamard(wires=0)
                execute_discopy_diagram(
                    pair['width1'], pair['d1'], p1, wires=range(1, 1 + n_max),
                    embedding_method=embedding_method,
                    rotation_param=theta, entangling_param=phi)
                execute_discopy_diagram(
                    pair['width2'], pair['d2'], p2, wires=range(1 + n_max, 1 + 2 * n_max),
                    embedding_method=embedding_method,
                    rotation_param=theta, entangling_param=phi)
                for j in range(n_max):
                    qml.CSWAP(wires=[0, 1 + j, 1 + n_max + j])
                qml.Hadamard(wires=0)
                return qml.expval(qml.PauliZ(0))
            
            measured_overlap = swap_test_qnode(params1, params2, padding_theta, padding_phi)
            
            predictions.append(measured_overlap.item())
            true_labels.append(pair['label'])

    if not predictions:
        print("No valid pairs in the test set to evaluate.")
        return

    predictions = np.array(predictions)
    true_labels = np.array(true_labels)
    
    mse = np.mean((predictions - true_labels)**2)
    print(f"Test Set Mean Squared Error (MSE): {mse:.4f}")

    binary_preds = (predictions > 0.5).astype(int)
    accuracy = np.mean(binary_preds == true_labels) * 100
    print(f"Test Set Accuracy: {accuracy:.2f}%")
    print("\n--- Evaluation Plots ---")
    plot_confusion_matrix(true_labels, predictions)
    plot_roc_curve(true_labels, predictions)


In [11]:
#Click this to run model
# ===============================================================
# MAIN EXECUTION BLOCK (with Configuration File)
# ===============================================================
def main(config_path: str):
    """Main function to run the entire workflow from a config file."""
    
    # --- 1. Load Configuration ---
    with open(config_path, 'r') as f:
        config = yaml.safe_load(f)
    print("Configuration loaded:")
    print(yaml.dump(config, indent=2))

    # --- Set CPU Cores for Simulator ---
    if 'simulation' in config and 'cpu_cores' in config['simulation']:
        num_cores = config['simulation']['cpu_cores']
        if num_cores and num_cores > 0:
            os.environ['OMP_NUM_THREADS'] = str(num_cores)
            print(f"\n---> Set simulator CPU cores to {num_cores} <---\n")

    # --- 2. Initialize Objects from Config ---
    tokeniser = SpacyTokeniser()
    parser = BobcatParser()
    rewriter = Rewriter(config['qnlp']['rewrite_rules'])
    
    N = AtomicType.NOUN
    S = AtomicType.SENTENCE
    OB_MAP: dict[Ty, int] = { N: 1, S: 1 }# pyright: ignore

    ansatz = IQPAnsatz(OB_MAP, n_layers=config['qnlp']['n_layers'])# pyright: ignore

    # --- 3. Load FastText & Preprocess Data ---
    word_to_idx, embeddings = load_fasttext_embeddings(config['data']['fasttext_path'])
    
    sentences1, sentences2, value = load_data(
        config['data']['path'], 
        sample_fraction=config['data']['sample_fraction']
    )
    data_pairs = list(zip(sentences1, sentences2, value))
    
    filtered_data, symbols, n_max = preprocess_data_for_model(
        data_pairs, tokeniser, ansatz, parser, rewriter, 
        qubit_limit=config['data']['qubit_limit']
    )
    
    if filtered_data and n_max > 0:
        # --- 4. Create Datasets with Train/Test Split ---
        labels = [pair['label'] for pair in filtered_data]
        train_data_raw, test_data = train_test_split(
            filtered_data, test_size=0.2, random_state=42, stratify=labels
        )
        training_data = create_balanced_training_set(train_data_raw)
        
        print(f"\nOriginal data split into {len(training_data)} training pairs and {len(test_data)} test pairs.")
        
        # --- 5. Initialize Model and Train ---
        model = QNLPModel(symbols, word_to_idx, embeddings)
        device_name = config['simulation']['device']
        trained_model, loss_history, param_history = train_model(
            model,  # Pass the initialized model
            training_data, 
            n_max,
            device_name,
            base_learning_rate=config['training']['base_learning_rate'],
            lambda_penalty=config['training']['lambda_penalty'],
            epochs=config['training']['epochs'],
            embedding_method=config['qnlp']['embedding_method']
        )

        # --- 6. Evaluate and Plot ---
        evaluate_model(trained_model, test_data, n_max, device_name, embedding_method=config['qnlp']['embedding_method'])
        
        print("\n--- Training Analysis ---")
        plot_training_history(loss_history)
        plot_parameter_evolution_polar(param_history)
        plot_parameter_deltas(param_history)
    else:
        print("\nNo data to train on. Please check your config file.")

    print("\nProgram finished.")

if __name__ == '__main__':
    config_file_path = 'config.yaml'
    main(config_file_path)

Configuration loaded:
data:
  fasttext_path: C:/Users/Jash/Documents/Research/Semantic Equivilance/SemanticEquivilance/crawl-300d-2M.vec
  path: C:/Users/Jash/Documents/Research/Semantic Equivilance/SemanticEquivilance/question_pairs/questions.csv
  qubit_limit: 12
  sample_fraction: 0.05
  test_size: 0.2
qnlp:
  embedding_method: entangle
  n_layers: 1
  rewrite_rules:
  - curry
  - prepositional_phrase
  - determiner
simulation:
  cpu_cores: 6
  device: lightning.qubit
training:
  base_learning_rate: 0.001
  epochs: 5
  lambda_penalty: 0.1


---> Set simulator CPU cores to 6 <---

Loading FastText embeddings from C:/Users/Jash/Documents/Research/Semantic Equivilance/SemanticEquivilance/crawl-300d-2M.vec...
Loaded 1999996 word vectors.
Loaded 20218 sentence pairs.
Starting preprocessing with a qubit limit of 12...
Preprocessing complete. Found 498 valid pairs.
Total unique parameters (symbols) found: 3973
N_Max for the filtered dataset is: 12
Created a balanced training set with 142 p

RuntimeError: element 0 of tensors does not require grad and does not have a grad_fn