In [50]:
from fidelity_model import FidelityModel, fidelity_pqc_gen
import pandas as pd
from tqdm import tqdm
import numpy as np

In [17]:
def read_file(path, displayname="", limit=500):
    """Load sentence pairs and their labels from a CSV file.

    The CSV must provide the columns ``sentence_1``, ``sentence_2`` and
    ``label``.

    Args:
        path: Location of the CSV file, handed to ``pd.read_csv``.
        displayname: Human-readable name shown in the progress message.
        limit: Maximum number of rows to load from the top of the file.
            Defaults to 500 (the cap this notebook has always used); pass
            ``None`` to load every row.

    Returns:
        A tuple ``(pairs, labels)``: ``pairs`` is a list of
        ``[sentence_1, sentence_2]`` lists and ``labels`` is the list of the
        matching ``label`` values, both in file order.
    """
    print(f"Reading {displayname}...", end="")
    # Parse only the rows that are kept, rather than reading the whole file
    # and slicing afterwards.
    csvfile = pd.read_csv(path, nrows=limit)
    # Column-wise extraction replaces two separate row-by-row iterrows() passes.
    pairs = csvfile[['sentence_1', 'sentence_2']].to_numpy().tolist()
    labels = csvfile['label'].tolist()
    print("Done")
    return pairs, labels

In [36]:
# Directory holding the train/validation CSV splits (relative to the working directory).
DATA_DIR = "model_training/data"

# Each call returns (pairs, labels) for one split.
train_pairs, train_labels = read_file(f"{DATA_DIR}/train_data.csv", "Train Data")
val_pairs, val_labels = read_file(f"{DATA_DIR}/val_data.csv", "Val Data")

Reading Train Data...Done
Reading Val Data...Done


In [19]:
def generate_circuits(pairs, language_model, description="Generating Circuits"):
    """Build one circuit per sentence pair while showing a progress bar.

    Args:
        pairs: Iterable of two-element items ``(sentence_1, sentence_2)``.
        language_model: Forwarded unchanged as the third argument of
            ``fidelity_pqc_gen``.
        description: Label shown in front of the progress bar.

    Returns:
        A list holding the result of ``fidelity_pqc_gen`` for every pair, in
        input order.
    """
    bar = tqdm(pairs, bar_format="{desc}{percentage:3.0f}%|{bar:25}{r_bar}")
    bar.set_description(description)

    built = []
    for first, second in bar:
        built.append(fidelity_pqc_gen(first, second, language_model))
    return built

In [20]:
# Slow step: every pair goes through fidelity_pqc_gen one at a time.
# NOTE(review): the meaning of language_model=1 is defined in fidelity_model — confirm there.
train_circuits = generate_circuits(
    train_pairs,
    language_model=1,
    description="Generating Train Circuits",
)
val_circuits = generate_circuits(
    val_pairs,
    language_model=1,
    description="Generating Val Circuits",
)

Generating Train Circuits: 100%|█████████████████████████| 500/500 [10:44<00:00,  1.29s/it]
Generating Val Circuits: 100%|█████████████████████████| 100/100 [02:08<00:00,  1.29s/it]


## SBERT Similarities

In [None]:
# Checkpoint to evaluate (path relative to the working directory).
MODEL_CHECKPOINT = "model_training/runs/a-1.0-DisCoCat-120_epochs_similarity/best_model.lt"

model = FidelityModel()
model.load(MODEL_CHECKPOINT)

In [56]:
# Run the loaded model over both sets of circuits.
train_output = model(train_circuits)
val_output = model(val_circuits)

# np.corrcoef returns the full correlation matrix; the off-diagonal entry is
# the correlation between labels and model outputs.
train_corr = np.corrcoef(train_labels, train_output)
val_corr = np.corrcoef(val_labels, val_output)

print(f"Train Correlation\n {train_corr}")
print(f"Val Correlation\n {val_corr}")

Train Correlation
 [[1.         0.80701653]
 [0.80701653 1.        ]]
Val Correlation
 [[1.         0.79642708]
 [0.79642708 1.        ]]


In [87]:
# A prediction counts as correct when it lies within THRESHOLD of its label.
THRESHOLD = 0.1

# Convert explicitly to arrays so the subtraction does not rely on
# list-minus-array reflected arithmetic.
train_hits = np.abs(np.asarray(train_labels) - np.asarray(train_output)) < THRESHOLD
val_hits = np.abs(np.asarray(val_labels) - np.asarray(val_output)) < THRESHOLD

# The mean of a boolean mask is the fraction of hits, so the denominator always
# matches the actual number of samples instead of a hard-coded 500 / 100.
print(f"Train Accuracy: {np.mean(train_hits)}")
print(f"Val Accuracy: {np.mean(val_hits)}")

Train Accuracy: 0.33
Val Accuracy: 0.33
