In [347]:
import numpy as np 
import pandas as pd 
from tqdm import tqdm 
from discopro.grammar import tensor
from lambeq import BobcatParser, NumpyModel, AtomicType, Rewriter, Dataset, QuantumTrainer, SPSAOptimizer , AtomicType, IQPAnsatz, RemoveCupsRewriter
from lambeq.backend.grammar import Spider
from lambeq.rewrite import UnifyCodomainRewriter
from lambeq.backend.drawing import draw
from lambeq.backend.grammar import Ty
from tqdm import tqdm
import random
import datetime
from discopro.anaphora import connect_anaphora_on_top
from contextuality.model import Model, Scenario, CyclicScenario
from lambeq.backend.quantum import Box, qubit, SelfConjugate, Ry, Diagram

In [348]:
# Atomic grammatical types referenced by the ansatz below.
N = AtomicType.NOUN
S = AtomicType.SENTENCE
P = AtomicType.PREPOSITIONAL_PHRASE

# Diagram-level tools shared by every cell that builds circuits.
remove_cups = RemoveCupsRewriter()
parser = BobcatParser()
rewriter = Rewriter([
    'auxiliary',
    'connector',
    'coordination',
    'determiner',
    'object_rel_pronoun',
    'subject_rel_pronoun',
    'postadverb',
    'preadverb',
    'prepositional_phrase',
])

# Each atomic type is mapped to 1 in the ansatz; a single IQP layer with
# three parameters per single-qubit box.
ansatz = IQPAnsatz(
    {N: 1, S: 1, P: 1},
    n_layers=1,
    n_single_qubit_params=3,
)

In [349]:
def _find_box_index(diagram, word):
    """Return the index of the first box in ``diagram`` whose name matches ``word``.

    The comparison is case-insensitive (``str.casefold``).

    Raises:
        ValueError: if no box in ``diagram.boxes`` carries that name.
    """
    target = word.casefold()
    for idx, box in enumerate(diagram.boxes):
        if box.name.casefold() == target:
            return idx
    # A bare ``next(...)`` would leak StopIteration with an empty message here.
    raise ValueError(f"No box named {word!r} found in diagram")


def generate_diagram(diagram, pro, ref):
    """Connect a pronoun to its referent and simplify the resulting diagram.

    Args:
        diagram: Diagram containing both the pronoun and the referent as boxes.
        pro: Pronoun word; matched case-insensitively against box names.
        ref: Referent word; matched case-insensitively against box names.

    Returns:
        The diagram after ``connect_anaphora_on_top``, cup removal, the
        configured rewrite rules, and ``normal_form()``.

    Raises:
        ValueError: if ``pro`` or ``ref`` does not match any box name.
    """
    pro_box_idx = _find_box_index(diagram, pro)
    ref_box_idx = _find_box_index(diagram, ref)
    final_diagram = connect_anaphora_on_top(diagram, pro_box_idx, ref_box_idx)
    return rewriter(remove_cups(final_diagram)).normal_form()

In [350]:
def sent2dig(sentence1: str, sentence2: str, pro: str, ref: str):
    """Parse two sentences and return one diagram with the anaphora resolved.

    Both sentences are parsed, placed side by side with ``tensor`` and handed
    to ``generate_diagram`` together with the pronoun and referent words.

    Earlier experiments that merged the outputs (a ``Spider`` on the sentence
    type, or ``UnifyCodomainRewriter``) are currently disabled.
    """
    parsed_first = parser.sentence2diagram(sentence1)
    parsed_second = parser.sentence2diagram(sentence2)
    side_by_side = tensor(parsed_first, parsed_second)
    return generate_diagram(side_by_side, pro, ref)

In [356]:
def gen_labels(df: pd.DataFrame):
    """Build circuits, labels and diagrams for every usable row of ``df``.

    Args:
        df: Frame with the columns ``sentence1``, ``sentence2``, ``pronoun``
            and ``referent``.

    Returns:
        ``(circuits, labels, diagrams)`` as three parallel lists: entry ``k``
        of each list belongs to the same row. Rows for which the diagram or
        the circuit cannot be built are reported and skipped.
    """
    circuits, labels, diagrams = [], [], []
    for _, row in tqdm(df.iterrows(), total=len(df)):
        # Every row gets the same uniform 2x2 label; the experiment that
        # randomly picked between 'referent' and 'wrong_referent' is disabled.
        label = [[0.25, 0.25], [0.25, 0.25]]
        sent1, sent2, pro, ref = row[['sentence1', 'sentence2', 'pronoun', 'referent']]

        try:
            diagram = sent2dig(sent1.strip(), sent2.strip(), pro.strip(), ref.strip())
            circ = ansatz(diagram)
        except Exception as e:
            # Deliberate best-effort: report the failing row and carry on.
            print("Error: ", e)
            continue

        # Append only after BOTH steps succeeded so the three lists stay
        # aligned (previously a failing ansatz left an orphan diagram behind).
        diagrams.append(diagram)
        circuits.append(circ)
        labels.append(label)
    return circuits, labels, diagrams

In [357]:
# Read the three dataset splits; the first CSV column is used as the index.
df_train, df_val, df_test = (
    pd.read_csv(csv_path, index_col=0)
    for csv_path in (
        'dataset/original_data/train.csv',
        'dataset/original_data/val.csv',
        'dataset/original_data/test.csv',
    )
)

In [358]:
# Only the first 10 rows of each split are converted here.
# gen_labels skips rows it cannot convert, so each list may end up shorter
# than 10 entries.
train_circuits, train_labels, train_diagrams = gen_labels(df_train[:10])
val_circuits, val_labels, val_diagrams = gen_labels(df_val[:10])
test_circuits, test_labels, test_diagrams = gen_labels(df_test[:10])

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:03<00:00,  2.57it/s]
 20%|████████████████████████                                                                                                | 2/10 [00:00<00:03,  2.58it/s]

Error:  Diagram 0 (cod=n @ n.r @ n @ n) does not compose with diagram 1 (dom=n @ p.r @ n)


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:03<00:00,  2.78it/s]


Error:  Diagram 0 (cod=) does not compose with diagram 1 (dom=qubit)


 20%|████████████████████████                                                                                                | 2/10 [00:00<00:02,  2.95it/s]

Error:  Diagram 0 (cod=) does not compose with diagram 1 (dom=qubit)


 50%|████████████████████████████████████████████████████████████                                                            | 5/10 [00:01<00:01,  3.02it/s]

Error:  Diagram 0 (cod=) does not compose with diagram 1 (dom=qubit)


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:03<00:00,  2.96it/s]


In [None]:
from lambeq.training import BinaryCrossEntropyLoss

# The model is built from the circuits of all three splits.
all_circuits = train_circuits + val_circuits + test_circuits
model = NumpyModel.from_diagrams(all_circuits, use_jit=True)


def loss(y_hat, y):
    """Cross-entropy of predictions ``y_hat`` against labels ``y``, divided by ``len(y)``."""
    return -np.sum(y * np.log(y_hat)) / len(y)


def acc(y_hat, y):
    """Fraction of rounded predictions equal to the labels.

    The result is halved because each sample is counted twice.
    NOTE(review): gen_labels emits labels that are all 0.25, which a rounded
    prediction can never equal - confirm this metric is meaningful here.
    """
    return np.sum(np.round(y_hat) == np.array(y)) / len(y) / 2


eval_metrics = {"acc": acc}

In [None]:
def main(EPOCHS: int, SEED: int, BATCH_SIZE: int) -> None:
    """Train the global ``model`` with SPSA and print its test accuracy.

    Args:
        EPOCHS: Number of training epochs; also scales the SPSA ``A`` value.
        SEED: Seed handed to the trainer.
        BATCH_SIZE: Batch size of the training dataset.
    """
    spsa_hyperparams = {'a': 0.1, 'c': 0.06, 'A': 0.01 * EPOCHS}
    trainer = QuantumTrainer(
        model,
        loss_function=loss,
        epochs=EPOCHS,
        optimizer=SPSAOptimizer,
        optim_hyperparams=spsa_hyperparams,
        evaluate_functions=eval_metrics,
        evaluate_on_train=True,
        verbose='text',
        seed=SEED,
    )

    training_set = Dataset(train_circuits, train_labels, batch_size=BATCH_SIZE)
    validation_set = Dataset(val_circuits, val_labels, shuffle=False)

    # Print a timestamp marking the start of this training run.
    started_at = datetime.datetime.now().strftime("%Y-%m-%d_%H_%M_%S")
    print(started_at)

    trainer.fit(training_set, validation_set, eval_interval=1, log_interval=1)

    test_predictions = model(test_circuits)
    print('Test accuracy:', acc(test_predictions, test_labels))

In [342]:
# Training configuration for this run.
BATCH_SIZE = 5
EPOCHS = 100
# The seed is picked at random from a fixed pool each time this cell runs.
SEED = random.choice(
    [0, 10, 50, 77, 100, 111, 150, 169, 200, 234, 250, 300, 350, 400, 450]
)

In [None]:
# Start training with the configuration chosen for this run.
main(EPOCHS=EPOCHS, SEED=SEED, BATCH_SIZE=BATCH_SIZE)

In [345]:
def gen_emp(diags: [Diagram], model: NumpyModel) -> Model:
    """Build an empirical ``Model`` from the model's outputs for ``diags``.

    Each diagram is evaluated on its own with ``model.get_diagram_output`` and
    the result is flattened into one row of the distribution table. The rows
    are paired with a ``CyclicScenario`` over the observables a, b, A, B.
    """
    scenario = CyclicScenario(['a','b','A','B'],2)
    distributions = [
        model.get_diagram_output([circuit])[0].flatten()
        for circuit in diags
    ]
    return Model(scenario, distributions)

In [None]:
# Restore the trained model from the checkpoint written during training.
# initialise_weights() must NOT be called afterwards: it assigns fresh
# weights and would discard the trained ones loaded from the checkpoint.
best_model = NumpyModel.from_checkpoint('runs/Jun18_11-50-11_TLS-MBP.local/best_model.lt')

In [None]:
res = dict()

# Ry arguments (wire 0, wire 1) for the four measurement contexts, in the
# order XX, XZ, ZX, ZZ.
# NOTE(review): lambeq rotation gates may take their phase in units of full
# turns rather than radians - confirm these values are what is intended.
measurement_angles = [
    (0, np.pi/8),            # XX
    (0, 3*np.pi/8),          # XZ
    (np.pi/4, np.pi/8),      # ZX
    (np.pi/4, 3*np.pi/8),    # ZZ
]

for i, row in tqdm(df_test.iterrows(), total=len(df_test)):
    s1, s2, p, n = row[['sentence1','sentence2','pronoun','referent']]

    # Same preprocessing and best-effort policy as gen_labels: strip the text
    # fields and skip (but report) rows whose circuit cannot be built, instead
    # of aborting the whole analysis on the first failing row.
    try:
        diag = ansatz(sent2dig(s1.strip(), s2.strip(), p.strip(), n.strip()))
    except Exception as e:
        print("Error: ", e)
        continue

    # One circuit per context: rotate wire 0, then wire 1.
    context_diags = [
        diag.apply_gate(Ry(angle_0), 0).apply_gate(Ry(angle_1), 1)
        for angle_0, angle_1 in measurement_angles
    ]

    emp_model = gen_emp(context_diags, best_model)

    # Keyed by the concatenated (unstripped) sentence pair.
    res[s1+s2] = (emp_model.contextual_fraction(),
                  emp_model.signalling_fraction(),
                  emp_model.CbD_direct_influence(),
                  emp_model.CbD_measure())

In [346]:
import json


def _to_builtin(value):
    """Fallback for ``json.dumps``: turn objects exposing ``tolist()`` into plain Python values.

    Presumably the metrics can be NumPy scalars/arrays - TODO confirm.
    """
    if hasattr(value, "tolist"):
        return value.tolist()
    raise TypeError(f"Object of type {type(value).__name__} is not JSON serializable")


# Write one JSON object per line: {sentence pair: [metric values]}.
# `res` is NOT reset here - doing so would throw away the results computed by
# the analysis loop before anything is written.
with open("output.json", "w") as f:
    for sentence_pair, metrics in res.items():
        f.write(json.dumps({sentence_pair: metrics}, default=_to_builtin))
        f.write("\n")