Implementation as a classification task

In [1]:
import discopy
import lambeq
from discopy.grammar.pregroup import Ty, Word, Cup, Diagram
from discopy.quantum import Circuit, sqrt, Ket, H, Rx, CX, SWAP,Bra
from pytket.extensions.qiskit import tk_to_qiskit, AerBackend
import numpy as np
from discopy import CircuitFunctor, qubit
from collections import defaultdict
from nltk.tokenize import word_tokenize
from random import shuffle
import random
from sklearn.metrics import mean_squared_error
from scipy.special import softmax
import qiskit



In [2]:
# lambeq's Bobcat parser; its sentence2diagram() is used below to turn sentences into diagrams.
parser = lambeq.BobcatParser()

In [3]:
# Running box index: cnot_ar increments it once for every box it is asked to map.
counter = -1
# Index of the box that cnot_ar replaces with a parameter-free ("masked") ansatz.
m = 1

In [4]:
# Training data as (sentence, index of the word to mask) pairs; train() reads the index into `m`.
corpus = [('Alice loves Bob', 2)]

In [5]:
# One parameter per word. Looking up an unknown key yields 0.1 (and, being a
# defaultdict, stores that key as a side effect).
params = defaultdict(lambda:0.1)
def initialize_params(seed=None):
    """Give every token of every sentence in ``corpus`` a random value in [0, 1).

    A token that occurs more than once is simply overwritten by its last draw.

    Args:
        seed: Optional seed. When given, values are drawn from a dedicated
            ``random.Random(seed)`` so the initialisation is reproducible.
            When ``None`` (the default) the module-level ``random`` generator
            is used, exactly as before.
    """
    rng = random.Random(seed) if seed is not None else random
    for data_point in corpus:
        sentence = data_point[0]
        for word in word_tokenize(sentence):
            params[word] = rng.random()

initialize_params()
params

defaultdict(<function __main__.<lambda>()>,
            {'Alice': 0.0651797996169392,
             'loves': 0.5117816891714322,
             'Bob': 0.5704345243099592})

In [6]:
# Live key view of `params`: keys added to `params` later also show up here.
vocab = params.keys()

In [7]:
def ansatz_cod_len_1(phase):
    """Circuit for a box whose codomain has length 1: ``Ket(0)`` followed by ``Rx(phase)``."""
    initial_state = Ket(0)
    return initial_state >> Rx(phase)
def ansatz_cod_len_1_masked():
    """Parameter-free circuit for a masked box with codomain length 1: just ``Ket(0)``."""
    initial_state = Ket(0)
    return initial_state
def ansatz_cod_len_2(phase):
    """Circuit for a box whose codomain has length 2: ``Ket(0)`` followed by ``Rx(phase)``."""
    initial_state = Ket(0)
    return initial_state >> Rx(phase)
def ansatz_cod_len_2_masked():
    """Parameter-free circuit for a masked box with codomain length 2: just ``Ket(0)``."""
    initial_state = Ket(0)
    return initial_state
def ansatz_cod_len_3(phase):
    """Circuit for a box whose codomain has length 3.

    Starts from ``Ket(0, 0)``, applies ``H`` on the first wire and
    ``Rx(phase)`` on the second, then a ``CX`` across both.
    """
    initial_state = Ket(0, 0)
    single_qubit_layer = H @ Rx(phase)
    return initial_state >> single_qubit_layer >> CX
def ansatz_cod_len_3_masked():
    """Parameter-free circuit for a masked box with codomain length 3.

    Same shape as ``ansatz_cod_len_3`` with the ``Rx`` rotation replaced by an
    identity wire.
    """
    initial_state = Ket(0, 0)
    single_qubit_layer = H @ Circuit.id(1)
    return initial_state >> single_qubit_layer >> CX
def ansatz_cod_len_4(phase):
    """Circuit for a box whose codomain has length 4.

    Starts from ``Ket(0, 0, 0)``; applies ``Rx(phase)`` and then ``H`` on the
    first wire, a ``CX`` on wires 0-1 and finally a ``CX`` on wires 1-2.
    """
    wire = Circuit.id(1)
    initial_state = Ket(0, 0, 0)
    rotation_layer = Rx(phase) @ wire @ wire
    hadamard_layer = H @ wire @ wire
    first_cx = CX @ wire
    second_cx = wire @ CX
    return initial_state >> rotation_layer >> hadamard_layer >> first_cx >> second_cx
def ansatz_cod_len_4_masked():
    """Parameter-free circuit for a masked box with codomain length 4.

    Same shape as ``ansatz_cod_len_4`` without the ``Rx`` rotation layer.
    """
    wire = Circuit.id(1)
    initial_state = Ket(0, 0, 0)
    hadamard_layer = H @ wire @ wire
    first_cx = CX @ wire
    second_cx = wire @ CX
    return initial_state >> hadamard_layer >> first_cx >> second_cx

In [8]:
# Atomic pregroup types; generate_functor maps `s` to zero qubits and `n` to one qubit.
s, n = Ty('s'), Ty('n')
def cnot_ar(box):
    """Map a diagram box to its circuit ansatz, masking the ``m``-th box.

    Every call advances the module-level ``counter`` by one. The box whose
    running index equals the module-level ``m`` receives the parameter-free
    ("masked") ansatz; every other box receives the parameterised ansatz, with
    its phase looked up in ``params`` under ``str(box)``.

    Args:
        box: The box to translate; ``len(box.cod)`` selects the ansatz family.

    Returns:
        The circuit built by the matching ``ansatz_cod_len_*`` function.

    Raises:
        ValueError: If the codomain length is not 1, 2, 3 or 4. Previously the
            function fell through every branch and silently returned ``None``.
    """
    global counter

    # (parameterised, masked) ansatz pair for each supported codomain length.
    ansatz_by_cod_len = {
        1: (ansatz_cod_len_1, ansatz_cod_len_1_masked),
        2: (ansatz_cod_len_2, ansatz_cod_len_2_masked),
        3: (ansatz_cod_len_3, ansatz_cod_len_3_masked),
        4: (ansatz_cod_len_4, ansatz_cod_len_4_masked),
    }

    cod = len(box.cod)
    box_name = str(box)
    # The counter advances for every box, supported or not, as before.
    counter += 1

    if cod not in ansatz_by_cod_len:
        raise ValueError(
            f"no ansatz defined for box {box_name!r} with codomain length {cod}"
        )

    parameterised, masked = ansatz_by_cod_len[cod]
    if counter == m:
        return masked()
    # Only unmasked boxes touch `params` (a defaultdict lookup may insert the key).
    return parameterised(params[box_name])

def generate_functor():
    """Create a fresh ``CircuitFunctor``: ``s`` -> no qubits, ``n`` -> one qubit, boxes via ``cnot_ar``."""
    qubits_per_type = {s: qubit ** 0, n: qubit ** 1}
    return CircuitFunctor(ob=qubits_per_type, ar=cnot_ar)


In [9]:
# Reference value per sentence. A defaultdict without a factory behaves like a
# plain dict: a missing key raises KeyError.
test_dict = defaultdict()
def build_test_dict():
    """Evaluate the circuit of every corpus sentence once and cache its magnitude.

    For each sentence not yet in ``test_dict`` the sentence is parsed, mapped
    to a circuit with ``generate_functor()`` and evaluated on an ``AerBackend``
    (1024 shots, seed 1); ``np.abs`` of the resulting array is stored under
    the sentence.

    NOTE(review): the functor reads the module-level ``counter`` and ``m``,
    which this function does not reset — the result depends on their values
    at call time.
    """
    # One backend and one compilation pass serve every sentence; the original
    # rebuilt both inside the loop.
    backend = AerBackend()
    compilation = backend.default_compilation_pass(2)
    for data_point in corpus:
        sen = data_point[0]
        if sen in test_dict:
            continue
        dia = generate_functor()(parser.sentence2diagram(sen))
        circ_eval = Circuit.eval(
            dia,
            backend=backend,
            n_shots=1024,
            seed=1,
            compilation=compilation)
        test_dict[sen] = np.abs(circ_eval.array)
build_test_dict()
test_dict

defaultdict(None, {'Alice loves Bob': 0.07421875000000003})

In [10]:
# Loss used by train(): scikit-learn's mean squared error, called as loss(y_true, y_pred).
loss =  mean_squared_error

In [11]:
# Intended number of training epochs. NOTE(review): train() does not read this value.
epochs = 150
# Step size that update() multiplies the loss by.
lr = 0.5

In [12]:
def update(loss, updating_params):
    """Shift every listed parameter by ``lr * loss``.

    Args:
        loss: Scalar that is scaled by the module-level ``lr`` and added to
            each listed parameter.
        updating_params: Keys of ``params`` to modify. A key that appears
            twice is shifted twice.
    """
    for key in updating_params:
        shifted = params[key] + lr * loss
        params[key] = shifted

In [14]:
def train(n_epochs=1):
    """Run masked-word training passes over ``corpus``.

    For every ``(sentence, masked_index)`` pair the word at the masked
    position is replaced by each key of ``params`` in turn. Each candidate
    sentence is parsed, mapped to a circuit and evaluated; its score is
    ``1 - abs(value - test_dict[sentence])``. The softmax of the scores is
    compared, via ``loss``, with a one-hot target marking the candidate equal
    to the original sentence, and the resulting loss is handed to ``update``
    for every token that is not at the masked position. After each sentence
    the value of the last evaluated candidate is printed.

    Args:
        n_epochs: Number of passes over ``corpus``. Defaults to 1, the value
            that used to be hard-coded.

    Side effects:
        Rebinds the module-level ``m`` and ``counter`` and mutates ``params``.
    """
    global m, counter

    # One backend and one compilation pass serve every evaluation; the
    # original rebuilt both for each candidate sentence.
    backend = AerBackend()
    compilation = backend.default_compilation_pass(2)

    for _ in range(n_epochs):
        for data_point in corpus:
            sen = data_point[0]
            m = data_point[1]

            # NOTE(review): the counter is reset once per data point, not per
            # candidate, so it keeps running across the candidates evaluated
            # below — confirm this is the intended masking behaviour.
            counter = -1

            masked_sentence = " ".join(
                "MASK" if i == m else word for i, word in enumerate(sen.split())
            )
            masked_list = [masked_sentence.replace("MASK", name) for name in params.keys()]
            y_true = [1 if tryout == sen else 0 for tryout in masked_list]

            y_pred = []
            for tryout in masked_list:
                circ = generate_functor()(parser.sentence2diagram(tryout))
                circ_eval = Circuit.eval(
                    circ,
                    backend=backend,
                    n_shots=1024,
                    seed=1,
                    compilation=compilation)
                rep = circ_eval.array
                # Original note: "decimal bibl" (presumably: consider the decimal library here).
                y_pred.append(1 - abs(rep - test_dict[sen]))
            logits = softmax(y_pred)
            ls = loss(y_true, logits)

            # Select tokens by position. list.index() returns the first
            # occurrence, so a repeated word used to be compared against `m`
            # at the wrong position (and the sentence was re-tokenized per token).
            updating_params = [
                str(token)
                for position, token in enumerate(word_tokenize(sen))
                if position != m
            ]
            update(ls, updating_params)
            print(rep)


In [15]:
# Run training; the printed value is the evaluation of the last candidate sentence.
train()

(0.33203125000000017+0j)


In [3]:
# Quick sanity check of `loss` on two hand-written vectors.
loss([0.23,0.45],[0.19,0.49])

0.0015999999999999994

In [15]:
import numpy as np
import spsa
# NOTE(review): not referenced by any visible cell — possibly leftover scratch data.
l={1:5,2:6,3:7}
def sphere1(x) -> float:
    """Return the sum over the rows of ``x`` of ``(row[0] - row[1]) ** 2``."""
    squared_gaps = ((row[0] - row[1]) ** 2 for row in x)
    return sum(squared_gaps)
# Minimise sphere1 with SPSA, starting from three (a, b) pairs.
spsa.minimize(sphere1, [(1,5),(2,6),(3,7)])


array([[0.85870968, 0.85870968],
       [0.48411926, 0.48411926],
       [2.71450412, 2.71450412]])

In [43]:
def sphere(x: np.ndarray) -> float:
    """Sphere test function: the sum of the squared entries of ``x``.

    The original returned ``x**2`` unchanged, which is an array for array
    input and so contradicted the ``-> float`` annotation. Reducing with a sum
    always yields a scalar; for scalar input the value is the same as before.

    Args:
        x: A scalar or a NumPy array.

    Returns:
        ``sum(x_i ** 2)`` as a Python float.
    """
    return float(np.sum(x ** 2))

In [44]:
# Minimise the sphere function with SPSA from the starting point 0.6 and print the result.
print(spsa.minimize(sphere,0.6))


-1.9847520485605409e-19
