# Applications of our model

### Imports and utility functions

In [1]:
import pennylane as qml
from pennylane import numpy as np
from config import config
from utils import circuit_final, encode_words
import torch
from torch.autograd import Variable
from sklearn.decomposition import PCA
import pickle
from time import time
import matplotlib.pyplot as plt

# Tape mode had to be switched on explicitly in older PennyLane releases; later
# releases make it the default and no longer expose `enable_tape`, so only call
# it when it exists.
if hasattr(qml, "enable_tape"):
    qml.enable_tape()

# Model hyper-parameters, read from the project-level config module.
num_words = config['NUM_WORDS']
qbits_per_word = config['QUBITS_PER_WORDS']
num_layers = config['NUM_LAYERS']


# S3 location handed to the remote Braket device as its result destination.
my_bucket = "amazon-braket-edb2457fc968" # the name of the bucket
my_prefix = "Variational-NLP" # the name of the folder in the bucket
s3_folder = (my_bucket, my_prefix)

# ARN of the device used when the remote backend is selected.
device_arn = "arn:aws:braket:::device/quantum-simulator/amazon/sv1"

In [2]:
# Data preparation: load embeddings and sentences, reduce and normalise the
# embeddings, pick one masked position per sentence, and build the vocabulary
# lookup tables used by the later cells.
# NOTE: `np` is `pennylane.numpy` in this notebook, which is why arrays carry a
# `requires_grad` attribute.

# Dimension the embeddings are reduced to.
# Presumably one component per basis state of a word register -- confirm
# against `encode_words`.
n_dim = 2**qbits_per_word
max_length = num_words

embeddings = np.load("dummy_dataset/embeddings.npy")
sentences = np.load("dummy_dataset/dummy_sentences_5.npy").astype(int)
#labels = np.load('newsgroup/labels.npy')

# One random position in [0, num_words) per sentence; fixed seed for reproducibility.
np.random.seed(143)
missing_word = np.random.randint(0, num_words, size=len(sentences)).astype(int)#.numpy()

# Shuffle the sentences in place with a second fixed seed.
np.random.seed(32)
np.random.shuffle(sentences)

# PCA is fitted only on rows with a non-zero norm; zero-norm rows stay all-zero
# in the reduced matrix.
norms = np.linalg.norm(embeddings, axis=1)
pca = PCA(n_dim)
embeddings_reduced = np.zeros((embeddings.shape[0], n_dim))
embeddings_reduced[norms>0] = pca.fit_transform(embeddings[norms>0])

# Rescale every retained row to unit L2 norm.
# NOTE(review): the mask uses the norms of the ORIGINAL embeddings, so a row
# whose reduced norm happens to be 0 would divide by zero here -- confirm this
# cannot occur for the dataset in use.
norms_reduced = np.linalg.norm(embeddings_reduced, axis=1).reshape(-1,1)
embeddings_reduced_norm = np.zeros_like(embeddings_reduced)#.numpy()
embeddings_reduced_norm[norms>0] = embeddings_reduced[norms>0] / np.repeat(norms_reduced[norms>0], n_dim, axis=1)

# The data arrays are fixed inputs, not trainable parameters.
embeddings_reduced_norm.requires_grad = False
sentences_truncated = sentences[:,0:max_length]
sentences_truncated.requires_grad = False

missing_word.requires_grad = False

# One row per sentence, initially [0, 1, ..., max_length-1]; the entry at the
# sentence's masked position is then overwritten with `max_length`.
all_indices = np.repeat(np.arange(max_length).reshape((1,-1)), len(sentences), axis=0).astype(int)#.numpy()
for i in range(len(sentences)):
    all_indices[i, missing_word[i]] = max_length
all_indices.requires_grad = False

# SECURITY: unpickling can execute arbitrary code; only load a vocab file from
# a trusted source.
with open('dummy_dataset/vocab.p', 'rb') as readfile:
    vocab = pickle.load(readfile)

# `vocab` maps word -> id. The reverse table keeps only ids whose normalised
# embedding row has a non-zero norm.
word_to_id = vocab
id_to_word = {value:key for key,value in vocab.items() if np.linalg.norm(embeddings_reduced_norm[int(value)])>0}

# Vocabulary ids retained in `id_to_word`, in dictionary order.
word_indices = np.array(list(id_to_word.keys()))

In [3]:
# (num_words + 1) groups of `qbits_per_word` wires plus one extra wire; the
# last wire is the one measured by `compute_overlap_words`.
n_wires = qbits_per_word * (num_words+1) + 1

# Remote Braket device, writing results to `s3_folder`.
# NOTE(review): this device is constructed even though `dev_local` is the one
# selected below; constructing it presumably needs valid AWS credentials --
# confirm, and consider creating it only when it is actually used.
dev_remote = qml.device(
    "braket.aws.qubit",
    device_arn=device_arn,
    wires=n_wires,
    s3_destination_folder=s3_folder,
    parallel=True
)

# Local Braket simulator; expectation values are estimated from 1000 shots.
dev_local = qml.device("braket.local.qubit", wires=n_wires, shots=1000)

# Backend used by the QNode below; swap the two lines to run remotely.
dev = dev_local
#dev = dev_remote

@qml.qnode(dev)
def compute_overlap_words(parameters, embeddings, indices, target_word, wires=dev.wires):
    """QNode: encode the words, run the variational circuit, measure the last wire.

    Args:
        parameters: 3-D array whose last axis indexes the layer; for each layer
            the slice at index 0 of the middle axis and the remaining slices
            are passed to `circuit_final` as a pair. (Presumably shaped
            (qbits_per_word, ceil(num_words / 2) + 1, num_layers) -- confirm.)
        embeddings: word embeddings forwarded to `encode_words`.
        indices: index list forwarded to `encode_words` together with
            `embeddings`.
        target_word: forwarded unchanged to `circuit_final`.
        wires: wires of the circuit; defaults to the wires of `dev`, captured
            when this function is defined.

    Returns:
        Expectation value of PauliZ on the last wire.
    """
    encode_words(embeddings, indices)

    # Split the parameter tensor into one (first slice, remaining slices) pair per layer.
    layer_params = []
    for layer in range(num_layers):
        first_slice = parameters[:, 0, layer]
        remaining_slices = parameters[:, 1:, layer]
        layer_params.append((first_slice, remaining_slices))

    circuit_final(layer_params, wires, num_layers, target_word)

    readout_wire = wires[-1]
    return qml.expval(qml.PauliZ(readout_wire))


def cost(parameters, sentences, missing_words):
    """Sum `compute_overlap_words` over a batch of sentences.

    Row `k` of `sentences` is paired with row `k` of the module-level
    `all_indices`, so `sentences` must be ordered like the array that
    `all_indices` was built from.

    Args:
        parameters: circuit parameters forwarded to `compute_overlap_words`.
        sentences: iterable of word-id sequences; each one selects rows of
            `embeddings_reduced_norm`.
        missing_words: per-sentence value passed as `target_word`.

    Returns:
        The sum of the per-sentence circuit outputs (0 for an empty batch).
    """
    total = 0
    for row, word_ids in enumerate(sentences):
        total += compute_overlap_words(
            parameters,
            embeddings_reduced_norm[word_ids],
            all_indices[row],
            target_word=missing_words[row],
        )
    return total

### Load the parameters

In [42]:
# Alternative: start from random parameters instead of the saved ones. The
# commented line shows the shape the circuit is built for.
#parameters = np.random.rand(qbits_per_word, int(np.ceil(num_words/2))+1, num_layers)
# Saved circuit parameters. (Presumably a training checkpoint for the 5-word
# model -- confirm against the training notebook.)
parameters = np.load('saved_parameters/5_words/params_3_430.npy')

In [5]:
# Show every word known to the model, in vocabulary order.
print([*vocab.keys()])

['woman', 'man', 'chef', 'policeman', 'dog', 'cat', 'apple', 'fish', 'teacher', 'toy', 'kid', 'vegetable', 'doctor', 'car', 'boat', 'bird', 'meat', 'professor', 'president', 'student', 'chair', 'table', 'big', 'old', 'young', 'tiny', 'long', 'heavy', 'blue', 'strong', 'red', 'discret', 'tender', 'rotten', 'gentle', 'funny', 'sad', 'light', 'complex', 'green', 'cheap', 'expensive', 'eat', 'cut', 'cook', 'burn', 'fix', 'repair', 'build', 'hit', 'take', 'make', 'bake', 'paint', 'throw', 'push', 'create', 'look', 'pick', 'chop']


## Predict the missing word

The objective is to fill a blank in a sentence.

In [38]:
def get_most_probable_word(sentence, position, look_in=None):
    """Score every candidate word for the blank at `position`.

    Despite the name, this returns the list of scores rather than a single
    word; callers rank the scores themselves.

    Args:
        sentence: vocabulary ids of the known words (the blank is not included).
        position: index of the blank; must be smaller than `num_words`.
        look_in: optional positions into `word_indices` restricting the
            candidates; by default every entry of `word_indices` is scored.

    Returns:
        A list of floats, one per candidate in the order of
        `word_indices[look_in]`. Entry `j` is the circuit output for candidate
        `word_indices[look_in[j]]`, so `j` is a position in that array and not
        a vocabulary id.
    """
    assert position < num_words
    if look_in is None:
        look_in = np.arange(len(word_indices)).astype(int)

    # Every slot except the blank, followed by the extra slot `num_words`.
    # NOTE(review): this list always has `num_words` entries while the stacked
    # embeddings below have len(sentence) + 1 rows; they only line up when the
    # sentence has num_words - 1 words -- confirm how `encode_words` pairs them.
    register_indices = [slot for slot in range(num_words) if slot != position]
    register_indices.append(num_words)

    context = embeddings_reduced_norm[sentence]
    scores = []
    for candidate_id in word_indices[look_in]:
        # Append the candidate's embedding as the last row after the context words.
        candidate_row = embeddings_reduced_norm[candidate_id].reshape((1, -1))
        stacked = np.concatenate([context, candidate_row], axis=0)
        overlap = compute_overlap_words(parameters, stacked, register_indices, target_word=position)
        scores.append(float(overlap))
    return scores

input_sentence = 'funny [mask] eat cheap vegetable'

list_words = input_sentence.split(' ')

# Exactly one blank is supported; without this check a sentence lacking
# '[mask]' would silently be treated as if its first word were missing.
if list_words.count('[mask]') != 1:
    raise ValueError("input_sentence must contain exactly one '[mask]' token")
missing_index = list_words.index('[mask]')

# Vocabulary ids of the known words, in sentence order (the blank is skipped).
list_index = [vocab[word] for word in list_words if word != '[mask]']

np.random.seed(23)
# look_in=None scores every word that has a usable embedding.
p = get_most_probable_word(list_index, missing_index, look_in=None)

# `p[j]` is the score of candidate `word_indices[j]`: the ranking yields
# positions in `word_indices`, which must be translated to vocabulary ids
# before looking them up in `id_to_word`.
top_positions = np.argsort(p)[::-1][0:5]

print("The 5 most probable words are: ")
print(' '.join(id_to_word[int(word_indices[int(j)])] for j in top_positions))

The 5 most probable words are: 
woman cut man fix throw


In [43]:
# Greedy sentence completion: repeatedly append the best-scoring word until the
# sentence has `num_words` words.
sentence = 'strong kid'

look_in = None#np.random.randint(len(word_indices), size=3).astype(int)

# Vocabulary ids in the order get_most_probable_word scores them, so that a
# position in the score list can be mapped back to the right word.
candidate_ids = word_indices if look_in is None else word_indices[look_in]

# NOTE(review): for the early iterations the prefix has fewer than
# num_words - 1 words; confirm that `encode_words` handles a prefix shorter
# than the register list built inside get_most_probable_word.
for _ in range(len(sentence.split(' ')), num_words):
    list_words = sentence.split(' ')
    list_index = [vocab[word] for word in list_words]

    # The blank is the slot right after the current last word.
    p = get_most_probable_word(list_index, len(list_words), look_in=look_in)

    # argmax is a position in `candidate_ids`, not a vocabulary id.
    best_position = int(np.argmax(p))
    new_word = id_to_word[int(candidate_ids[best_position])]

    sentence = sentence + ' ' + new_word

print(sentence)

strong kid tiny light boat
