In [1]:
%matplotlib notebook
from matplotlib import rc
rc('text', usetex=True)
import matplotlib.pyplot as plt

In [2]:
from PyQNLPSimulator import PyQNLPSimulator as p
import QNLP as q
import numpy as np

from itertools import product
import tempfile

In [16]:
vsm = q.VectorSpaceModel.VectorSpaceModel(
    corpus_path="/Users/mlxd/Desktop/qs_dev/intel-qnlp/corpus/84-0.txt", 
    mode='l', 
    stop_words=False
)

In [17]:
num_basis_elems = 32
basis     = vsm.define_basis({'verbs' : num_basis_elems, 'nouns' : num_basis_elems})

In [25]:
vsm.sort_basis_tokens_by_dist("verbs")
vsm.sort_basis_tokens_by_dist("nouns")

{'verbs': ['seemed',
  'came',
  'heard',
  'feeling',
  'feel',
  'know',
  'see',
  'appeared',
  'became',
  'passed',
  'made',
  'felt',
  'thought',
  'found',
  'saw',
  'said'],
 'nouns': ['hand',
  'word',
  'creature',
  'mind',
  'towards',
  'heart',
  'death',
  'Elizabeth',
  'night',
  'father',
  'friend',
  'time',
  'life',
  'eye',
  'day',
  'man']}

In [26]:
vsm.assign_indexing("nouns");
vsm.assign_indexing("verbs")

{'nouns': {'hand': 0,
  'word': 1,
  'creature': 3,
  'mind': 2,
  'towards': 6,
  'heart': 7,
  'death': 5,
  'Elizabeth': 4,
  'night': 12,
  'father': 13,
  'friend': 15,
  'time': 14,
  'life': 10,
  'eye': 11,
  'day': 9,
  'man': 8},
 'verbs': {'seemed': 0,
  'came': 1,
  'heard': 3,
  'feeling': 2,
  'feel': 6,
  'know': 7,
  'see': 5,
  'appeared': 4,
  'became': 12,
  'passed': 13,
  'made': 15,
  'felt': 14,
  'thought': 10,
  'found': 11,
  'saw': 9,
  'said': 8}}

In [27]:
# Define basis tokens encoding and decoding dicts
encoding_dict = {"ns" : vsm.encoded_tokens["nouns"],
                 "v"  : vsm.encoded_tokens["verbs"],
                 "no" : vsm.encoded_tokens["nouns"]
                }

decoding_dict = {"ns" : { v:k for k,v in encoding_dict["ns"].items() },
                 "v"  : { v:k for k,v in encoding_dict["v"].items() },
                 "no" : { v:k for k,v in encoding_dict["no"].items() }
                }

In [32]:
# Register must be large enough to support 2*|nouns| + |verbs|
len_reg_memory = len(verb_dist) + 2*len(noun_dist)
len_reg_ancilla = len_reg_memory + 2
num_qubits = len_reg_memory + len_reg_ancilla
print("""{}
Requires {} qubits to encode data using {} 
basis elements in each space, allowing a 
maximum of {} unique patterns.
{}
""".format("#"*48, num_qubits, num_basis_elems, 2**num_qubits, "#"*48)
)

################################################
Requires 14 qubits to encode data using 32 
basis elements in each space, allowing a 
maximum of 16384 unique patterns.
################################################



In [34]:
"""
Require analysis of corpus to determine number of patterns to encode. The current 
encoding method assumes unique patterns, though this can be extended to multiple 
occurrences of the same patterns later. Additionally, we can store 2^num_qubits 
unique patterns. As such, we will restrict the patterns to encode by limiting the
space over which we analyse the corpus-to-basis mapping.
"""
num_bin_pattern = 8

In [35]:
text = """
cats eat tuna.
dogs eat everything.
cats kill birds.
people feed dogs.
bakers use ingredients.
"""
import nltk
token_words = nltk.word_tokenize(text)
nltk.pos_tag(token_words)

[('cats', 'NNS'),
 ('eat', 'VBP'),
 ('tuna', 'NN'),
 ('.', '.'),
 ('dogs', 'NNS'),
 ('eat', 'VBD'),
 ('everything', 'NN'),
 ('.', '.'),
 ('cats', 'NNS'),
 ('kill', 'VBP'),
 ('birds', 'NNS'),
 ('.', '.'),
 ('people', 'NNS'),
 ('feed', 'VBP'),
 ('dogs', 'NNS'),
 ('.', '.'),
 ('bakers', 'NNS'),
 ('use', 'VBP'),
 ('ingredients', 'NNS'),
 ('.', '.')]

In [36]:
fp = tempfile.NamedTemporaryFile(mode='w')
fp.write(text);
fp.flush()

In [37]:
vsm2 = q.VectorSpaceModel.VectorSpaceModel(
    corpus_path=fp.name, 
    mode=0, 
    stop_words=False
)
fp.close()

In [38]:
num_basis_elems2 = 2
basis2     = vsm2.define_basis({'verbs' : num_basis_elems, 'nouns' : num_basis_elems})

verb_dist2 = vsm2.sort_basis_tokens_by_dist("verbs")
noun_dist2 = vsm2.sort_basis_tokens_by_dist("nouns")

In [41]:
vsm2.assign_indexing("nouns");
vsm2.assign_indexing("verbs")

{'nouns': {'ingredients': 0,
  'people': 1,
  'bakers': 3,
  'birds': 2,
  'everything': 6,
  'tuna': 7,
  'dogs': 5,
  'cats': 4},
 'verbs': {'use': 0, 'feed': 1, 'kill': 3, 'eat': 2}}

In [None]:
fp.n