In [1]:
from gensim.models import KeyedVectors
from nltk import pos_tag, word_tokenize
from nltk.stem import PorterStemmer
import pickle

In [2]:
wv_file = 'py_files/saved_objects/poetic_embeddings.300d.txt'
# wv_file = '~/Downloads/glove.6B/glove.6B.300d.w2v.txt'
postag_file = 'py_files/saved_objects/postag_dict_all.p'
poetic_vectors = KeyedVectors.load_word2vec_format(wv_file, binary=False)

with open(postag_file, 'rb') as f:
    postag_dict = pickle.load(f)
pos_to_words = postag_dict[1]
words_to_pos = postag_dict[2]

In [3]:
pos_tag(word_tokenize('There once was a girl named Mary, who loved to read in the library. She borrowed a thick book, and found a nice nook, and flew away like a fairy'))

[('There', 'EX'),
 ('once', 'RB'),
 ('was', 'VBD'),
 ('a', 'DT'),
 ('girl', 'NN'),
 ('named', 'VBN'),
 ('Mary', 'NNP'),
 (',', ','),
 ('who', 'WP'),
 ('loved', 'VBD'),
 ('to', 'TO'),
 ('read', 'VB'),
 ('in', 'IN'),
 ('the', 'DT'),
 ('library', 'NN'),
 ('.', '.'),
 ('She', 'PRP'),
 ('borrowed', 'VBD'),
 ('a', 'DT'),
 ('thick', 'JJ'),
 ('book', 'NN'),
 (',', ','),
 ('and', 'CC'),
 ('found', 'VBD'),
 ('a', 'DT'),
 ('nice', 'JJ'),
 ('nook', 'NN'),
 (',', ','),
 ('and', 'CC'),
 ('flew', 'VBD'),
 ('away', 'RB'),
 ('like', 'IN'),
 ('a', 'DT'),
 ('fairy', 'NN')]

In [4]:
storyline = ['mary', 'library', 'book', 'nook', 'fairy']
template = [['EX', 'RB', 'VBD', 'DT', 'NN', 'VBN', 'NNP'],
             ['WP', 'VBD', 'TO', 'VB', 'IN', 'DT', 'NN'],
             ['PRR', 'VBD', 'DT', 'JJ', 'NN'],
             ['CC', 'VBD', 'DT', 'JJ', 'NN'],
             ['CC', 'VBD', 'RB', 'IN', 'DT', 'NN']]

In [5]:
poetic_vectors.most_similar_cosmul(positive=[poetic_vectors.get_vector('mary'), poetic_vectors.get_vector('library')], topn=10)

[('library', 4.65390157699585),
 ('mary', 4.639703273773193),
 ('elizabeth', 3.633802890777588),
 ('chapel', 3.4066123962402344),
 ('hall', 3.095370054244995),
 ('dedicated', 3.0537571907043457),
 ('margaret', 3.0534539222717285),
 ('college', 3.0436367988586426),
 ('church', 2.99871563911438),
 ('librarian', 2.894843816757202)]

In [16]:
def similar_pos(words, pos, topn=200):
    matching_words = []
    embeddings = [poetic_vectors.get_vector(w) for w in words]
    for candidate_word, similarity in poetic_vectors.most_similar(positive=embeddings, topn=topn):
        if pos in words_to_pos[candidate_word]:
            matching_words.append(candidate_word)
    return matching_words

In [8]:
similar_pos(['mary', 'library'], ['VBD'])

['housed', 'established', 'became', 'devoted', 'wrote', 'sacred']

In [31]:
def get_sliding_window_words(storyline, window_padding=1):
    context_words = []
    for i, word in enumerate(storyline):
        start = max(0, i - window_padding)
        end = min(len(storyline), i + window_padding) + 1
        words = storyline[start:end]
        context_words.append(words)
    return context_words

def fill_in_template(template, storyline, fill_pos):
    print(template)
    empty_template = []
    for i, line in enumerate(template):
        empty_line = ['' for j in range(len(line) - 1)]
        empty_line.append(storyline[i])
        empty_template.append(empty_line)
    context_words = get_sliding_window_words(storyline)
    ps = PorterStemmer()
    used_words = set([ps.stem(w) for w in storyline])
    for i, line in enumerate(template):
        for j, pos in enumerate(line):
            if pos == 'EX':
                empty_template[i][j] = 'there'
            if pos == 'TO':
                empty_template[i][j] = 'to'
            if pos == 'WP$':
                empty_template[i][j] = 'whose'
            if pos == 'SO':
                empty_template[i][j] = 'so'
            if pos == 'WHO':
                empty_template[i][j] = 'who'
            if pos == 'THAN':
                empty_template[i][j] = 'than'
            if pos == 'AS':
                empty_template[i][j] = 'as'
            if pos == 'WHEN':
                empty_template[i][j] = 'when'
            if pos == 'IF':
                empty_template[i][j] = 'if'
            if pos == 'POS':
                empty_template[i][j] = '\'s'
            if pos == 'WHILE':
                empty_template[i][j] = 'while'
            if pos in fill_pos and j < len(line) - 1:
                fill_words = [w for w in similar_pos(context_words[i], pos) if ps.stem(w) not in used_words]
                if len(fill_words) > 0:
                    empty_template[i][j] = fill_words[0]
                    used_words.add(ps.stem(fill_words[0]))
    return empty_template

In [38]:
fill_in_template(template, storyline, ['VBD'])

[['EX', 'RB', 'VBD', 'DT', 'NN', 'VBN', 'NNP'], ['WP', 'VBD', 'TO', 'VB', 'IN', 'DT', 'NN'], ['PRR', 'VBD', 'DT', 'JJ', 'NN'], ['CC', 'VBD', 'DT', 'JJ', 'NN'], ['CC', 'VBD', 'RB', 'IN', 'DT', 'NN']]


[['there', '', 'housed', '', '', '', 'mary'],
 ['', 'wrote', 'to', '', '', '', 'library'],
 ['', 'read', '', '', 'book'],
 ['', 'enchanted', '', '', 'nook'],
 ['', 'eared', '', '', '', 'fairy']]

In [19]:
fill_in_template(template, storyline, ['JJ'])

[['EX', 'RB', 'VBD', 'DT', 'NN', 'VBN', 'NNP'], ['WP', 'VBD', 'TO', 'VB', 'IN', 'DT', 'NN'], ['PRR', 'VBD', 'DT', 'JJ', 'NN'], ['CC', 'VBD', 'DT', 'JJ', 'NN'], ['CC', 'VBD', 'RB', 'IN', 'DT', 'NN']]


[['', '', '', '', '', '', 'mary'],
 ['', '', '', '', '', '', 'library'],
 ['', '', '', 'novel', 'book'],
 ['', '', '', 'comic', 'nook'],
 ['', '', '', '', '', 'fairy']]

In [20]:
fill_in_template(template, storyline, ['RB'])

[['EX', 'RB', 'VBD', 'DT', 'NN', 'VBN', 'NNP'], ['WP', 'VBD', 'TO', 'VB', 'IN', 'DT', 'NN'], ['PRR', 'VBD', 'DT', 'JJ', 'NN'], ['CC', 'VBD', 'DT', 'JJ', 'NN'], ['CC', 'VBD', 'RB', 'IN', 'DT', 'NN']]


[['', 'later', '', '', '', '', 'mary'],
 ['', '', '', '', '', '', 'library'],
 ['', '', '', '', 'book'],
 ['', '', '', '', 'nook'],
 ['', '', 'lovely', '', '', 'fairy']]

In [37]:
fill_in_template(template, storyline, ['VBD', 'JJ', 'RB'])

[['EX', 'RB', 'VBD', 'DT', 'NN', 'VBN', 'NNP'], ['WP', 'VBD', 'TO', 'VB', 'IN', 'DT', 'NN'], ['PRR', 'VBD', 'DT', 'JJ', 'NN'], ['CC', 'VBD', 'DT', 'JJ', 'NN'], ['CC', 'VBD', 'RB', 'IN', 'DT', 'NN']]


[['there', 'later', 'housed', '', '', '', 'mary'],
 ['', 'wrote', 'to', '', '', '', 'library'],
 ['', 'read', '', 'novel', 'book'],
 ['', 'enchanted', '', 'comic', 'nook'],
 ['', 'eared', 'lovely', '', '', 'fairy']]