In [1]:
%load_ext autoreload
%autoreload 2

In [5]:
# from py_files.Limericks import Limerick_Generate
from py_files.templates import *

import re
import random
import pickle

In [3]:
def clean_corpus(file):
    """Read a text corpus and return it as a flat list of lowercase tokens.

    Digits and the punctuation characters ``. , ? ! ; :`` are removed, blank
    lines are collapsed, the text is lowercased and then split on whitespace.

    Args:
        file: Path of the corpus file; it is decoded as latin-1.

    Returns:
        list[str]: The tokens in their original order.
    """
    # The context manager closes the handle even when read() raises; the
    # previous explicit close() only ran on the success path.
    with open(file, encoding='latin1') as corpus:
        raw_text = corpus.read()

    processed_text = re.sub(r'[0-9]', r'', raw_text)
    processed_text = re.sub(r'\n\n', r' ', processed_text)
    processed_text = re.sub(r'[.,?!;:]', r'', processed_text)
    return processed_text.lower().split()

def train_on_corpus(processed_text):
    """Build a first-order Markov chain from a token sequence.

    Args:
        processed_text: Sequence of tokens, e.g. the output of clean_corpus.

    Returns:
        dict: Maps each token to the list of tokens that immediately followed
        it, in order of occurrence and with duplicates kept (so a uniform
        draw from the list is frequency-weighted).
    """
    successors = {}
    previous = None
    for position, token in enumerate(processed_text):
        # The very first token has no predecessor, so it only seeds `previous`.
        if position:
            followers = successors.get(previous)
            if followers is None:
                followers = successors[previous] = []
            followers.append(token)
        previous = token
    return successors

In [11]:
# Tokenise each corpus file in turn and concatenate the tokens in this order.
all_tokens = [
    token
    for corpus_path in ('data/all_combined/input.txt',
                        'data/limericks.txt',
                        'data/gutenberg-poetry-v001.txt')
    for token in clean_corpus(corpus_path)
]

In [6]:
# Load the pre-computed part-of-speech lookup tables.
# NOTE(review): pickle.load can execute arbitrary code while unpickling, so
# this file must come from a trusted source.
with open('py_files/saved_objects/postag_dict_all.p', 'rb') as f:
    postag = pickle.load(f)
    
# The cells below use entry 1 as a POS tag -> words lookup and entry 2 as a
# word -> POS tags lookup (presumably dicts; confirm against the code that
# wrote the pickle).
pos_to_words = postag[1]
words_to_pos = postag[2]

In [12]:
%%time

forward_chain = train_on_corpus(all_tokens)
# Train the reverse model on a reversed *copy*. Reversing `all_tokens` in
# place made this cell non-idempotent: running it a second time silently
# swapped the forward and backward chains.
backward_chain = train_on_corpus(all_tokens[::-1])

CPU times: user 28.5 s, sys: 1.92 s, total: 30.4 s
Wall time: 30.6 s


In [13]:
def fill_word_pos(chain, pos, prev, max_tries=100):
    """Pick a word for one template slot, preferring words the chain suggests.

    Reads the module-level ``words_to_pos`` and ``pos_to_words`` lookups.

    Args:
        chain: Markov chain as built by train_on_corpus (word -> list of
            neighbouring words).
        pos: Part-of-speech tag the chosen word must carry. The tags ``'.'``
            and ``','`` are returned unchanged as punctuation.
        prev: The neighbouring word already chosen, or ``None`` / ``'[EMPTY]'``
            when there is none; in that case any key of ``chain`` may be drawn.
        max_tries: Number of random draws from the chain before giving up and
            falling back to an arbitrary word with the requested tag.

    Returns:
        str: The punctuation tag itself, a chain word tagged ``pos``, or a
        random entry of ``pos_to_words[pos]`` when no chain word qualified.

    Raises:
        KeyError: If the fallback is needed and ``pos`` is not in
            ``pos_to_words``.
    """
    if pos in ('.', ','):
        return pos

    # `==` instead of `is`: identity comparison with a string literal only
    # works by accident of interning (and is a SyntaxWarning on Python 3.8+).
    if prev is None or prev == '[EMPTY]':
        # Materialise the keys once; the original rebuilt this list on every
        # retry, which is expensive for a corpus-sized chain.
        candidates = list(chain)
    else:
        # A word the chain never saw as a key has no successors; use the
        # fallback below instead of raising KeyError.
        candidates = chain.get(prev, ())

    if candidates:
        for _ in range(max_tries):
            word = random.choice(candidates)
            # Words without a POS entry simply never match.
            if pos in words_to_pos.get(word, ()):
                # Return immediately so that a match found on the final
                # attempt is kept rather than replaced by the fallback.
                return word

    return random.choice(pos_to_words[pos])
        
def fill_forward(template):
    """Fill a POS template left to right using the module-level forward chain.

    Args:
        template: Iterable of POS tags; ``'.'`` and ``','`` are emitted as-is.

    Returns:
        list: One entry per tag, in template order.
    """
    prev_word = None
    line = []
    for pos in template:
        word = fill_word_pos(forward_chain, pos, prev_word)
        line.append(word)
        # Punctuation is stripped from the training corpus, so it is never a
        # key of the chain. Keep the last real word as context, exactly as
        # fill_backward does.
        if word not in ('.', ','):
            prev_word = word
    return line
    
def fill_rand_templates(template, num_gen=10):
    """Print ``num_gen`` random forward fillings of a POS template.

    For each filling this prints a separator row, the generated line and the
    template itself.

    Args:
        template: List of POS tags to fill.
        num_gen: How many lines to generate.
    """
    for _ in range(num_gen):
        # Use the argument; the original read the notebook global `t`, so the
        # parameter was ignored and the function failed whenever `t` was unset.
        line = fill_forward(template)
        print("*****************")
        print(line)
        print(template)

In [14]:
first_templates = get_first_line()
dataset, second_line, third_line, last_two = get_templates()
# Template collections keyed 1-4: first line, second line, third line, and
# the combined templates for the last two lines.
templates = dict(enumerate(
    (first_templates, second_line, third_line, last_two),
    start=1,
))

In [17]:
# Draw one random value from `dataset`, then one (template, original line)
# pair from it. No random seed is set in the visible cells, so the output
# differs from run to run.
l = random.choice(list(dataset.values()))
# `t` is the POS-tag template and `og` the original words it was taken from
# (see the printed output below).
t, og = random.choice(l)
print(og)
fill_rand_templates(t)

['soon', 'a', 'happy', 'thought', 'hit', 'her']
*****************
['fitfully', 'the', 'consecrated', 'bower', 'faced', 'my']
['RB', 'DT', 'JJ', 'NN', 'VBD', 'PRP$']
*****************
['madly', 'another', 'worn', 'song', 'was', 'his']
['RB', 'DT', 'JJ', 'NN', 'VBD', 'PRP$']
*****************
['imagind', 'these', 'lethal', 'vapor', 'was', 'his']
['RB', 'DT', 'JJ', 'NN', 'VBD', 'PRP$']
*****************
['silverly', 'the', 'small', 'table', 'drank', 'their']
['RB', 'DT', 'JJ', 'NN', 'VBD', 'PRP$']
*****************
['flexibly', 'some', 'poor', 'blind', 'said', 'their']
['RB', 'DT', 'JJ', 'NN', 'VBD', 'PRP$']
*****************
['monotonously', 'the', 'hollow', 'pearl', 'poured', 'his']
['RB', 'DT', 'JJ', 'NN', 'VBD', 'PRP$']
*****************
['deadliness', 'every', 'good', 'mother', 'lent', 'its']
['RB', 'DT', 'JJ', 'NN', 'VBD', 'PRP$']
*****************
['lovingly', 'each', 'rocky', 'seat', 'took', 'their']
['RB', 'DT', 'JJ', 'NN', 'VBD', 'PRP$']
*****************
['affectionately', 'the

In [21]:
def fill_backward(template, prev_word=None):
    """Fill a POS template right to left using the module-level backward chain.

    Args:
        template: Indexable sequence of POS tags.
        prev_word: Optional word to fix as the final word of the line. When
            truthy it occupies the last template slot and serves as context
            for the word generated before it.

    Returns:
        list: The line in reading order.
    """
    words = [prev_word] if prev_word else []
    context = prev_word
    open_slots = len(template) - len(words)
    for slot in range(open_slots - 1, -1, -1):
        chosen = fill_word_pos(backward_chain, template[slot], context)
        words.insert(0, chosen)
        # Punctuation never becomes the context for the next lookup.
        if chosen not in ['.', ',']:
            context = chosen
    return words

def gen_poem(five_words):
    """Generate a five-line poem whose lines end with the given words.

    Uses the module-level ``words_to_pos`` and ``templates`` lookups. Lines
    1-3 each draw a template keyed by the first POS tag of their end word.
    Lines 4 and 5 share one template, keyed by both end-word tags joined with
    ``'-'`` and split after the index stored with the template.

    Args:
        five_words: Indexable of five words, the end word of lines 1-5.

    Returns:
        list: Five ``(generated line, POS template, original line)`` tuples.
    """
    def first_tag(word):
        return words_to_pos[word][0]

    lines = []
    for line_no in (1, 2, 3):
        end_word = five_words[line_no - 1]
        tags, source = random.choice(templates[line_no][first_tag(end_word)])
        lines.append((fill_backward(tags, prev_word=end_word), tags, source))

    fourth, fifth = five_words[3], five_words[4]
    pair_key = first_tag(fourth) + '-' + first_tag(fifth)
    tags, source, split_at = random.choice(templates[4][pair_key])
    cut = split_at + 1
    halves = (
        (fourth, tags[:cut], source[:cut]),
        (fifth, tags[cut:], source[cut:]),
    )
    for end_word, tag_part, source_part in halves:
        lines.append((fill_backward(tag_part, prev_word=end_word), tag_part, source_part))
    return lines

In [22]:
def print_poem(five_words):
    """Generate a poem with gen_poem and print it line by line.

    Each poem line is printed as space-joined text, followed by its POS
    template, the original line the template came from, and a separator row.

    Args:
        five_words: The five line-ending words, passed through to gen_poem.
    """
    for words, tags, source in gen_poem(five_words):
        print(' '.join(words), tags, source, '**********************', sep='\n')

In [23]:
# Lines 1-5 end with these words, in order. No seed is set in the visible
# cells, so each run prints a different poem.
print_poem(('greece', 'peace', 'mind', 'kind', 'piece'))

there found a manly pride sent greece
['EX', 'VBD', 'DT', 'JJ', 'NN', 'VBN', 'NNP']
['there', 'was', 'a', 'young', 'fellow', 'called', 'binn']
**********************
who loved the picture of peace
['WHO', 'VBD', 'DT', 'NN', 'IN', 'NN']
['who', 'hadnt', 'an', 'atom', 'of', 'fear']
**********************
himself lackeyed twentyeight to mind
['PRP', 'VBD', 'CD', 'TO', 'NN']
['she', 'gave', 'one', 'to', 'adam']
**********************
he was so human kind
['PRP', 'VBD', 'SO', 'JJ', 'NN']
['she', 'ate', 'so', 'much', 'spice']
**********************
that they called it hot piece
['IN', 'PRP', 'VBD', 'PRP', 'JJ', 'NN']
['that', 'she', 'pickled', 'her', 'internal', "workins'"]
**********************


In [29]:
# Second sample: the five words are the line endings, in order.
print_poem(('dane', 'gain', 'decrease', 'increase', 'vein'))

there here bribed the wind in the dane
['EX', 'RB', 'VBD', 'DT', 'NN', 'IN', 'DT', 'NN']
['there', 'once', 'was', 'a', 'fly', 'on', 'the', 'wall']
**********************
who cut off their bones as doth gain
['WHO', 'VBD', 'RP', 'PRP$', 'NNS', 'IN', 'DT', 'NN']
['who', 'sent', 'out', 'his', 'cards', 'for', 'a', 'party']
**********************
and our birdsong , wellgristled decrease
['CC', 'PRP$', 'NN', ',', 'VBN', 'NN']
['but', 'his', 'daughter', ',', 'named', 'nan']
**********************
it is no increase
['PRP', 'VBZ', 'DT', 'NN']
['she', "'s", 'a', 'person']
**********************
when me molest me , i have exhausted every vein
['WHEN', 'PRP', 'VBP', 'PRP', ',', 'PRP', 'VBP', 'VBN', 'DT', 'NN']
['when', 'I', 'wear', 'it', ',', 'I', 'am', 'called', 'a', 'vermine']
**********************


In [32]:
# Third sample: the five words are the line endings, in order.
print_poem(('thrace', 'face', 'report', 'court', 'case'))

there held a general system described thrace
['EX', 'VBD', 'DT', 'JJ', 'NN', 'VBN', 'NNP']
['there', 'was', 'a', 'young', 'lady', 'named', 'rose']
**********************
who thought with thy face
['WHO', 'VBD', 'IN', 'JJ', 'NN']
['who', 'lived', 'on', 'distilled', 'kerosene']
**********************
and its honey , made report
['CC', 'PRP$', 'NN', ',', 'VBN', 'NN']
['but', 'his', 'daughter', ',', 'named', 'nan']
**********************
fry yet melt into the court
['VBP', 'RB', 'VB', 'IN', 'DT', 'NN']
['do', "n't", 'spit', 'on', 'the', 'floor']
**********************
so they left now and scrap of this case
['SO', 'PRP', 'VBD', 'RB', 'CC', 'VB', 'IN', 'DT', 'NN']
['so', 'he', 'jumped', 'up', 'and', 'spat', 'on', 'the', 'ceiling']
**********************


In [127]:
# Print a single raw line of the combined corpus (0-based index 40841).
with open('data/all_combined/input.txt', encoding='latin1') as f:
    for i, line in enumerate(f):
        if i == 40841:
            print(line)
            # Stop here; the original kept reading the rest of the file
            # after the target line had already been printed.
            break

The course of Time and nationsEgypt, India, Greece and 



In [221]:
# Write the lookup tables held by `lg` back into the POS-tag pickle.
# NOTE(review): `lg` is not defined in any visible cell (the Limerick_Generate
# import at the top is commented out), so this cell raises NameError on a
# fresh kernel unless `lg` arrives via the star import. Confirm where it
# comes from.
# NOTE(review): this overwrites the same pickle file that is loaded earlier
# in the notebook.
postag[1] = lg.pos_to_words
postag[2] = lg.words_to_pos

with open('py_files/saved_objects/postag_dict_all.p', 'wb') as f:
    pickle.dump(postag, f)