In [1]:
import torch
from torch.jit import script, trace
import torch.nn as nn
from torch import optim
import torch.nn.functional as F
import numpy as np
import csv
import random
import re
import os
import unicodedata
import codecs
from io import open
import itertools
import math
import pickle
import statistics
import sys
from functools import partial

from torch.utils.data import Dataset, DataLoader
from torch.nn.utils.rnn import pad_sequence
import tqdm
import nltk

In [2]:

def make_dir_if_not_exists(directory):
	if not os.path.exists(directory):
		logging.info("Creating new directory: {}".format(directory))
		os.makedirs(directory)

def print_list(l, K=None):
	for i, e in enumerate(l):
		if i == K:
			break
		print(e)
	print()

def remove_multiple_spaces(string):
	return re.sub(r'\s+', ' ', string).strip()

def save_in_pickle(save_object, save_file):
	with open(save_file, "wb") as pickle_out:
		pickle.dump(save_object, pickle_out)

def load_from_pickle(pickle_file):
	with open(pickle_file, "rb") as pickle_in:
		return pickle.load(pickle_in)

def save_in_txt(list_of_strings, save_file):
	with open(save_file, "w") as writer:
		for line in list_of_strings:
			line = line.strip()
			writer.write(f"{line}\n")

def load_from_txt(txt_file):
	with open(txt_file, "r") as reader:
		all_lines = list()
		for line in reader:
			line = line.strip()
			all_lines.append(line)
		return all_lines

In [3]:
import pandas as pd

print(torch.cuda.is_available())
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print("Using device:", device)

True
Using device: cuda


In [4]:
data_file = 'with_epoque.csv'
data = pd.read_csv(data_file)
print(len(data))
print(data.head())

573
                                    author  \
0                      WILLIAM SHAKESPEARE   
1  DUCHESS OF NEWCASTLE MARGARET CAVENDISH   
2                           THOMAS BASTARD   
3                           EDMUND SPENSER   
4                        RICHARD BARNFIELD   

                                             content  \
0  Let the bird of loudest lay\r\nOn the sole Ara...   
1  Sir Charles into my chamber coming in,\r\nWhen...   
2  Our vice runs beyond all that old men saw,\r\n...   
3  Lo I the man, whose Muse whilome did maske,\r\...   
4  Long have I longd to see my love againe,\r\nSt...   

                                 poem name          age                  type  
0               The Phoenix and the Turtle  Renaissance  Mythology & Folklore  
1                 An Epilogue to the Above  Renaissance  Mythology & Folklore  
2                       Book 7, Epigram 42  Renaissance  Mythology & Folklore  
3  from The Faerie Queene: Book I, Canto I  Renaissance  Mytho

In [674]:
def make_data_training(df, char_max_line = 20):
    inputs = []
    context = []
    targets = []
    previous = []
    for i,rows in df.iterrows():
        splitted = rows['content'].split('\r\n')
        if len(splitted) > 4:
            for i,line in enumerate(splitted): 
                if len(line.strip()) > 0 and len(line.split(' ')) <= char_max_line:
                    if i==0:
                        previous.append(' ')
                    else:
                        previous.append(splitted[i-1])
                    inputs.append(line)
                    targets.append(line)
                    context.append(' '.join([str(rows['author'])]))
        
    return pd.DataFrame(list(zip(inputs, context, targets, previous)),columns =['text', 'context','target', 'previous'])


class PoemDataset(Dataset):
    def __init__(self, df):
        self.df = df

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        return self.df.iloc[idx]

In [702]:
df = make_data_training(data, char_max_line = 30)

all_poems = df['text'].tolist()
context = df['context'].tolist()
previous = df['previous'].tolist()

In [703]:
pad_word = "<pad>"
bos_word = "<bos>"
eos_word = "<eos>"
unk_word = "<unk>"
sep_word = "sep"

pad_id = 0
bos_id = 1
eos_id = 2
unk_id = 3
sep_id = 4
    
def normalize_sentence(s):
    s = re.sub(r"([.!?])", r" \1", s)
    s = re.sub(r"[^a-zA-Z.!?]+", r" ", s)
    s = re.sub(r"\s+", r" ", s).strip()
    return s

class Vocabulary:
    def __init__(self):
        self.word_to_id = {pad_word: pad_id, bos_word: bos_id, eos_word:eos_id, unk_word: unk_id, sep_word: sep_id}
        self.word_count = {}
        self.id_to_word = {pad_id: pad_word, bos_id: bos_word, eos_id: eos_word, unk_id: unk_word, sep_id: sep_word}
        self.num_words = 5
    
    def get_ids_from_sentence(self, sentence):
        sentence = normalize_sentence(sentence)
        sent_ids = [bos_id] + [self.word_to_id[word.lower()] if word.lower() in self.word_to_id \
                               else unk_id for word in sentence.split()] + \
                               [eos_id]
        return sent_ids
    
    def tokenized_sentence(self, sentence):
        sent_ids = self.get_ids_from_sentence(sentence)
        return [self.id_to_word[word_id] for word_id in sent_ids]

    def decode_sentence_from_ids(self, sent_ids):
        words = list()
        for i, word_id in enumerate(sent_ids):
            if word_id in [bos_id, eos_id, pad_id]:
                continue
            else:
                words.append(self.id_to_word[word_id])
        return ' '.join(words)

    def add_words_from_sentence(self, sentence):
        sentence = normalize_sentence(sentence)
        for word in sentence.split():
            if word not in self.word_to_id:
                self.word_to_id[word] = self.num_words
                self.id_to_word[self.num_words] = word
                self.word_count[word] = 1
                self.num_words += 1
            else:
                self.word_count[word] += 1

vocab = Vocabulary()
for src in df['text']:
    vocab.add_words_from_sentence(src.lower())
    
for cxt in df['context']:
    vocab.add_words_from_sentence(cxt.lower())

print(f"Total words in the vocabulary = {vocab.num_words}")

Total words in the vocabulary = 11264


In [800]:
from sklearn.feature_extraction.text import TfidfVectorizer


tfIdfVectorizer=TfidfVectorizer()
tfIdf = tfIdfVectorizer.fit_transform(all_poems)

X = tfIdfVectorizer.transform(["Let the bird of loudest lay"])
names = np.array(tfIdfVectorizer.get_feature_names())
ind = np.array(X.indices[X.data.sort()][0][-3:][::-1])
res = names[ind]

print(' '.join(res))

bird lay let


In [843]:
class Poem_dataset(Dataset):

    def __init__(self, poems, context, previous, vocab, device):

        l = []
        
        for i in range(len(poems)):
            X = tfIdfVectorizer.transform([poems[i]])
            ind = np.array(X.indices[X.data.sort()][0][-3:][::-1])
            key_words = names[ind]
            l.append( (context[i] + " sep " + ' '.join(key_words), poems[i] ))
        
        self.poems = l.copy()
        self.vocab = vocab
        self.device = device

        def encode(src, tgt):
            src_ids = self.vocab.get_ids_from_sentence(src)
            tgt_ids = self.vocab.get_ids_from_sentence(tgt)
            return (src_ids, tgt_ids)

        # We will pre-tokenize the conversations and save in id lists for later use
        self.tokenized_poems = [encode(src, tgt) for src, tgt in self.poems]
        
    def __len__(self):
        return len(self.poems)

    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()

        return {"conv_ids":self.tokenized_poems[idx], "conv":self.poems[idx]}

def collate_fn(data):
    src_ids = [torch.LongTensor(e["conv_ids"][0]) for e in data]
    tgt_ids = [torch.LongTensor(e["conv_ids"][1]) for e in data]
    src_str = [e["conv"][0] for e in data]
    tgt_str = [e["conv"][1] for e in data]
    data = list(zip(src_ids, tgt_ids, src_str, tgt_str))
    data.sort(key=lambda x: len(x[0]), reverse=True)
    src_ids, tgt_ids, src_str, tgt_str = zip(*data)

    src_seqs = nn.utils.rnn.pad_sequence(src_ids, padding_value = pad_id,
                                         batch_first = False)
    tgt_seqs = nn.utils.rnn.pad_sequence(tgt_ids, padding_value = pad_id, 
                                         batch_first = False)
    
    src_padded_length = len(src_seqs[0])
    tgt_padded_length = len(tgt_seqs[0])
    return {"conv_ids":(src_ids, tgt_ids), "conv":(src_str, tgt_str), "conv_tensors":(src_seqs.to(device), tgt_seqs.to(device))}

In [844]:
dataset = Poem_dataset(all_poems, context, previous, vocab, device)

In [845]:
for src, tgt in dataset.poems[0:5]:
    sentence = src
    word_tokens = vocab.tokenized_sentence(sentence)
    word_ids = vocab.get_ids_from_sentence(sentence)
    print(sentence)
    print(tgt)
    print(word_tokens)
    print()

word = "world"
word_id = vocab.word_to_id[word.lower()]
print(f"Word = {word}")
print(f"Word ID = {word_id}")
print(f"Word decoded from ID = {vocab.decode_sentence_from_ids([word_id])}")

WILLIAM SHAKESPEARE sep bird lay let
Let the bird of loudest lay
['<bos>', 'william', 'shakespeare', 'sep', 'bird', 'lay', 'let', '<eos>']

WILLIAM SHAKESPEARE sep arabian on sole
On the sole Arabian tree
['<bos>', 'william', 'shakespeare', 'sep', 'arabian', 'on', 'sole', '<eos>']

WILLIAM SHAKESPEARE sep and be herald
Herald sad and trumpet be,
['<bos>', 'william', 'shakespeare', 'sep', 'and', 'be', 'herald', '<eos>']

WILLIAM SHAKESPEARE sep chaste obey sound
To whose sound chaste wings obey.
['<bos>', 'william', 'shakespeare', 'sep', 'chaste', 'obey', 'sound', '<eos>']

WILLIAM SHAKESPEARE sep but harbinger shrieking
But thou shrieking harbinger,
['<bos>', 'william', 'shakespeare', 'sep', 'but', 'harbinger', 'shrieking', '<eos>']

Word = world
Word ID = 392
Word decoded from ID = world


In [846]:
def read_GloVe(filename):
  embeddings = {}
  for line in open(filename).readlines():
    fields = line.strip().split(" ")
    word = fields[0]
    embeddings[word] = [float(x) for x in fields[1:]]
  return embeddings

GloVe = read_GloVe("glove.840B.300d.conll_filtered.txt")

In [847]:
class Erato(nn.Module):
    def __init__(self, vocab, emb_dim = 300, hidden_dim = 300, num_layers = 2, dropout=0.1):
        super().__init__()

        
        self.num_words = num_words = vocab.num_words
        self.emb_dim = emb_dim
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers
   
        self.encode_emb = nn.Embedding(self.num_words,self.emb_dim)
        
        self.init_glove(GloVe, vocab)
        
        self.encode_gru = nn.GRU(self.emb_dim, self.hidden_dim,
                          num_layers=self.num_layers, dropout=dropout,
                          bidirectional=True,batch_first=False)
        self.encode_l_hidden = nn.Linear(2*self.num_layers,self.num_layers)
        self.encode_l_output = nn.Linear(2*self.hidden_dim,self.hidden_dim)

        self.dropout_enc = nn.Dropout(dropout)

        self.decode_emb = self.encode_emb
        
        self.decode_gru = nn.GRU(self.emb_dim, self.hidden_dim,
                          num_layers=self.num_layers, dropout=dropout,
                          bidirectional=False,batch_first=False)
        self.d_l = nn.Linear(self.hidden_dim,self.num_words)
        self.logsoftmax = nn.LogSoftmax(dim=2)
        self.loss = nn.CrossEntropyLoss(ignore_index=pad_id)
        self.dropout_dec = nn.Dropout(dropout)
        
        self.softmax_att = nn.Softmax(dim=0)
        self.attention_matrix = nn.Linear(self.hidden_dim,self.hidden_dim)
        self.attention_decode_cat = nn.Linear(2*self.hidden_dim,self.num_words)
    
    def init_glove(self, GloVe, vocab):
        weights_emb = self.encode_emb.weight.data.clone()
        
        for i, word in enumerate(vocab.word_to_id):
            if word in GloVe:
                weights_emb[vocab.word_to_id[word],:] = torch.tensor(GloVe[word])
          
        self.encode_emb = nn.Embedding.from_pretrained(weights_emb.clone(),freeze = False)
    
    def encode(self, source):
        source_lengths = torch.sum(source != pad_id, axis=0).cpu()

        emb = self.dropout_enc(self.encode_emb(source))
        emb = nn.utils.rnn.pack_padded_sequence(emb, source_lengths,
                                                enforce_sorted = False)
        encoder_output, encoder_hidden = self.encode_gru(emb)
        encoder_output,_ = nn.utils.rnn.pad_packed_sequence(encoder_output,
                                                   padding_value=pad_id)
  
        encoder_output = self.encode_l_output(encoder_output)
        
        encoder_hidden = self.encode_l_hidden(encoder_hidden.permute(2,1,0))
        encoder_hidden = encoder_hidden.permute(2,1,0).contiguous()
        # Compute the encoder mask
        encoder_mask = (source == pad_id)

        return encoder_output, encoder_mask.type(torch.bool), encoder_hidden

    def decode(self, decoder_input, last_hidden, encoder_output, encoder_mask):

        emb = self.dropout_dec(self.decode_emb(decoder_input))
        decoder_output, decoder_hidden = self.decode_gru(emb,last_hidden)
        b = decoder_output.squeeze(0)

        # I use the General method (Luong2015) for attention
        encoder_output = encoder_output.masked_fill(encoder_mask.unsqueeze(2),0)
        att = torch.matmul(self.attention_matrix(decoder_output.permute(1,0,2)),
                           encoder_output.permute(1,2,0))
        att = att.squeeze(1).permute(1,0)
        
        att = att.masked_fill(encoder_mask, float("-inf"))
        att = self.softmax_att(att)
        c = att.unsqueeze(2) * encoder_output
        c = torch.sum(c,0)
        logits = self.attention_decode_cat(torch.cat((b,c),1))
        return (logits, decoder_hidden, att)

    def compute_loss(self, source, target):

        max_source_sequence_length = target.shape[0]
        local_batch_size = target.shape[1]
        encoder_output, encoder_mask, h = self.encode(source)
        input_decode = target[0,:].unsqueeze(0)
        loss = 0
        for t in range(1,max_source_sequence_length):
            out,h,_ = self.decode(input_decode, h, encoder_output, encoder_mask)
            input_decode = target[t,:].unsqueeze(0)
            loss += self.loss(out, input_decode.squeeze())
        return loss / (max_source_sequence_length-1)

In [848]:
def train(model, data_loader, num_epochs, model_file, learning_rate=0.0001):

    decoder_learning_ratio = 5.0
    encoder_parameter_names = ['encode_emb', 'encode_gru', 'l1', 'l2']
                           
    encoder_named_params = list(filter(lambda kv: any(key in kv[0] for key in encoder_parameter_names), model.named_parameters()))
    decoder_named_params = list(filter(lambda kv: not any(key in kv[0] for key in encoder_parameter_names), model.named_parameters()))
    encoder_params = [e[1] for e in encoder_named_params]
    decoder_params = [e[1] for e in decoder_named_params]
    optimizer = torch.optim.AdamW([{'params': encoder_params},
                {'params': decoder_params, 'lr': learning_rate * decoder_learning_ratio}], lr=learning_rate)
    
    clip = 50.0
    for epoch in tqdm.notebook.trange(num_epochs, desc="training", unit="epoch"):
        with tqdm.notebook.tqdm(
                data_loader,
                desc="epoch {}".format(epoch + 1),
                unit="batch",
                total=len(data_loader)) as batch_iterator:
            model.train()
            total_loss = 0.0
            for i, batch_data in enumerate(batch_iterator, start=1):
                source, target = batch_data["conv_tensors"]
                optimizer.zero_grad()
                loss = model.compute_loss(source, target)
                total_loss += loss.item()
                loss.backward()
                _ = nn.utils.clip_grad_norm_(model.parameters(), clip)
                optimizer.step()

                batch_iterator.set_postfix(mean_loss=total_loss / i, current_loss=loss.item())
       
    torch.save(model.state_dict(), model_file)

# Define the model

In [849]:
Erato_model = Erato(vocab).to(device)

In [868]:
num_epochs = 5
batch_size = 64
learning_rate = 0.001

data_loader = DataLoader(dataset=dataset, batch_size=batch_size, 
                               shuffle=True, collate_fn=collate_fn)


train(Erato_model, data_loader, num_epochs, "baseline_model.pt",learning_rate=learning_rate)

training:   0%|          | 0/5 [00:00<?, ?epoch/s]

epoch 1:   0%|          | 0/211 [00:00<?, ?batch/s]

epoch 2:   0%|          | 0/211 [00:00<?, ?batch/s]

epoch 3:   0%|          | 0/211 [00:00<?, ?batch/s]

epoch 4:   0%|          | 0/211 [00:00<?, ?batch/s]

epoch 5:   0%|          | 0/211 [00:00<?, ?batch/s]

In [877]:
Path = "./saved_Erato"
torch.save(Erato_model, Path)

In [878]:
Erato_model = torch.load(Path)

In [869]:
def predict_greedy(model, sentence, max_length=100):
    """Make predictions for the given input using greedy inference.
    
    Args:
        model: A sequence-to-sequence model.
        sentence: A input string.
        max_length: The maximum length at which to truncate outputs in order to
            avoid non-terminating inference.
    
    Returns:
        Model's predicted greedy response for the input, represented as string.
    """

    # You should make only one call to model.encode() at the start of the function, 
    # and make only one call to model.decode() per inference step.
    model.eval()    
    src_id = torch.tensor(vocab.get_ids_from_sentence(sentence))[:,None].to(device)
    encoder_output, encoder_mask, last_hidden = model.encode(src_id) 
    input = src_id[0,:]
    out = [bos_id]
    for t in range(max_length):
        input = input[None,:]
        out_decoder, last_hidden, _ = model.decode(input, last_hidden, encoder_output, encoder_mask)
        input = out_decoder.argmax(dim=-1)
        word = input.item()
        out.append(word)
        if word == eos_id:
            break
    
    decoded = vocab.decode_sentence_from_ids(out)
    return decoded
    

In [963]:
def predict_beam(model, sentence, k=5, max_length=100, hidden = None):

    alpha = 0.3
    model.eval()
    
    sentence_ids = torch.tensor(vocab.get_ids_from_sentence(sentence)).cuda()
    sentence_ids = sentence_ids.unsqueeze(1)
    encoder_output, encoder_mask, h = model.encode(sentence_ids)

    out_start = sentence_ids[0]
    beam = [out_start for i in range(k)]
    beam_scores = [1 for i in range(k)]
    
    if hidden:
        h = hidden
    hiddens = [h for i in range(k)]
    
    generations = []
    generations_scores = []
    curr_l = 0
    eos_tensor = torch.Tensor([eos_id]).int().cuda()
    while beam:
        logits = torch.Tensor().cuda()
        inds = torch.Tensor().int().cuda()
        curr_k = len(beam)
        if curr_l==max_length:
            for i in range(curr_k):
                  generations += [torch.cat((beam[i],eos_tensor),0)]
                  generations_scores += [new_beam_scores[i]]
            break
        else:
            for i in range(curr_k):
                out, hiddens[i], _ = model.decode(beam[i][-1].view(1,1), hiddens[i], encoder_output,
                                     encoder_mask)
                logit,ind = torch.topk(out.squeeze(), curr_k, dim=0)
                logits = torch.cat((logits,logit),0)
                inds = torch.cat((inds,ind),0)
            new_beam = []
            new_beam_scores = []
            new_hiddens = []
            if curr_l==0:
                for i in range(curr_k):
                    max_ind = torch.argmax(nn.functional.log_softmax(logit,dim=0))
                    new_beam_scores += [float(logit[max_ind])]
                    logit[max_ind] = -1e9
                    new_beam += [torch.cat((beam[0],ind[max_ind].unsqueeze(0)),0)]
                    new_hiddens += [hiddens[0]]
            else:
                top_logits,top_inds_logit = torch.topk(torch.repeat_interleave(torch.Tensor(beam_scores).cuda(),
                                                                               curr_k)\
                                                       +nn.functional.log_softmax(logits,dim=0),
                                                       curr_k, dim=0)
                for i in range(curr_k):
                    if inds[top_inds_logit[i]]==eos_id:
                        generations += [torch.cat((beam[top_inds_logit[i]//curr_k],inds[top_inds_logit[i]].unsqueeze(0)),0)]
                        generations_scores+=[float(logits[top_inds_logit[i]])/(generations[-1].shape[0]**alpha)]
                    else:
                        new_beam += [torch.cat((beam[top_inds_logit[i]//curr_k],inds[top_inds_logit[i]].unsqueeze(0)),0)]
                        new_hiddens += [hiddens[top_inds_logit[i]//curr_k]]
                        new_beam_scores += [float(logits[top_inds_logit[i]])]
            beam = new_beam
            beam_scores = new_beam_scores
            hiddens = new_hiddens
        curr_l +=1
    generations = [g for _, g in sorted(zip(generations_scores, generations))]
    generations.reverse()
    sorted_scores = sorted(generations_scores)
    sorted_scores.reverse()
    return [vocab.decode_sentence_from_ids(s.tolist()) for s in generations], sorted_scores

In [1015]:
def predict_beam2(model, sentence, k=5, max_length=100):
    """Make predictions for the given inputs using beam search.
    
    Args:
        model: A sequence-to-sequence model.
        sentence: An input sentence, represented as string.
        k: The size of the beam.
        max_length: The maximum length at which to truncate outputs in order to
            avoid non-terminating inference.
    
    Returns:
        A list of k beam predictions. Each element in the list should be a string
        corresponding to one of the top k predictions for the corresponding input,
        sorted in descending order by its final score.
    """

    # Implementation tip: once an eos_token has been generated for any beam, 
    # remove its subsequent predictions from that beam by adding a small negative 
    # number like -1e9 to the appropriate logits. This will ensure that the 
    # candidates are removed from the beam, as its probability will be very close
    # to 0. Using this method, uou will be able to reuse the beam of an already 
    # finished candidate

    # Implementation tip: while you are encouraged to keep your tensor dimensions
    # constant for simplicity (aside from the sequence length), some special care
    # will need to be taken on the first iteration to ensure that your beam
    # doesn't fill up with k identical copies of the same candidate.
    
    # You are welcome to tweak alpha
    alpha = 0.7
    model.eval()
    
    # YOUR CODE HERE
    log_P_T = torch.log(torch.tensor(list(vocab.word_count.values()))/len(vocab.word_count.values())).to(device)
    
    beams = torch.ones((max_length,k*k)) * bos_id
    probs = torch.ones((max_length,k*k))
    top_beams = (torch.ones(k,device = device, dtype = torch.int) * bos_id)
    top_probs = (torch.zeros(k,device = device, dtype = torch.int))
    src_id = torch.tensor(vocab.get_ids_from_sentence(sentence))[:,None].to(device)
    encoder_output, encoder_mask, last_hidden = model.encode(src_id)
    hidden_list = [last_hidden for i in range(k)]
    current_beams = [[bos_id] for i in range(k)]
    input = src_id[None,0,:]
    generations = []
    generations_scores = []
    
    for t in range(max_length):
        start_hidden = last_hidden
        for i in range(k):
            last_hidden = hidden_list[i]
            input = top_beams[None,None,i]
            out_decoder, last_hidden, _ = model.decode(input, last_hidden, encoder_output, encoder_mask)
            out_decoder = nn.functional.log_softmax(out_decoder, dim = 2)
            sorted, indices  = torch.sort(out_decoder, dim=- 1, descending=True)
            beams[t+1,i*k:(i+1)*k] = indices[0,0,0:k]
            probs[t+1,i*k:(i+1)*k] = top_probs[i] + sorted[0,0,0:k] 
            hidden_list[i] = last_hidden
        
        if t == 0:
            values_p = probs[t+1,:].unique()
            beams_p = beams[t+1,:].unique()
            sorts, inds = torch.sort(values_p, dim=- 1, descending=True)
            top_beams = beams_p[inds].int().to(device)
            
            for j,l in enumerate(current_beams):
                l.append(top_beams[j].item())
            
        else:
            sorted_p, indices_p = torch.sort(probs[t+1,:], dim=- 1, descending=True)
            top_probs = sorted_p[0:k]
            top_beams = beams[t+1,indices_p[0:k]].int().to(device)
            ancestor = torch.div(indices_p[0:k], k, rounding_mode='floor')
            prev_beams = current_beams
            current_beams = []
            prev_hidden_list = hidden_list.copy()
            hidden_list = []
            for j, a in enumerate(ancestor):
                current_beams.append(prev_beams[a]+[top_beams[j].item()])
                hidden_list.append(prev_hidden_list[a])
#         print(list(map(vocab.decode_sentence_from_ids,current_beams)))
        
        for i,l in enumerate(current_beams):
            if l[-1] == eos_id:
                generations.append(vocab.decode_sentence_from_ids(l))
                generations_scores.append(top_probs[i].item())
                if len(generations) == k:
                    lens_sent = [len(s) for s in generations]
                    scores = [ s/(l**alpha) for s,l in zip(generations_scores,lens_sent)]
                    sorted_gen = np.array(generations)[np.argsort(scores)[::-1]]
                    return sorted_gen
                top_probs[i] = float("-inf")
            

In [1016]:
sentence = "WILLIAM SHAKESPEARE sep i thought i thought i thought i thought ,"
print(predict_greedy(Erato_model, sentence, max_length=100))
print()
b, s = predict_beam(Erato_model, sentence, k=5, max_length=100)


print(b)

thought i thought this thought thought i thought this thought

['when i thought this thought i thought this thought should thought', 'when i thought i thought this thought was thought', 'yet i thought i thought this thought was thought', 'when i thought this thought i thought this thought', 'when i thought this thought i thought this thought should i']


In [1017]:
sentence = "WILLIAM SHAKESPEARE sep i thought i thought i thought i thought ,"
predict_beam2(Erato_model, sentence, k=5, max_length=100)

IndexError: Dimension out of range (expected to be in range of [-2, 1], but got 2)

In [998]:
def generate_peom(model, author, key_words, method = "greedy", k_beam = 10, nb_line = 5):
    out = []
    inp_sentence = author + " sep " +  ' '.join(key_words)
    print(inp_sentence)
    print()
    for nb in range(nb_line):
        if method == "greedy":
            sent_out = predict_greedy(model, inp_sentence, max_length=100)
            out.append(sent_out)
            inp_sentence = author + " sep " + sent_out  
            
        else:
            sent_out_beam, scores = predict_beam(model, inp_sentence, k=k_beam, max_length=100)
            sent_out = sent_out_beam[0]
            scores = np.array(scores)/sum(scores)
            out_sent = np.random.choice(sent_out_beam, p = scores)
            out.append(out_sent)
            rand_words = np.random.choice(out_sent.split(),2)
            inp_sentence = author + " sep " + ' '.join(key_words) + ' '.join(rand_words)
    return out

In [1014]:
key_words = ['sea']
author = "THOMAS BASTARD"
method = "beam"
poem = generate_peom(Erato_model, author, key_words, method = method, k_beam = 50, nb_line = 10)


print(',\n'.join(poem))

THOMAS BASTARD sep sea

a deep light upon the old sea,
off to sing a deep cloud,
while thou sing st the best s delight,
powdered thou upon their beauties feet .,
when i must fortune out upon the wall .,
rewards upon our cruel plaine,
while i am slain of cruel cheer,
blest a honour in a field of labor .,
a store of a dreadful priest s light,
vouchsafe up a cruel light upon
