# Bedtime Stories Generator (－_－) zzZ

In [1]:
!pip install transformers

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting transformers
  Downloading transformers-4.28.1-py3-none-any.whl (7.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.0/7.0 MB[0m [31m71.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting tokenizers!=0.11.3,<0.14,>=0.11.1
  Downloading tokenizers-0.13.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.8/7.8 MB[0m [31m77.8 MB/s[0m eta [36m0:00:00[0m
Collecting huggingface-hub<1.0,>=0.11.0
  Downloading huggingface_hub-0.13.4-py3-none-any.whl (200 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m200.1/200.1 kB[0m [31m7.3 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: tokenizers, huggingface-hub, transformers
Successfully installed huggingface-hub-0.13.4 tokenizers-0.13.3 transformers-4.28.1


In [2]:
import pandas as pd
from transformers import GPT2LMHeadModel, GPT2Tokenizer
import numpy as np
import random
import torch
from torch.utils.data import Dataset, DataLoader
from transformers import GPT2Tokenizer, GPT2LMHeadModel, AdamW, get_linear_schedule_with_warmup
from tqdm import tqdm, trange
import torch.nn.functional as F
import csv
import re 

In [3]:
if torch.cuda.is_available():
    device_name = torch.device("cuda")
else:
    device_name = torch.device("cpu")
print(f"Using {device_name}")
device = device_name

Using cuda


In [5]:
import requests

# Shout out to https://ericmjl.github.io/blog/2023/4/11/how-to-programmatically-download-a-file-from-dropbox-without-the-dropbox-api/
# for this code! 
headers = {'user-agent': 'Wget/1.16 (linux-gnu)'}  # <-- the key is here!
r = requests.get("https://www.dropbox.com/s/d505cyj6bm6t8dh/raw_stories.txt?dl=0", stream=True, headers=headers)
data_file = "raw_stories.txt"
with open(data_file, 'wb') as f:
    f.write(r.content)

In [6]:
STORIES = {}
POEM_AUTHORS = ['EDWARD LEAR', 'ISAAC WATTS', 'JANE TAYLOR', 'PHOEBE CARY', 'ANN TAYLOR', 'ANONYMOUS', 'CHARLES KINGSLEY', 'CHARLES MACKAY', 'CLEMENT CLARKE MOORE', 'DAVID EVERETT', 'ELIZA LEE FOLLEN', 'FELICIA DOROTHEA HEMANS', 'FELICIA DOROTHEA HEMANS', 'FELICIA DOROTHEA HEMANS', 'FRANCIS C. WOODWORTH', 'FROM M. DE LAMOTTE', 'GEORGE MACDONALD', 'HANNAH FLAGG GOULD', 'HENRY WADSWORTH LONGFELLOW', 'JAMES HOGG', 'JAMES MERRICK',
                'JAMES WHITCOMB RILEY', 'JANE TAYLOR', 'JEMIMA LUKE', 'LEWIS CARROLL', 'LITTLE B. (TAYLOR?)', 'LYDIA MARIA CHILD', 'MARY HOWITT', 'MARY HOWITT', 'MARY HOWITT', 'OLD CAROL', 'REGINALD HEBER', 'RICHARD MONCKTON MILNES (LORD HOUGHTON)', 'ROBERT BURNS', 'ROBERT LOUIS STEVENSON', 'ROBERT SOUTHEY', 'SABINE BARING-GOULD', 'THOMAS HOOD', 'WILLIAM BRIGHTY RANDS', 'WILLIAM HOWITT', 'WILLIAM ROBERT SPENCER', 'WILLIAM SHAKESPEARE', 'WILLIAM WORDSWORTH']
STORY_TYPES = ['SCANDINAVIAN STORIES', 'GERMAN STORIES', 'FRENCH STORIES', 'ENGLISH STORIES','CELTIC STORIES', 'ITALIAN STORIES', 'JAPANESE STORIES', 'EAST INDIAN STORIES', 'AMERICAN INDIAN STORIES', 'ARABIAN STORIES', 'CHINESE STORIES', 'RUSSIAN STORIES', 'TALES FOR TINY TOTS', 'FANCIFUL STORIES', 'OUR CHILDREN', 'PINOCCHIO\'S ADVENTURES IN WONDERLAND[1]']


def clean_data():
    with open(data_file, "r") as f:
        lines = f.readlines()
        first_line = lines[0].strip(" \n")
        curr_title = re.sub('[\t]+', '', first_line).upper()
        for i in range(1, len(lines) - 1):
            line = lines[i].strip(" \n")
            line = re.sub('[\t]+', '', line)  # to remove tabs
            line = re.sub("        ", '', line)
            if len(line) == 0 or (line in POEM_AUTHORS) or (line in STORY_TYPES) or ("ADAPTED BY" in line):
                continue

            elif (line in ["CINDERELLA", "BLUE BEARD", "SUPPOSE!", "PRETTY COW", "THE OWL AND THE PUSSY-CAT"]):
                curr_title = line
                STORIES[curr_title] = []

            if (line == '\n' or len(line) < 3) and len(lines[i+1]) < 50:
                upcoming_title = lines[i+1].strip(" \n")
                curr_title = re.sub('[\t]+', '', upcoming_title).upper()

            elif (line[0].isnumeric()):
                curr_title = line.upper()
                STORIES[curr_title] = []

            elif (line.isupper() and ("ADAPTED BY" in lines[i+1] or "BY " in lines[i+1])):
                curr_title = line
                STORIES[curr_title] = []

            elif (line.isupper() and " STORY" in lines[i+1]):
                first_sentence = lines[i+1].split()
                if "--" in curr_title:
                    # replace with next chapter
                    curr_title = curr_title.split(
                        " --", 1)[0] + " --" + ' '.join(first_sentence[0:2])
                else:
                    curr_title = line + " --" + ' '.join(first_sentence[0:2])

                STORIES[curr_title] = [' '.join(first_sentence[2:])]

            elif (" STORY" in line or " Story." in line) and not ("OF" in line and not (" STORY" in lines[i+1])):
                first_sentence = line.split()
                if "--" in curr_title:
                    # replace with next chapter
                    curr_title = curr_title.split(
                        " --", 1)[0] + " --" + ' '.join(first_sentence[0:2])
                else:
                    curr_title = curr_title + " --" + \
                        ' '.join(first_sentence[0:2])

                begin_story = [' '.join(first_sentence[2:])]
                if len(begin_story) <= 1:
                    STORIES[curr_title] = []
                else:
                    STORIES[curr_title] = [' '.join(first_sentence[2:])]

            elif (line.isupper()) and not (str(lines[i+1].split()[0:2]).isupper() or ("THE END" in line) or ("\"" in line) or ("“" in line) or ("\'" in line) or ("{" in line) or (line in "TRESPASSERS WILL BE PROSECUTED") or (line in "FAMOUS DONKEY THE STAR OF THE DANCE") or ("ADAPTED BY" in line) or (line in "* A.D. 1482-1513")):
                if (len(line) >= 11 and len(line) < 50):
                    curr_title = line.upper()
                    STORIES[curr_title] = []

                elif (len(line) < 11 and len(line) < 50) or (line in "CHAPTER"):
                    if "--" in curr_title:
                        # replace with next chapter
                        curr_title = curr_title.split(
                            " --", 1)[0] + " --" + line
                    if not (line in curr_title):
                        curr_title = curr_title + " --" + line
                    STORIES[curr_title] = []

            elif (curr_title in STORIES) and (line.upper() != curr_title):
                STORIES[curr_title].append(line)
            else:
                STORIES[curr_title] = []

    # To finalize the cleaning: removes extra titles that never got fed in
    STORIES_COPY = STORIES.copy()
    for story in STORIES_COPY:
        if STORIES[story] == []:
            STORIES.pop(story)

    # Making each story in the dictionary a full string so that it matches the tutorial's format
    for title in STORIES:
        STORIES[title] = ' '.join(STORIES[title])

clean_data()

## Saved models using dropbox.


In [10]:
import requests

# Shout out to https://ericmjl.github.io/blog/2023/4/11/how-to-programmatically-download-a-file-from-dropbox-without-the-dropbox-api/
# for this code! 
headers = {'user-agent': 'Wget/1.16 (linux-gnu)'}  # <-- the key is here!
r = requests.get("https://www.dropbox.com/s/c0h6yqea0to79f6/storyGenerator.pt?dl=0", stream=True, headers=headers)

story_generator_path = "storyGenerator.pt"
with open(story_generator_path, 'wb') as f:
    f.write(r.content)

In [11]:
import requests

# Shout out to https://ericmjl.github.io/blog/2023/4/11/how-to-programmatically-download-a-file-from-dropbox-without-the-dropbox-api/
# for this code! 
headers = {'user-agent': 'Wget/1.16 (linux-gnu)'}  # <-- the key is here!
r = requests.get("https://www.dropbox.com/s/w1t3dfewpws7e0l/titleGenerator.pt?dl=0", stream=True, headers=headers)

title_generator_path = "titleGenerator.pt"
with open(title_generator_path, 'wb') as f:
    f.write(r.content)

In [12]:
# Lowercase, remove punctuation and numbers from titles
def clean_title(title):
    '''
    Removes punctuation, lowercases and numbers from titles
    '''
    # upper- to lowercase
    title = str(title).lower()

    # remove numbers
    title = re.sub(r"[0123456789]+\ *", " ", title)

    # remove punctuation
    title = re.sub(r"[,.&$%<>@#?-_*/\()~='+;!:`]+\ *", " ", title)
    title = re.sub("''", ' ', title)
    title = re.sub('-', ' ', title)

    # remove duplicated spaces
    title = re.sub(' +', ' ', title)
    
    return title.strip()

In [13]:
# Creating the vocabulary ------ 
end_of_sentence = '.' # symbol to denote the end of the sentence
def create_vocabulary(stories):
    '''
    Creates a vocabulary of the story titles.
    '''
    vocabulary = set()
    
    for title in stories:
      title_words = clean_title(title).split(" ")
      vocabulary.update(title_words)
      
    word_list = list(vocabulary)
    word_list.append(end_of_sentence)
    vocabulary = {word_list[word]:word for word in range(0,len(word_list))}
    return vocabulary

# create vocabulary of the fairy tale titles
# titles: index for the one-hot shot
VOCABULARY = create_vocabulary(STORIES)
vocab_size = len(VOCABULARY)

print("The vocabulary:")
print(VOCABULARY)

print(f"Total number of unique words: {vocab_size}")

The vocabulary:
{'': 0, 'has': 1, 'rowland': 2, 'pan': 3, 'hog': 4, 'sort': 5, 'alligator': 6, 'visitors': 7, 'adventures': 8, 'john': 9, 'sweet': 10, 'wilkins': 11, 'nice': 12, 'llustration': 13, 'scuttle': 14, 'hump': 15, 'breast': 16, 'wedding': 17, 'constitution': 18, 'testiment': 19, 'prevented': 20, 'danger': 21, 'try': 22, 'owl': 23, 'ruby': 24, 'queens': 25, 'bees': 26, 'emily': 27, 'conquer': 28, 'better': 29, 'laurence': 30, 'irrigation': 31, 'found': 32, 'ears': 33, 'japan': 34, 'squall': 35, 'bulls': 36, 'been': 37, 'canada': 38, 'grants': 39, 'fiery': 40, 'whom': 41, 'woodman': 42, 'friesland': 43, 'missing': 44, 'ahab': 45, 'paf': 46, 'chicken': 47, 'boys': 48, 'box': 49, 'keep': 50, 'history': 51, 'tree': 52, 'tender': 53, 'caliph': 54, 'elephant’s': 55, 'written': 56, 'starbottle': 57, 'bread': 58, 'roi': 59, 'mervo': 60, 'express': 61, 'griffin': 62, 'across': 63, 'vulcan': 64, 'curtis': 65, 'ends': 66, 'alexandre': 67, 'fear': 68, 'pointer': 69, 'drop': 70, 'gossip': 

In [14]:
# Word to tensor encodings ...

# Translate word to an index from vocabulary
def word_to_index(word):
    if (word != end_of_sentence):
        word = clean_title(word)
    return VOCABULARY[word]

# Translate word to 1-hot tensor
def word_to_tensor(word):
    tensor = torch.zeros(1, 1, vocab_size)
    tensor[0][0][word_to_index(word)] = 1
    return tensor

# Turn a title into a <title_length x 1 x vocab_size>,
# or an array of one-hot vectors
def title_to_tensor(title):
    title_words = clean_title(title).split(' ')
    tensor = torch.zeros(len(title_words) + 1, 1, vocab_size)
    for index in range(len(title_words)):
        tensor[index][0][word_to_index(title_words[index])] = 1
    
    tensor[len(title_words)][0][VOCABULARY[end_of_sentence]] = 1
    return tensor

# Turn a sequence of words from title into tensor <sequence_length x 1 x vocab_size>
def sequence_to_tensor(sequence):
    tensor = torch.zeros(len(sequence), 1, vocab_size)
    for index in range(len(sequence)):
        tensor[index][0][word_to_index(sequence[index])] = 1
    return tensor

In [15]:
import torch.nn as nn

class LSTM_model(nn.Module):
    '''
    Simple LSTM model to generate bedtime story titles.
    Arguments:
        - input_size - should be equal to the vocabulary size
        - output_size - should be equal to the vocabulary size
        - hidden_size - hyperparameter, size of the hidden state of LSTM.
    '''
    def __init__(self, input_size, hidden_size, output_size):
        super(LSTM_model, self).__init__()

        self.hidden_size = hidden_size

        self.lstm = nn.LSTM(input_size, hidden_size)
        self.linear = nn.Linear(hidden_size, output_size)
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, input, hidden):
        output, hidden = self.lstm(input.view(1, 1, -1), hidden)
        
        output = self.linear(output[-1].view(1, -1))
        
        output = self.softmax(output)
        return output, hidden

    # the initialization of the hidden state
    # using cuda speeds up the computation
    def initHidden(self, device):
        return (torch.zeros(1, 1, num_hidden).to(device), torch.zeros(1, 1, num_hidden).to(device))

num_hidden = 128  # hyperparameter

In [16]:
rnn = LSTM_model(vocab_size, num_hidden, vocab_size)

state_dict = torch.load(title_generator_path)
rnn.load_state_dict(state_dict)
rnn = rnn.to(device)

In [17]:
# Generates title given the first word 
def generate_title(first_word):
    
    max_num_words = 5 # in a title
    sentence = [first_word]

    # Initialize input step and hidden state
    input_tensor = word_to_tensor(first_word)
    hidden = (torch.zeros(1, 1, num_hidden).to(device), torch.zeros(1, 1, num_hidden).to(device))
    output_word = None
    i = 1

    # Generate title
    while output_word != '.' and i < max_num_words:
        input_tensor = input_tensor.to(device)
        output, next_hidden = rnn(input_tensor[0], hidden)
        final_output = output.clone().to(device)

        # Use the probabilities from the output to choose the next word
        probabilities = final_output.softmax(dim=1).detach().cpu().numpy().ravel()
        word_index = np.random.choice(range(vocab_size), p = probabilities)

        output_word = [key for (key, value) in VOCABULARY.items() if value == word_index][0]
        sentence.append(output_word)

        # update
        input_tensor = word_to_tensor(output_word)
        hidden = next_hidden
        i += 1

    if sentence[-1] != ".": sentence.append(".")

    return sentence

In [18]:
num_titles = 10

print(f"Generating {num_titles} titles...\n")
for i in range(num_titles):
    sampled_title = generate_title("white") # plug in word from the VOCABULARY
    title = ' '.join(sampled_title)
    print(title)

Generating 10 titles...

white jorinda rabbit .
white colonies stick franklin service .
white morlocks terrible xxx u .
white mouser did .
white fireside chief .
white magician visitor two .
white burst plain .
white babies meadow .
white story home .
white xxxii miser .


In [19]:
# Load the checkpoint 
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')

model2 = GPT2LMHeadModel.from_pretrained('gpt2', force_download=True)

saved_state_dict = torch.load(story_generator_path)

model2.load_state_dict(saved_state_dict)  

Downloading (…)olve/main/vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

Downloading (…)olve/main/merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/548M [00:00<?, ?B/s]

Downloading (…)neration_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

<All keys matched successfully>

In [20]:
def complete_prompt(prompt, min_length=50, max_length=100, top_p=0.8, temperature=1.0):
    inputs = tokenizer.encode(prompt, return_tensors="pt")
    outputs = model2.generate(
        inputs,
        do_sample=True,
        min_length=min_length,
        max_length=max_length,
        top_p=top_p,
        temperature=temperature,
        pad_token_id=tokenizer.eos_token_id
    )
    completed_story = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return completed_story


In [22]:
complete_prompt("THERE was once a poor prince who had a kingdom, but it was a very small one", min_length=100, max_length=300)

'THERE was once a poor prince who had a kingdom, but it was a very small one. He lived very hard and had no children, and none but his own. When the king died, he was buried at the cemetery, and he went to quart, and quarrelled with the soldiers. Then he told them what had happened, and was so furious that they did not believe him. The people of the province came to him, and they said to him, "You are a prince of great power, and must give up your kingdom to the King." "What!" said the king, "if I should do that, I would give up my kingdom to my father, who has stolen all my jewels." So the people of the province came to him and said to him, "We will put up with him, and we will give him over to you." So he did, and went and put up with them all, and they took him to his mother, and said, "You must give him over to the King, and he will not let you leave your land, nor let anyone else take your possessions." So he did, and went and put up with them all, and he took them all and he took

In [24]:
# stop words that don't add context to title
non_info_words = ['and', 'able', 'about', 'abroad', 'according', 'accordingly', 'across', 'actually', 'adj',
              'after', 'afterwards', 'again', 'ahead', "ain't", 'all', 'allow', 'allows', 'almost', 'along', 'alongside',
              'already', 'also', 'although', 'always', 'am', 'amid', 'amidst', 'among', 'amongst', 'an', 'and', 'another',
              'any', 'anyhow', 'anything', 'anyway', 'anyways', 'appear', 'appreciate', 'appropriate', 'are', "aren't", 'around',
              'as', "a's", 'aside', 'ask', 'asking', 'associated', 'at', 'available', 'away', 'awfully', 'back', 'backward',
              'backwards', 'be', 'became', 'because', 'become', 'becomes', 'becoming', 'been', 'before', 'beforehand', 'begin',
              'behind', 'being', 'believe', 'below', 'beside', 'besides', 'best', 'better', 'between', 'beyond', 'both',
              'brief', 'but', 'by', 'came', 'can', 'cannot', 'cant', "can't", 'caption', 'cause', 'causes', 'certain',
              'certainly', 'changes', 'clearly', "c'mon", 'co', 'co.', 'com', 'come', 'comes', 'concerning', 'consequently',
              'consider', 'considering', 'contain', 'containing', 'contains', 'corresponding', 'could', "couldn't", 'course',
              "c's", 'currently', 'dare', "daren't", 'definitely', 'described', 'despite', 'did', "didn't", 'different',
              'directly', 'do', 'does', "doesn't", 'doing', 'done', "don't", 'down', 'downwards', 'during', 'each', 'edu',
              'eg', 'eight', 'eighty', 'either', 'else', 'elsewhere', 'end', 'ending', 'enough', 'entirely', 'especially',
              'et', 'etc', 'even', 'ever', 'evermore', 'every', 'everything', 'everywhere', 'ex', 'exactly', 'example', 'except',
              'fairly', 'few', 'fewer', 'fifth', 'first', 'five', 'followed', 'following', 'follows', 'for', 'forever', 'formerly',
              'forth', 'forward', 'found', 'four', 'from', 'further', 'furthermore', 'get', 'gets', 'getting', 'given', 'gives',
              'go', 'goes', 'going', 'gone', 'got', 'gotten', 'greetings', 'had', "hadn't", 'half', 'happens', 'hardly', 'has',
              "hasn't", 'have', "haven't", 'having', 'he', "he'd", "he'll", 'hello', 'help', 'hence', 'her', 'here', 'hereafter',
              'hereby', 'herein', "here's", 'hereupon', 'hers', 'herself', "he's", 'hi', 'him', 'his', 'hither', 'hopefully', 'how',
              'howbeit', 'however', 'hundred', "i'd", 'ie', 'if', 'ignored', "i'll", "i'm", 'immediate', 'in', 'inasmuch', 'inc',
              'inc.', 'indeed', 'indicate', 'indicated', 'indicates', 'inner', 'inside', 'insofar', 'instead', 'into', 'inward',
              'is', "isn't", 'it', "it'd", "it'll", 'its', "it's", 'itself', "i've", 'just', 'k', 'keep', 'keeps', 'kept', 'know',
              'known', 'knows', 'lately', 'later', 'latter', 'latterly', 'least', 'less', 'lest', 'let', "let's", 'like', 'liked',
              'likely', 'likewise', 'little', 'look', 'looking', 'looks', 'low', 'lower', 'ltd', 'made', 'mainly', 'make', 'makes',
              'many', 'may', 'maybe', "mayn't", 'me', 'mean', 'meantime', 'meanwhile', 'merely', 'might', "mightn't", 'mine',
              'minus', 'miss', 'more', 'moreover', 'most', 'mostly', 'mr', 'mrs', 'much', 'must', "mustn't", 'my', 'myself',
              'name', 'namely', 'nd', 'nearly', 'necessary', 'need', "needn't", 'needs', 'neither', 'never', 'neverf', 'neverless',
              'nevertheless', 'new', 'next', 'nine', 'ninety', 'no', 'non', 'none', 'nonetheless', 'noone', 'no-one', 'nor',
              'normally', 'not', 'nothing', 'notwithstanding', 'novel', 'now', 'nowhere', 'obviously', 'of', 'off', 'often', 'oh',
              'ok', 'okay', 'on', 'once', 'one', 'ones', "one's", 'onto', 'opposite', 'or', 'other', 'others', 'otherwise', 'ought',
              "oughtn't", 'our', 'ours', 'ourselves', 'out', 'over', 'overall', 'own', 'particular', 'particularly', 'past', 'per',
              'perhaps', 'placed', 'please', 'plus', 'possible', 'presumably', 'probably', 'provided', 'provides', 'que', 'quite',
              'qv', 'rather', 'rd', 're', 'really', 'reasonably', 'recent', 'recently', 'regarding', 'regardless', 'regards',
              'relatively', 'respectively', 'right', 'said', 'same', 'saw', 'say', 'saying', 'says', 'second', 'secondly', 'seem',
              'seemed', 'seeming', 'seems', 'seen', 'self', 'selves', 'sensible', 'sent', 'serious', 'seriously', 'seven', 'several',
              'shall', "shan't", 'she', "she'd", "she'll", "she's", 'should', "shouldn't", 'since', 'six', 'so', 'some', 'somebody',
              'someday', 'somehow', 'someone', 'something', 'sometime', 'sometimes', 'somewhat', 'somewhere', 'soon', 'sorry',
              'specified', 'specify', 'specifying', 'still', 'sub', 'such', 'sup', 'sure', 'take', 'taken', 'taking', 'tell',
              'tends', 'th', 'than', 'thank', 'thanks', 'thanx', 'that', "that'll", 'thats', "that's", "that've", 'the', 'their',
              'theirs', 'them', 'themselves', 'then', 'thence', 'there', 'thereafter', 'thereby', "there'd", 'therefore', 'therein',
              "there'll", "there're", 'theres', "there's", 'thereupon', "there've", 'these', 'they', "they'd", "they'll", "they're",
              "they've", 'thing', 'things', 'third', 'thirty', 'this', 'thorough', 'thoroughly', 'those', 'though', 'three',
              'through', 'throughout', 'thru', 'thus', 'till', 'to', 'together', 'too', 'took', 'toward', 'towards', 'tried',
              'tries', 'truly', 'try', 'trying', "t's", 'twice', 'two', 'un', 'under', 'underneath', 'undoing', 'unfortunately',
              'unless', 'unlike', 'unlikely', 'until', 'unto', 'up', 'upon', 'upwards', 'us', 'use', 'used', 'useful', 'uses',
              'using', 'usually', 'v', 'value', 'various', 'versus', 'very', 'via', 'viz', 'vs', 'want', 'wants', 'was', "wasn't",
              'way', 'we', "we'd", 'welcome', 'well', "we'll", 'went', 'were', "we're", "weren't", "we've", 'what', 'whatever',
              "what'll", "what's", "what've", 'when', 'whence', 'whenever', 'where', 'whereafter', 'whereas', 'whereby', 'wherein',
              "where's", 'whereupon', 'wherever', 'whether', 'which', 'whichever', 'while', 'whilst', 'whither', 'who', "who'd",
              'whoever', 'whole', "who'll", 'whom', 'whomever', "who's", 'whose', 'why', 'will', 'willing', 'wish', 'with',
              'within', 'without', 'wonder', "won't", 'would', "wouldn't", 'yes', 'yet', 'you', "you'd", "you'll", 'your',
              "you're", 'yours', 'yourself', 'yourselves', "you've", 'zero', 'a', "how's", 'i', "when's", "why's", 'b', 'c',
              'd', 'e', 'f', 'g', 'h', 'j', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'uucp', 'w', 'x', 'y', 'z', 'I',
              'www', 'amount', 'bill', 'bottom', 'call', 'computer', 'con', 'couldnt', 'cry', 'de', 'describe', 'detail', 'due',
              'eleven', 'empty', 'fifteen', 'fifty', 'fill', 'find', 'forty', 'front', 'full', 'give', 'hasnt', 'herse', 'himse',
              'interest', 'itse”', 'mill', 'myse”', 'part', 'put', 'side', 'sincere', 'sixty', 'system', 'ten', 'thick', 'thin',
              'top', 'twelve', 'twenty', 'abst', 'accordance', 'act', 'added', 'affected', 'affecting', 'affects', 'ah', 'announce',
              'anymore', 'apparently', 'approximately', 'aren', 'arent', 'arise', 'auth', 'beginning', 'beginnings', 'begins',
              'biol', 'briefly', 'ca', 'date', 'ed', 'effect', 'et-al', 'ff', 'fix', 'gave', 'giving', 'heres', 'hes', 'hid', 'id',
              'im', 'immediately', 'importance', 'important', 'index', 'information', 'itd', 'keys', 'kg', 'km', 'largely', 'lets',
              'line', "'ll", 'means', 'mg', 'million', 'ml', 'mug', 'na', 'nay', 'necessarily', 'nos', 'noted', 'obtain',
              'obtained', 'omitted', 'ord', 'owing', 'page', 'pages', 'poorly', 'possibly', 'potentially', 'pp', 'predominantly',
              'previously', 'primarily', 'promptly', 'proud', 'readily', 'related', 'research', 'resulted', 'resulting', 'results',
              'sec', 'section', 'shed', 'shes', 'showed', 'shown', 'showns', 'shows', 'significant', 'significantly', 'similar',
              'similarly', 'slightly', 'somethan', 'specifically', 'state', 'states', 'stop', 'strongly', 'substantially',
              'successfully', 'sufficiently', 'suggest', 'thered', 'thereof', 'therere', 'thereto', 'theyd', 'theyre', 'thou',
              'thoughh', 'thousand', 'throug', 'til', 'tip', 'ts', 'ups', 'usefully', 'usefulness', "'ve", 'vol', 'vols', 'wed',
              'whats', 'wheres', 'whim', 'whod', 'whos', 'widely', 'words', 'youd', 'youre']

In [27]:
from time import sleep


def typewriter_effect(words, speed):
    for char in words:
        sleep(speed)
        print(char, end='', flush=True)

from string import punctuation # to format the story later ...

def generate_story(prompt):
    completed_story = complete_prompt(prompt)
    r = re.compile(r'[{}]+'.format(re.escape(punctuation)))
    story_just_words = r.sub('', completed_story)
    story_just_words = story_just_words.lower().split(" ")
    word_occurences = {}
    # get the words that occur the most often in the dictionary
    for word in  story_just_words:
        if not (word in non_info_words):
            if word not in word_occurences:
                word_occurences[word] = 1
            else:
                word_occurences[word] += 1

    # sorting the dictionary from least to most occurences
    word_occurences = {k: v for k, v in sorted(word_occurences.items(), key=lambda item: item[1])}

    # Get the words from most to least occurrences
    story_vocab = list(word_occurences.keys())[::-1]

    i = 0
    first_word = story_vocab[i]
    while first_word not in VOCABULARY:
        i += 1
        if i == len(story_vocab):
          first_word = "the"
          break
        first_word = story_vocab[i]

    # The first word in the title will be the word that occurs the most often in the story
    # get generated title and include it to the final title
    final_title = generate_title(first_word)
    final_title = ' '.join(final_title).upper()
    final_title += "\n"
    
    # return the completed title and the story
    typewriter_effect(final_title, 0.1)
    typewriter_effect(completed_story, 0.1)


if __name__ == "__main__":
    print("------------ HELLO! I am your personal bedtime story assitant ------------\n")
    print("--------------------------------------------------------------------------\n")
    prompt = input('Please write the beginning of your bedtime story and we will help you write the rest: \n')
    print("--------------------------------------------------------------------------\n")
    typewriter_effect("COMPLETING STORY", 0.1)
    typewriter_effect("... ...  ...        ... ...  ...\n", 0.2)
    
    typewriter_effect("GENERATING TITLE", 0.1)
    typewriter_effect("... ...  ...        ... ...  ...\n", 0.2)
    print("--------------------------------------------------------------------------\n")
    print("--------------------------------------------------------------------------\n")
    generate_story(prompt)

------------ HELLO! I am your personal bedtime story assitant ------------

--------------------------------------------------------------------------

Please write the beginning of your bedtime story and we will help you write the rest: 
A family of cats entered a kingdom
--------------------------------------------------------------------------

COMPLETING STORY... ...  ...        ... ...  ...
GENERATING TITLE... ...  ...        ... ...  ...
--------------------------------------------------------------------------

--------------------------------------------------------------------------

KING HORN PERUSAL .
A family of cats entered a kingdom on a hunting expedition and were hunting in a cove with some of their neighbors. The owner of the home was so distressed that he asked the animals for money, and they would not let him. They made a bargain and were out of the woods in a little time. The king gave them money, and they returned home and lived in peace, and the cats were happy. T