In [1]:
import nltk
import random
from collections import defaultdict, Counter

In [2]:
nltk.download('punkt_tab')
nltk.download('gutenberg')
nltk.download('punkt')

[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.
[nltk_data] Downloading package gutenberg to /root/nltk_data...
[nltk_data]   Unzipping corpora/gutenberg.zip.
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


True

In [3]:
class NGramLM:
    def __init__(self, n=5):
        self.n = n
        self.model = defaultdict(Counter)

    def train(self, words):
        words = [w.lower() for w in words]
        for i in range(len(words) - self.n + 1):
            context = tuple(words[i:i + self.n - 1])
            target = words[i + self.n - 1]
            self.model[context][target] += 1

    def generate(self, seed, max_words=25):
        result = nltk.word_tokenize(seed.lower())
        for _ in range(max_words):
            found = False
            for size in range(self.n - 1, 0, -1):
                context = tuple(result[-size:])
                if context in self.model:
                    choices = self.model[context]
                    result.append(random.choices(list(choices.keys()), weights=choices.values())[0])
                    found = True
                    break
            if not found:
                context = random.choice(list(self.model.keys()))
                choices = self.model[context]
                result.append(random.choice(list(choices.keys())))
        return " ".join(result).capitalize()

In [4]:
raw = nltk.corpus.gutenberg.words("austen-emma.txt")
model = NGramLM(n=5)
model.train(raw)

In [5]:
prompts = ["The day was very", "Deep into that", "It was a"]
for p in prompts:
    print(f"Input: {p}\nOutput: {model.generate(p)}\n")

Input: The day was very
Output: The day was very , way before happiness and was of she husband and in other had mr if think enjoyment and gardeners striking . . , been the

Input: Deep into that
Output: Deep into that .-- unwelcome satisfied tell ; as also grand , his wonder to young than could one you and rid i with a am enough weston

Input: It was a
Output: It was a opened i a , sat a towards , of " harriet , might , a of i english her own been that quickness wandering i

