In [1]:
import nltk
# Fetch the NLTK resources used by the modelling cell. Each call hits the
# network and reports progress via [nltk_data] log lines.
nltk.download('reuters')    # Reuters corpus, read later through nltk.corpus.reuters
nltk.download('punkt')      # tokenizer data -- presumably needed by word_tokenize; TODO confirm
nltk.download('punkt_tab')  # second tokenizer package; as the cell's last expression, its return value is what gets displayed


[nltk_data] Downloading package reuters to /root/nltk_data...
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.


True

In [2]:
import nltk
import random
from nltk.corpus import reuters
from nltk.tokenize import word_tokenize
from collections import defaultdict, Counter

# Pull the entire Reuters corpus in as one raw string
text = reuters.raw()

# Tokenise the lower-cased text and keep only purely alphabetic tokens
# (numbers, punctuation and mixed tokens such as "u.s." are discarded)
tokens = [tok for tok in word_tokenize(text.lower()) if tok.isalpha()]

# ----------------------------
# Build Bigram Model
# ----------------------------
# word -> Counter of the words observed immediately after it
bigram_model = defaultdict(Counter)

# Pair each token with its successor; zip stops once the shifted copy runs out,
# so the final token never becomes a context on its own.
for w1, w2 in zip(tokens, tokens[1:]):
    bigram_model[w1][w2] += 1

# ----------------------------
# Build Trigram Model
# ----------------------------
# (word1, word2) -> Counter of the words observed immediately after that pair
trigram_model = defaultdict(Counter)

# Walk three aligned views of the token list to enumerate every consecutive triple.
for w1, w2, w3 in zip(tokens, tokens[1:], tokens[2:]):
    trigram_model[(w1, w2)][w3] += 1

# ----------------------------
# Predict Next Word (Bigram)
# ----------------------------
def predict_bigram(word, top_n=5):
    """Suggest the most frequent successors of ``word`` from ``bigram_model``.

    Args:
        word: Context word to look up (the caller is expected to lower-case it,
            since the model is built from lower-cased tokens).
        top_n: Maximum number of suggestions to return.

    Returns:
        A list of ``(next_word, count)`` tuples ordered by descending count,
        or the string ``"No prediction available."`` when ``word`` was never
        seen as a context.
    """
    # Membership test rather than indexing: indexing a defaultdict would
    # silently create an empty entry for unseen words.
    if word not in bigram_model:
        return "No prediction available."

    followers = bigram_model[word]
    return followers.most_common(top_n)

# ----------------------------
# Predict Next Word (Trigram)
# ----------------------------
def predict_trigram(word1, word2, top_n=5):
    """Suggest the most frequent successors of the pair ``(word1, word2)``.

    Args:
        word1: First word of the two-word context.
        word2: Second word of the two-word context.
        top_n: Maximum number of suggestions to return.

    Returns:
        A list of ``(next_word, count)`` tuples ordered by descending count,
        or the string ``"No prediction available."`` when the pair was never
        seen as a context in ``trigram_model``.
    """
    context = (word1, word2)

    # Membership test rather than indexing: indexing a defaultdict would
    # silently create an empty entry for unseen contexts.
    if context not in trigram_model:
        return "No prediction available."

    followers = trigram_model[context]
    return followers.most_common(top_n)

# ----------------------------
# Interactive Auto-Complete
# ----------------------------
# Prompt repeatedly for a phrase and print next-word suggestions until the
# user types 'exit'. Two or more words use the trigram model (last two words
# as context); a single word uses the bigram model.
while True:
    # Strip surrounding whitespace so inputs like "exit " still terminate the
    # loop, and lower-case to match the lower-cased training tokens.
    text_input = input("\nEnter a phrase (or type 'exit'): ").strip().lower()
    if text_input == "exit":
        break

    words = text_input.split()

    # Blank input yields no words; indexing words[-1] would raise IndexError,
    # so ask again instead of crashing the cell.
    if not words:
        print("Please enter at least one word.")
        continue

    if len(words) >= 2:
        print("Trigram Predictions:", predict_trigram(words[-2], words[-1]))
    else:
        print("Bigram Predictions:", predict_bigram(words[-1]))



Enter a phrase (or type 'exit'): the government
Trigram Predictions: [('to', 58), ('has', 52), ('securities', 51), ('had', 41), ('would', 35)]

Enter a phrase (or type 'exit'): exit
