In [None]:
import nltk
from nltk.corpus import gutenberg
from nltk.util import ngrams
from collections import Counter, defaultdict
import random

# Fetch every NLTK resource this script depends on before touching the corpus
for resource in ('gutenberg', 'punkt', 'punkt_tab'):
    nltk.download(resource)

# Read the 'austen-emma.txt' corpus text, lowercase it, and split it into tokens
text = gutenberg.raw('austen-emma.txt').lower()
tokens = nltk.word_tokenize(text)

# Frequency tables keyed by single tokens, consecutive token pairs, and
# consecutive token triples respectively
unigram_model = Counter(tokens)
bigram_model = Counter(ngrams(tokens, 2))
trigram_model = Counter(ngrams(tokens, 3))

# Number of token occurrences in the corpus. Each element of `tokens` adds
# exactly one to unigram_model, so this equals the sum of its counts.
total_unigrams = len(tokens)

# Next-Word Prediction Functions
def predict_unigram():
    """Draw one token at random, weighted by its count in ``unigram_model``.

    Returns:
        A single key of ``unigram_model``; keys with higher counts are
        proportionally more likely to be chosen.
    """
    vocabulary = list(unigram_model)
    frequencies = list(unigram_model.values())
    # random.choices returns a list; request one sample and unwrap it.
    sampled = random.choices(vocabulary, weights=frequencies, k=1)
    return sampled[0]

def predict_bigram(word):
    """Return the token that most frequently follows ``word``.

    Scans ``bigram_model`` for pairs whose first element equals ``word`` and
    returns the second element of the pair with the highest count. When
    several pairs share the highest count, the one met first while iterating
    the model wins.

    Args:
        word: The preceding token to look up.

    Returns:
        The most frequent follower of ``word``, or the result of
        ``predict_unigram()`` when no pair starts with ``word``.
    """
    best_next = None
    best_count = None
    for pair, count in bigram_model.items():
        if pair[0] != word:
            continue
        # Strictly-greater keeps the earliest pair on ties.
        if best_count is None or count > best_count:
            best_next, best_count = pair[1], count

    if best_count is None:
        # No bigram starts with this word: back off to the unigram model.
        return predict_unigram()
    return best_next

def predict_trigram(w1, w2):
    """Return the token that most frequently follows the pair ``w1 w2``.

    Collects every entry of ``trigram_model`` whose first two elements are
    ``w1`` and ``w2`` and returns the third element of the entry with the
    highest count (the earliest such entry in iteration order on ties).

    Args:
        w1: The first context token.
        w2: The second context token, immediately before the prediction.

    Returns:
        The most frequent continuation of ``w1 w2``, or the result of
        ``predict_bigram(w2)`` when the pair never starts a trigram.
    """
    followers = []
    for trio, count in trigram_model.items():
        if trio[0] == w1 and trio[1] == w2:
            followers.append((trio[2], count))

    if len(followers) == 0:
        # Unseen two-word context: back off to the bigram model on w2 alone.
        return predict_bigram(w2)

    top_word, _ = max(followers, key=lambda entry: entry[1])
    return top_word

# Prediction Interface
def word_prediction():
    """Interactively pick an n-gram model and print its predicted word.

    Prints a menu, reads the model choice from standard input, then reads
    zero, one, or two context words depending on the model and prints the
    prediction. Any choice other than "1", "2" or "3" prints
    "Invalid input".

    Surrounding whitespace is stripped from everything the user types: a
    padded menu choice such as " 3" would otherwise be rejected, and a padded
    word such as "am " would not be expected to match any corpus token, so
    the prediction would silently fall back to a lower-order model.

    Returns:
        None. The result is written to standard output.
    """
    print("N-Gram Word Prediction")
    print("1. Unigram\n2. Bigram\n3. Trigram")
    choice = input("Select model (1/2/3): ").strip()

    if choice == "1":
        print("Predicted Word:", predict_unigram())

    elif choice == "2":
        # Lowercase to match the corpus, which is lowercased before tokenizing.
        word = input("Enter a word: ").strip().lower()
        print("Predicted Word:", predict_bigram(word))

    elif choice == "3":
        w1 = input("Enter first word: ").strip().lower()
        w2 = input("Enter second word: ").strip().lower()
        print("Predicted Word:", predict_trigram(w1, w2))

    else:
        print("Invalid input")

# Run the interactive prediction prompt once; this waits for user input
word_prediction()

[nltk_data] Downloading package gutenberg to /root/nltk_data...
[nltk_data]   Package gutenberg is already up-to-date!
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!


N-Gram Word Prediction
1. Unigram
2. Bigram
3. Trigram
Select model (1/2/3): 3
Enter first word: i
Enter second word: am
Predicted Word: sure
