In [None]:
!pip install nltk

In [None]:
import nltk
from nltk.util import ngrams
from nltk import FreqDist
from collections import defaultdict

# Download the NLTK tokenizer resources (presumably what nltk.word_tokenize
# needs at runtime -- confirm against the installed NLTK version).
for _resource in ('punkt', 'punkt_tab'):
    nltk.download(_resource)

# Step 1: Preprocess text (tokenization and cleaning)
def preprocess_text(text):
    """Lowercase *text* and split it into word tokens.

    Args:
        text: Raw input string.

    Returns:
        The token list produced by ``nltk.word_tokenize`` on the
        lowercased text.
    """
    lowered = text.lower()
    return nltk.word_tokenize(lowered)

# Step 2: Generate N-grams (in this case, trigrams)
def generate_ngrams(text, n=3):
    """Tokenize *text* and return its n-grams as a list.

    Args:
        text: Raw input string; it is lowercased and tokenized by
            ``preprocess_text`` first.
        n: Number of consecutive tokens per n-gram (defaults to trigrams).

    Returns:
        A list holding every item yielded by ``nltk.util.ngrams`` for the
        token sequence.
    """
    return list(ngrams(preprocess_text(text), n))

# Step 3: Build a frequency distribution of N-grams
def build_ngram_model(corpus, n=3):
    """Count which token follows each (n-1)-token context in *corpus*.

    Args:
        corpus: Iterable of raw text strings; each one is processed on its
            own, so n-grams never span two entries.
        n: Size of the n-grams to count.

    Returns:
        A ``defaultdict`` mapping each context (all but the last token of an
        n-gram) to a ``FreqDist`` that counts the tokens seen right after it.
    """
    model = defaultdict(FreqDist)
    for document in corpus:
        for gram in generate_ngrams(document, n):
            context, follower = gram[:-1], gram[-1]
            model[context][follower] += 1
    return model

# Step 4: Predict the next word(s) based on the prefix input
def predict_next_word(prefix, ngram_model, n=3):
    """Predict the most frequent token following the end of *prefix*.

    Args:
        prefix: Raw text typed so far; it is lowercased and tokenized with
            ``preprocess_text``.
        ngram_model: Mapping from (n-1)-token context tuples to a frequency
            distribution of the tokens that followed them, as returned by
            ``build_ngram_model``.
        n: Order of the model (expected to be >= 1). It must equal the ``n``
            the model was built with, otherwise the context length will not
            match any key.

    Returns:
        The most frequent next token for the context, or the string
        ``"No prediction available"`` when the context is unknown or has no
        recorded followers.
    """
    tokens = preprocess_text(prefix)

    # Keep only the last n-1 tokens. A bare ``tokens[-n+1:]`` is wrong for
    # n == 1: the start index becomes 0 and the slice returns *every* token,
    # whereas a unigram model is keyed by the empty context ``()``.
    context_size = n - 1
    context = tuple(tokens[-context_size:]) if context_size > 0 else ()

    # ``.get`` never inserts into a defaultdict, and the truthiness check
    # also rejects an empty distribution, on which ``max()`` would raise.
    candidates = ngram_model.get(context)
    if not candidates:
        return "No prediction available"
    return candidates.max()

# Example corpus for training the model.
# Each entry is one raw sentence. build_ngram_model lowercases and tokenizes
# every entry separately, so no n-gram spans two sentences.
corpus = [
    "I love programming in Python",
    "Python is great for data science",
    "I enjoy machine learning",
    "Machine learning is a fascinating field",
    "I love to learn new things"
]

# Step 5: Train the N-gram model (trigram model in this case).
# The model is keyed by (n-1)-token prefixes, so the same n must be passed
# to predict_next_word when querying it.
ngram_model = build_ngram_model(corpus, n=3)

# Step 6: Take user input for auto-completion
while True:
    try:
        input_text = input("Enter a phrase (or 'quit' to exit): ").strip()
    except (EOFError, KeyboardInterrupt):
        # stdin was closed or the prompt was interrupted: leave the loop
        # cleanly instead of ending the session with a traceback.
        print("\nExiting...")
        break
    if input_text.lower() == "quit":
        print("Exiting...")
        break
    print(f"Input: {input_text}")
    # n must match the order the model was trained with (trigrams here).
    prediction = predict_next_word(input_text, ngram_model, n=3)
    print(f"Predicted next word: {prediction}")