Q7) Write a better auto-complete algorithm using an N-gram model (similar models are used for
translation, determining the author of a text, and speech recognition)

In [None]:
!pip install nltk



In [1]:
import nltk
from nltk.util import ngrams
from nltk import FreqDist
from collections import defaultdict

# Download the NLTK tokenizer resources needed by the tokenization step
for resource in ('punkt', 'punkt_tab'):
    nltk.download(resource)

# Step 1: Process text by tokenizing and cleaning
def preprocess_text(text):
    """Lowercase ``text`` and return its tokens as produced by ``nltk.word_tokenize``."""
    lowered = text.lower()
    return nltk.word_tokenize(lowered)

# Step 2: Create N-grams (trigrams in this case)
def generate_ngrams(text, n=3):
    """Tokenize ``text`` and return all of its n-grams as a list of tuples.

    Args:
        text: Raw input string; it is passed through ``preprocess_text``.
        n: Number of tokens per n-gram (3 yields trigrams).

    Returns:
        A list with every n-token tuple produced by ``nltk.util.ngrams``.
    """
    return list(ngrams(preprocess_text(text), n))

# Step 3: Construct a frequency model for N-grams
def build_ngram_model(corpus, n=3):
    """Count, for every (n-1)-token context in ``corpus``, the words that follow it.

    Args:
        corpus: Iterable of raw text strings.
        n: Order of the n-grams extracted from each text.

    Returns:
        A ``defaultdict`` mapping each context tuple (the first ``n - 1``
        tokens of an n-gram) to a ``FreqDist`` of the tokens that followed it.
    """
    model = defaultdict(FreqDist)
    for sentence in corpus:
        for gram in generate_ngrams(sentence, n):
            # Everything but the last token is the context; the last token
            # is the continuation being counted.
            context, following = gram[:-1], gram[-1]
            model[context][following] += 1
    return model

# Step 4: Suggest the most probable next word
def predict_next_word(prefix, ngram_model, n=3):
    """Suggest the most frequent word that follows ``prefix`` in ``ngram_model``.

    Args:
        prefix: Raw text typed so far; it is tokenized with ``preprocess_text``.
        ngram_model: Mapping from a context tuple of ``n - 1`` tokens to a
            ``FreqDist`` of the words that followed it.
        n: Order of the model. Only the last ``n - 1`` tokens of ``prefix``
            are used as the lookup context.

    Returns:
        The most frequent continuation for the context, or the string
        "No suggestion available" when the model has no data for it.
    """
    tokens = preprocess_text(prefix)
    context_size = max(n - 1, 0)
    # ``tokens[-n + 1:]`` turns into ``tokens[0:]`` (the whole input) when
    # n == 1, so the empty context of a unigram model is handled explicitly.
    context = tuple(tokens[-context_size:]) if context_size else ()

    # Membership test first: indexing a defaultdict with an unknown context
    # would silently insert an empty entry into the model.
    if context not in ngram_model:
        return "No suggestion available"

    candidates = ngram_model[context]
    # An empty distribution has no maximum (FreqDist.max() raises ValueError),
    # so treat it the same as an unknown context.
    if not candidates:
        return "No suggestion available"
    return candidates.max()


# Small in-memory training set: one sentence per entry
corpus = [
    "I enjoy coding in Python",
    "Python is a powerful tool for data analysis",
    "I find machine learning interesting",
    "Machine learning is an evolving discipline",
    "I like exploring new concepts"
]

# Step 5: Fit a trigram model on the sample sentences
ngram_model = build_ngram_model(corpus, n=3)

# Step 6: Keep prompting for phrases until the user types 'quit'
while (input_text := input("Type a phrase (or 'quit' to stop): ").strip()).lower() != "quit":
    print(f"Your input: {input_text}")
    prediction = predict_next_word(input_text, ngram_model, n=3)
    print(f"Suggested next word: {prediction}")
print("Closing the program...")

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.


Type a phrase (or 'quit' to stop): I enjoy
Your input: I enjoy
Suggested next word: coding
Type a phrase (or 'quit' to stop): Python is
Your input: Python is
Suggested next word: a
Type a phrase (or 'quit' to stop): Coding in
Your input: Coding in
Suggested next word: python
Type a phrase (or 'quit' to stop): New concepts
Your input: New concepts
Suggested next word: No suggestion available
Type a phrase (or 'quit' to stop): quit
Closing the program...
