1: Install & Import

In [1]:
!pip install nltk




[notice] A new release of pip is available: 25.2 -> 26.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip





In [2]:
import nltk
import random
from nltk.tokenize import word_tokenize
from collections import defaultdict, Counter

# word_tokenize reads the 'punkt' models on older NLTK releases and the
# 'punkt_tab' tables on newer ones, so request both to keep a fresh
# environment working regardless of which NLTK version pip installed.
nltk.download('punkt')
nltk.download('punkt_tab')

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\samik\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

2: Sample Training Corpus

In [3]:
# Toy training text: one short, unpunctuated sentence per line, with a leading
# and a trailing newline. Adjacent literals are concatenated at compile time.
corpus = (
    "\n"
    "I love machine learning\n"
    "I love artificial intelligence\n"
    "Machine learning is amazing\n"
    "Artificial intelligence is the future\n"
    "I love deep learning\n"
    "Deep learning is powerful\n"
)


3: Preprocess Text

In [4]:
# Lowercase before tokenizing so that "Machine" and "machine" count as one token.
corpus_lowercased = corpus.lower()
tokens = word_tokenize(corpus_lowercased)

4: Build Trigram Model

In [5]:
def build_ngram_model(tokens, n=3):
    """Count, for every (n-1)-token context, which token follows it.

    Args:
        tokens: Sequence of tokens in corpus order.
        n: Order of the model (3 = trigram). Must be at least 1; with n=1
            every token is counted under the empty context ``()``.

    Returns:
        A ``defaultdict`` mapping each context tuple to a ``Counter`` of
        ``{next_token: occurrences}``. It is empty when ``tokens`` holds
        fewer than ``n`` items.

    Raises:
        ValueError: If ``n`` is smaller than 1. Without this check the
            negative slice bounds would silently yield meaningless contexts.
    """
    if n < 1:
        raise ValueError(f"n must be at least 1, got {n}")

    model = defaultdict(Counter)
    context_len = n - 1

    for start in range(len(tokens) - n + 1):
        # The window is tokens[start : start + n]; its last item is the
        # prediction target and everything before it is the context.
        target_index = start + context_len
        context = tuple(tokens[start:target_index])
        model[context][tokens[target_index]] += 1

    return model

# n=3: each two-token context maps to counts of the token that followed it.
trigram_model = build_ngram_model(tokens, n=3)


5: Add Laplace Smoothing & Probability

In [6]:
def predict_next_word(model, context, vocab_size):
    """Rank the words observed after ``context`` by add-one smoothed score.

    Args:
        model: Mapping from context tuple to a mapping of
            ``{next_word: count}``.
        context: Iterable of context words; it is converted to a tuple
            before the lookup.
        vocab_size: Value added to the denominator of the smoothed estimate.

    Returns:
        ``None`` when the context is not a key of ``model``. Otherwise a list
        of ``(word, score)`` pairs sorted by descending score, with ties kept
        in the order the words are stored. Only words already counted for the
        context are scored, so the scores do not sum to 1.
    """
    key = tuple(context)
    if key not in model:
        return None

    followers = model[key]
    observed_total = sum(followers.values())

    # Laplace (add-one) estimate: (count + 1) / (total + vocab_size)
    ranked = [
        (candidate, (followers[candidate] + 1) / (observed_total + vocab_size))
        for candidate in followers
    ]
    # list.sort is stable, just like sorted(), so equal scores keep their order.
    ranked.sort(key=lambda pair: pair[1], reverse=True)
    return ranked


6: Create Vocabulary

In [7]:
# Distinct tokens seen in the corpus; the count is passed on as vocab_size.
vocab = set()
vocab.update(tokens)
vocab_size = len(vocab)


7: Auto-Complete Function

In [8]:
def autocomplete(text, model, vocab_size, n=3, top_k=3):
    """Suggest likely next words for ``text`` using an n-gram model.

    The text is lowercased and tokenized, its last ``n - 1`` tokens are used
    as the context, and the ranked predictions are truncated to ``top_k``.

    Args:
        text: Prompt typed so far.
        model: Mapping from context tuple to next-word counts.
        vocab_size: Vocabulary size forwarded to ``predict_next_word``.
        n: Order of the n-gram model; the context length is ``n - 1``.
        top_k: Maximum number of suggestions to return.

    Returns:
        A list of up to ``top_k`` ``(word, probability)`` pairs, or one of the
        strings ``"Not enough words for prediction"`` or
        ``"No prediction available"`` when no suggestion can be made.
    """
    words = word_tokenize(text.lower())
    context_len = n - 1

    if len(words) < context_len:
        return "Not enough words for prediction"

    # Slice from an explicit start index. The shorthand words[-(n-1):] turns
    # into words[-0:] (the whole list) when n == 1, where the context must be
    # empty to match the () key of a unigram model.
    context = words[len(words) - context_len:]
    predictions = predict_next_word(model, context, vocab_size)

    if predictions:
        return predictions[:top_k]
    return "No prediction available"


8: Test the Auto-Complete

In [9]:
# Ask the trigram model for the most likely continuations of a two-word prompt.
input_text = "i love"
suggestions = autocomplete(
    text=input_text,
    model=trigram_model,
    vocab_size=vocab_size,
)

print("Input:", input_text)
print("Suggestions:", suggestions)


Input: i love
Suggestions: [('machine', 0.13333333333333333), ('artificial', 0.13333333333333333), ('deep', 0.13333333333333333)]
