**Write a better auto-complete algorithm using an N-gram model**

In [None]:
import nltk
import random
from collections import Counter
from nltk.util import ngrams
from nltk.tokenize import word_tokenize

# Download the tokenizer data that word_tokenize relies on.
# Older NLTK releases load the "punkt" resource; recent releases (3.8.2+)
# look for "punkt_tab" instead, so request both to keep a fresh
# Restart-and-Run-All working on either. nltk.download reports a resource it
# does not know about and returns False rather than raising, so the extra
# request is harmless on old installs.
for resource in ("punkt", "punkt_tab"):
    nltk.download(resource)

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [None]:
# Sample training text: a tiny in-notebook corpus that the n-gram counts are
# built from. Every prediction the model can make is limited to word sequences
# that occur in these sentences.
text = """
Machine learning is a field of artificial intelligence that enables computers to learn from data.
Natural language processing is a subfield of AI that focuses on understanding human language.
Deep learning is a type of machine learning that uses neural networks to model complex patterns.
Artificial intelligence is the future of technology.
"""

In [None]:
# Tokenize and preprocess text (lower-cased so lookups are case-insensitive)
tokens = word_tokenize(text.lower())

# Create bigrams and trigrams
bigrams = list(ngrams(tokens, 2))
trigrams = list(ngrams(tokens, 3))

# Compute frequency counts for bigrams and trigrams
bigram_freq = Counter(bigrams)
trigram_freq = Counter(trigrams)

# Convert counts to relative frequencies (each count divided by the total
# number of n-grams). The totals are computed once up front instead of being
# re-summed for every key inside the comprehension.
# Note: these are joint frequencies over the whole corpus, not conditional
# probabilities P(next | context). For a fixed context the ranking of
# candidates is the same either way, which is all the predictors need.
bigram_total = sum(bigram_freq.values())
trigram_total = sum(trigram_freq.values())

bigram_prob = {gram: count / bigram_total for gram, count in bigram_freq.items()}
trigram_prob = {gram: count / trigram_total for gram, count in trigram_freq.items()}

In [None]:
# Function to predict next word using bigram model
def predict_next_word_bigram(previous_word, model=None):
    """Return the highest-scoring word that follows ``previous_word``.

    Args:
        previous_word: Context token. It is compared with ``==`` against the
            first element of every bigram key, so it must match the model's
            tokens exactly (the notebook model is built from lower-cased text).
        model: Optional mapping of ``(first, second)`` tuples to scores.
            When omitted, the notebook-level ``bigram_prob`` is used.

    Returns:
        The follower word with the largest score, or ``None`` if no bigram in
        the model starts with ``previous_word``. On equal scores the candidate
        that appears first in the mapping's iteration order wins.
    """
    if model is None:
        # Looked up at call time so a re-run of the model cell is picked up.
        model = bigram_prob

    candidates = {
        gram[1]: score for gram, score in model.items() if gram[0] == previous_word
    }
    return max(candidates, key=candidates.get) if candidates else None

# Function to predict next word using trigram model
def predict_next_word_trigram(prev_two_words, model=None):
    """Return the highest-scoring word that follows a two-word context.

    Args:
        prev_two_words: The two preceding tokens, as a tuple or any other
            iterable (e.g. a list slice such as ``words[-2:]``). It is
            converted to a tuple before matching, because trigram keys are
            tuples and a tuple never compares equal to a list.
        model: Optional mapping of ``(w1, w2, w3)`` tuples to scores.
            When omitted, the notebook-level ``trigram_prob`` is used.

    Returns:
        The follower word with the largest score, or ``None`` if no trigram in
        the model starts with the given context (including when the argument
        is not iterable). On equal scores the candidate that appears first in
        the mapping's iteration order wins.
    """
    try:
        context = tuple(prev_two_words)
    except TypeError:
        # Not a sequence of words, so there is no context to match.
        return None

    if model is None:
        # Looked up at call time so a re-run of the model cell is picked up.
        model = trigram_prob

    candidates = {
        gram[2]: score for gram, score in model.items() if gram[:2] == context
    }
    return max(candidates, key=candidates.get) if candidates else None

# Function to generate a sequence of words
def generate_text(starting_text, num_words=10, use_trigram=True):
    """Greedily extend ``starting_text`` with predicted words.

    The seed is lower-cased and tokenized, then up to ``num_words`` words are
    appended one at a time, each being the top prediction for the current
    context.

    Args:
        starting_text: Seed phrase to continue.
        num_words: Maximum number of words to append.
        use_trigram: If true and at least two words are available, predict
            from the last two words with the trigram model; otherwise predict
            from the last word with the bigram model.

    Returns:
        The seed tokens plus the generated words, joined by single spaces.
        Generation stops early when the model has no prediction for the
        current context. An empty or whitespace-only seed yields ``""``.
    """
    words = word_tokenize(starting_text.lower())

    # With no seed tokens there is no context to predict from; without this
    # guard the bigram branch would raise IndexError on words[-1].
    if not words:
        return ""

    for _ in range(num_words):
        if use_trigram and len(words) >= 2:
            next_word = predict_next_word_trigram((words[-2], words[-1]))
        else:
            next_word = predict_next_word_bigram(words[-1])

        if not next_word:
            break  # Stop if no valid predictions
        words.append(next_word)

    return " ".join(words)


In [None]:

# Example usage: continue a two-word seed with up to ten predicted words.
# use_trigram is left at its default (True), so each step is predicted from
# the last two words; generation ends early if that context has no follower.
starting_text = "deep learning"
generated_text = generate_text(starting_text, num_words=10)

print(f"Generated Text: {generated_text}")


Generated Text: deep learning is a field of artificial intelligence that enables computers to
