In [7]:
import nltk
from nltk.util import ngrams
from collections import Counter, defaultdict
import re

# Fetch the 'punkt' resource through NLTK's downloader. Presumably this is the
# data nltk.word_tokenize relies on in tokenize() — TODO confirm the resource
# name against the installed NLTK version.
nltk.download('punkt')

# Sample corpus used to build the demonstration bigram model.
text = """
Natural language processing (NLP) is a machine learning technology that gives computers the ability to interpret, manipulate, 
and comprehend human language.
"""

def tokenize(text):
    """Lower-case *text* and split it into tokens.

    The lower-cased string is handed to ``nltk.word_tokenize`` and its
    result is returned unchanged.
    """
    return nltk.word_tokenize(text.lower())

def compute_bigram_model(tokens):
    """Estimate next-word probabilities from a sequence of tokens.

    Args:
        tokens: Iterable of hashable tokens in corpus order. One-shot
            iterables (generators, iterators) are accepted.

    Returns:
        A nested ``defaultdict`` in which ``model[w1][w2]`` equals
        ``count(w1, w2) / count(w1)``. Only bigrams that were actually
        observed are stored; an input with fewer than two tokens yields an
        empty model.

    Note:
        The denominator counts every occurrence of ``w1``, including an
        occurrence as the very last token (which has no successor), so the
        probabilities stored for that word can sum to less than 1.
    """
    # The tokens are traversed twice (bigrams, then unigrams). Materialise
    # them first so an iterator is not exhausted by the first pass, which
    # would leave the unigram counts empty and make the division below fail.
    tokens = list(tokens)

    # Pair every token with its successor; fewer than two tokens -> no pairs.
    bigram_counts = Counter(zip(tokens, tokens[1:]))
    unigram_counts = Counter(tokens)

    bigram_model = defaultdict(lambda: defaultdict(float))
    for (word1, word2), count in bigram_counts.items():
        bigram_model[word1][word2] = count / unigram_counts[word1]

    return bigram_model

def predict_next_word(bigram_model, word):
    """Rank the recorded successors of *word*, most probable first.

    Args:
        bigram_model: Mapping of word -> mapping of next word -> probability.
        word: The word whose successors are wanted.

    Returns:
        A list of ``(next_word, probability)`` tuples sorted by descending
        probability (ties keep the mapping's iteration order), or an empty
        list when *word* is not a key of the model.
    """
    # Test membership instead of indexing so that looking up an unseen word
    # does not add an entry to a defaultdict-backed model.
    if word not in bigram_model:
        return []

    ranked = list(bigram_model[word].items())
    ranked.sort(key=lambda pair: pair[1], reverse=True)
    return ranked

# Word whose most likely successors will be reported.
input_word = "language"

# Pipeline: sample text -> tokens -> bigram model -> ranked successors.
tokens = tokenize(text)
bigram_model = compute_bigram_model(tokens)
predictions = predict_next_word(bigram_model, input_word)

# Report each candidate with its probability to four decimal places.
print(f"Next word predictions for '{input_word}':")
for next_word, prob in predictions:
    print(f"{next_word}: {prob:.4f}")


Next word predictions for 'language':
processing: 0.5000
.: 0.5000


[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\Welcome\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
