In [1]:
!pip install nltk



In [2]:
import nltk
from nltk.util import ngrams
from collections import Counter, defaultdict

In [3]:
# Download the Punkt tokenizer resources into the local nltk_data directory.
# Presumably these are what word_tokenize needs later in the notebook
# (which of the two is required depends on the installed NLTK version — TODO confirm).
nltk.download('punkt')
# Kept as the last bare expression of the cell: its return value is what the
# notebook displays underneath the download log.
nltk.download('punkt_tab')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.


True

In [4]:
# Toy three-sentence corpus used to build the n-gram models below.
# The value starts and ends with a newline, one sentence per line.
text = (
    "\n"
    "Natural language processing is a fascinating field.\n"
    "Natural language processing helps computers understand text.\n"
    "Machine learning and natural language processing are closely related.\n"
)

In [5]:
from nltk.tokenize import word_tokenize

# Normalise case first so "Natural" and "natural" become the same token,
# then split the corpus into word and punctuation tokens.
lowercased_text = text.lower()
tokens = word_tokenize(lowercased_text)

# Preview only the first 20 tokens.
print(tokens[0:20])

['natural', 'language', 'processing', 'is', 'a', 'fascinating', 'field', '.', 'natural', 'language', 'processing', 'helps', 'computers', 'understand', 'text', '.', 'machine', 'learning', 'and', 'natural']


In [6]:
# Materialise every pair of adjacent tokens (window size 2) as a list of tuples.
bigrams = [*ngrams(tokens, 2)]

# Show the first ten pairs as a sanity check.
print(bigrams[0:10])

[('natural', 'language'), ('language', 'processing'), ('processing', 'is'), ('is', 'a'), ('a', 'fascinating'), ('fascinating', 'field'), ('field', '.'), ('.', 'natural'), ('natural', 'language'), ('language', 'processing')]


In [7]:
# Tally how many times each adjacent word pair occurs in the corpus.
bigram_freq = Counter()
bigram_freq.update(bigrams)

# Ten most frequent pairs, highest count first.
print(bigram_freq.most_common(10))

[(('natural', 'language'), 3), (('language', 'processing'), 3), (('processing', 'is'), 1), (('is', 'a'), 1), (('a', 'fascinating'), 1), (('fascinating', 'field'), 1), (('field', '.'), 1), (('.', 'natural'), 1), (('processing', 'helps'), 1), (('helps', 'computers'), 1)]


In [8]:
# Bigram lookup table: each word maps to the list of words seen immediately
# after it. Duplicates are kept so that frequencies can be counted later.
model = defaultdict(list)

for pair in bigrams:
    preceding, following = pair
    model[preceding].append(following)

In [9]:
import random

def predict_next_word(word, lookup=None):
    """Return the word that most often follows ``word`` in a bigram table.

    Args:
        word: Context word. Surrounding whitespace is stripped and the word is
            lower-cased before the lookup, so ``" Natural "`` matches
            ``"natural"``.
        lookup: Optional mapping from a word to the list of words observed
            right after it. When omitted, the notebook-level ``model`` is used.

    Returns:
        The follower with the highest count (ties are resolved in
        ``Counter.most_common`` order), or the string
        ``"No prediction available"`` when the word is unknown or has no
        recorded followers.
    """
    table = model if lookup is None else lookup
    key = word.strip().lower()

    # .get() instead of indexing: indexing a defaultdict would insert an empty
    # entry for every unknown word that is queried.
    followers = table.get(key)
    if not followers:
        return "No prediction available"

    return Counter(followers).most_common(1)[0][0]

In [10]:
# Try the bigram predictor on a few words from the corpus, one result per line.
for seed_word in ("natural", "language", "machine"):
    print(predict_next_word(seed_word))

language
processing
learning


In [11]:
# Every run of three consecutive tokens.
trigrams = [*ngrams(tokens, 3)]

# Trigram lookup table: a (first word, second word) pair maps to the list of
# words seen right after that pair. Duplicates are kept for frequency counting.
trigram_model = defaultdict(list)

for first, second, third in trigrams:
    context = (first, second)
    trigram_model[context].append(third)

In [12]:
def predict_next_word_trigram(w1, w2, lookup=None):
    """Return the word that most often follows the pair ``(w1, w2)``.

    Args:
        w1: First context word.
        w2: Second context word. Both words are stripped of surrounding
            whitespace and lower-cased before the lookup, so text typed at a
            prompt with stray spaces still matches.
        lookup: Optional mapping from a ``(word, word)`` tuple to the list of
            words observed right after that pair. When omitted, the
            notebook-level ``trigram_model`` is used.

    Returns:
        The follower with the highest count (ties are resolved in
        ``Counter.most_common`` order), or the string
        ``"No prediction available"`` when the pair is unknown or has no
        recorded followers.
    """
    table = trigram_model if lookup is None else lookup
    key = (w1.strip().lower(), w2.strip().lower())

    # .get() instead of indexing: indexing a defaultdict would insert an empty
    # entry for every unknown pair that is queried.
    followers = table.get(key)
    if not followers:
        return "No prediction available"

    return Counter(followers).most_common(1)[0][0]

In [13]:
# Try the trigram predictor on two word pairs from the corpus, one result per line.
for first_word, second_word in (("natural", "language"), ("machine", "learning")):
    print(predict_next_word_trigram(first_word, second_word))

processing
and


In [18]:
# Ask for the two context words (first, then second) and show what the
# trigram model suggests as the next word.
w1, w2 = [input(prompt) for prompt in ("Enter first word: ", "Enter second word: ")]

suggestion = predict_next_word_trigram(w1, w2)
print("Prediction:", suggestion)

Enter first word: Natural
Enter second word: Language
Prediction: processing
