### Importing libraries

In [2]:
import nltk
from nltk import word_tokenize
from nltk.util import ngrams
from collections import Counter


In [3]:
# word_tokenize relies on the Punkt sentence-tokenizer data. Recent NLTK
# releases look that data up under the name 'punkt_tab', while older ones use
# 'punkt'; requesting both keeps the notebook runnable on either. On a release
# that does not know 'punkt_tab' the first call only logs an error.
nltk.download('punkt_tab')
nltk.download('punkt')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


True

### Corpus

In [5]:
# Read the sample corpus from the user, then drop surrounding whitespace.
text = input("Enter the text sample:  ")
text = text.strip()

Enter the text sample:  I want to make the most of this cultural relevance or success or whatever you want to call it, because it's not going to last.


In [6]:
# Split the sample into word/punctuation tokens. The text is lower-cased
# first so that e.g. "Want" and "want" are counted as the same token.
token = word_tokenize(
    text.lower()
)
print(token)

['i', 'want', 'to', 'make', 'the', 'most', 'of', 'this', 'cultural', 'relevance', 'or', 'success', 'or', 'whatever', 'you', 'want', 'to', 'call', 'it', ',', 'because', 'it', "'s", 'not', 'going', 'to', 'last', '.']


### 1. Unigrams

In [7]:
# n = 1: each token on its own, wrapped in a 1-tuple.
unigram = list(ngrams(token, n=1))
print("Unigram:", unigram)

Unigram: [('i',), ('want',), ('to',), ('make',), ('the',), ('most',), ('of',), ('this',), ('cultural',), ('relevance',), ('or',), ('success',), ('or',), ('whatever',), ('you',), ('want',), ('to',), ('call',), ('it',), (',',), ('because',), ('it',), ("'s",), ('not',), ('going',), ('to',), ('last',), ('.',)]


### 2. Bigrams

In [8]:
# n = 2: every pair of adjacent tokens, in order of appearance.
bigram = list(ngrams(token, n=2))
# `bigram` is already a list, so it can be printed directly.
print("Bigrams:", bigram)

Bigrams: [('i', 'want'), ('want', 'to'), ('to', 'make'), ('make', 'the'), ('the', 'most'), ('most', 'of'), ('of', 'this'), ('this', 'cultural'), ('cultural', 'relevance'), ('relevance', 'or'), ('or', 'success'), ('success', 'or'), ('or', 'whatever'), ('whatever', 'you'), ('you', 'want'), ('want', 'to'), ('to', 'call'), ('call', 'it'), ('it', ','), (',', 'because'), ('because', 'it'), ('it', "'s"), ("'s", 'not'), ('not', 'going'), ('going', 'to'), ('to', 'last'), ('last', '.')]


### 3. Trigrams

In [9]:
# n = 3: every run of three adjacent tokens, in order of appearance.
trigram = list(ngrams(token, n=3))
# `trigram` is already a list, so it can be printed directly.
print("Trigram:", trigram)


Trigram: [('i', 'want', 'to'), ('want', 'to', 'make'), ('to', 'make', 'the'), ('make', 'the', 'most'), ('the', 'most', 'of'), ('most', 'of', 'this'), ('of', 'this', 'cultural'), ('this', 'cultural', 'relevance'), ('cultural', 'relevance', 'or'), ('relevance', 'or', 'success'), ('or', 'success', 'or'), ('success', 'or', 'whatever'), ('or', 'whatever', 'you'), ('whatever', 'you', 'want'), ('you', 'want', 'to'), ('want', 'to', 'call'), ('to', 'call', 'it'), ('call', 'it', ','), ('it', ',', 'because'), (',', 'because', 'it'), ('because', 'it', "'s"), ('it', "'s", 'not'), ("'s", 'not', 'going'), ('not', 'going', 'to'), ('going', 'to', 'last'), ('to', 'last', '.')]


### 4. Bigram Probabilities

In [10]:
# Maximum-likelihood bigram model:
#     P(w2 | w1) = count(w1, w2) / count(w1)
b_count = Counter(bigram)
total = len(bigram)  # number of bigram occurrences; not used in the estimate below

# Count every token once up front. Calling token.count() inside the loop would
# rescan the whole token list for each distinct bigram.
w_count = Counter(token)

b_probs = {}
for bi, count in b_count.items():
    # bi[0] is the conditioning (first) word of the pair.
    b_probs[bi] = count / w_count[bi[0]]

print("\nBigram Probabilities:")
for b, prob in b_probs.items():
    print(b, ":", prob)


Bigram Probabilities:
('i', 'want') : 1.0
('want', 'to') : 1.0
('to', 'make') : 0.3333333333333333
('make', 'the') : 1.0
('the', 'most') : 1.0
('most', 'of') : 1.0
('of', 'this') : 1.0
('this', 'cultural') : 1.0
('cultural', 'relevance') : 1.0
('relevance', 'or') : 1.0
('or', 'success') : 0.5
('success', 'or') : 1.0
('or', 'whatever') : 0.5
('whatever', 'you') : 1.0
('you', 'want') : 1.0
('to', 'call') : 0.3333333333333333
('call', 'it') : 1.0
('it', ',') : 0.5
(',', 'because') : 1.0
('because', 'it') : 1.0
('it', "'s") : 0.5
("'s", 'not') : 1.0
('not', 'going') : 1.0
('going', 'to') : 1.0
('to', 'last') : 0.3333333333333333
('last', '.') : 1.0


### 5. Next Word Prediction

In [11]:
def next_word(prev_word, top_n=3):
    """Return the most probable words to follow ``prev_word``.

    Candidates are the bigrams in the notebook-level ``b_count`` whose first
    word equals ``prev_word``. They are ranked by their probability in the
    notebook-level ``b_probs``, highest first; because ``sorted`` is stable,
    equally probable candidates keep the order they have in ``b_count``.

    Args:
        prev_word: The context word. It is lower-cased before matching,
            since the token list the bigrams come from is lower-cased.
        top_n: Maximum number of predictions to return. Zero or a negative
            value yields an empty list.

    Returns:
        A list of at most ``top_n`` following words; empty when
        ``prev_word`` never starts a bigram.
    """
    # Match the case normalisation applied to the corpus so that "Or" and
    # "or" give the same predictions.
    prev_word = prev_word.lower()
    # A negative slice bound would drop items from the end instead of
    # limiting the result, so clamp it.
    limit = max(top_n, 0)

    candidates = [b for b in b_count.keys() if b[0] == prev_word]
    ranked = sorted(candidates, key=lambda b: b_probs[b], reverse=True)
    return [b[1] for b in ranked[:limit]]
# Ask for a context word and show the model's most likely continuations.
prev_word = input("Enter the previous word: ").strip()
pdt_word = next_word(prev_word)
print(
    "Next word prediction for '{}': {}".format(prev_word, pdt_word)
)


Enter the previous word: or
Next word prediction for 'or': ['success', 'whatever']
