In [2]:
import nltk
from nltk.util import ngrams
from collections import Counter
import re

# Fetch the Punkt tokenizer data; presumably needed by nltk.word_tokenize,
# which tokenize() calls.
# NOTE(review): newer NLTK releases reportedly look up 'punkt_tab' instead of
# 'punkt' — confirm against the installed NLTK version.
nltk.download('punkt')

# Sample passage used as the corpus for the bigram estimates.
text = """
Natural language processing (NLP) is a machine learning technology that gives computers the ability to interpret, manipulate, 
and comprehend human language.
"""

def tokenize(text):
    """Lowercase *text* and split it into tokens with NLTK's word tokenizer.

    Args:
        text: The raw string to tokenize.

    Returns:
        The tokens produced by ``nltk.word_tokenize`` for the lowercased
        text.
    """
    return nltk.word_tokenize(text.lower())

def compute_bigram_probabilities(tokens):
    """Estimate conditional bigram probabilities from a token sequence.

    Every adjacent pair ``(word1, word2)`` is mapped to
    ``count(word1, word2) / count(word1)``: the relative frequency with
    which ``word2`` directly follows ``word1``.

    Args:
        tokens: Iterable of hashable tokens in document order. It is copied
            into a list first, so one-shot iterators and generators are
            handled the same way as lists.

    Returns:
        A dict mapping ``(word1, word2)`` tuples to floats, in order of each
        bigram's first occurrence. Empty when fewer than two tokens are given.

    Note:
        The denominator counts every occurrence of ``word1``, including one
        in the final position (which starts no bigram). The probabilities
        conditioned on the last token's word can therefore sum to less
        than 1.
    """
    # Materialize once: the tokens are traversed for both the bigram and the
    # unigram counts, and a second pass over an exhausted iterator would
    # yield nothing.
    tokens = list(tokens)

    bigram_counts = Counter(zip(tokens, tokens[1:]))
    unigram_counts = Counter(tokens)

    # The first word of every bigram is itself a token, so its unigram count
    # is always at least 1 and no zero-division guard is needed.
    return {
        (word1, word2): count / unigram_counts[word1]
        for (word1, word2), count in bigram_counts.items()
    }

# Tokenize the sample text, then estimate bigram probabilities from the tokens.
tokens = tokenize(text)
bigram_probabilities = compute_bigram_probabilities(tokens)

# Report each estimate as P(next word | previous word), four decimal places.
print("Bigram Probabilities:")
for bigram, prob in bigram_probabilities.items():
    word1, word2 = bigram
    print(f"P({word2}|{word1}) = {prob:.4f}")


Bigram Probabilities:
P(language|natural) = 1.0000
P(processing|language) = 0.5000
P((|processing) = 1.0000
P(nlp|() = 1.0000
P()|nlp) = 1.0000
P(is|)) = 1.0000
P(a|is) = 1.0000
P(machine|a) = 1.0000
P(learning|machine) = 1.0000
P(technology|learning) = 1.0000
P(that|technology) = 1.0000
P(gives|that) = 1.0000
P(computers|gives) = 1.0000
P(the|computers) = 1.0000
P(ability|the) = 1.0000
P(to|ability) = 1.0000
P(interpret|to) = 1.0000
P(,|interpret) = 1.0000
P(manipulate|,) = 0.5000
P(,|manipulate) = 1.0000
P(and|,) = 0.5000
P(comprehend|and) = 1.0000
P(human|comprehend) = 1.0000
P(language|human) = 1.0000
P(.|language) = 0.5000


[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\Welcome\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
