In [None]:
import re
from collections import Counter, defaultdict

def train_bigram_model(text):
    """Estimate bigram probabilities from raw text.

    The text is lower-cased and split into word tokens (runs of word
    characters), so punctuation is dropped and a bigram may span a
    sentence boundary.

    Args:
        text: The training text.

    Returns:
        A dict mapping each observed ``(word1, word2)`` pair to
        ``count(word1, word2) / count(word1)``, in order of first
        occurrence. Empty when the text contains fewer than two words.

    Note:
        ``count(word1)`` is the total number of occurrences of ``word1``,
        including the occurrence at the very end of the text, which has no
        successor. The probabilities of the bigrams that start with the
        last word of the text therefore sum to less than 1.
    """
    words = re.findall(r'\b\w+\b', text.lower())

    # Pair every word with its successor. zip stops at the shorter
    # sequence, so a text with fewer than two words yields no pairs.
    bigram_counts = Counter(zip(words, words[1:]))
    word_counts = Counter(words)

    return {
        (word1, word2): count / word_counts[word1]
        for (word1, word2), count in bigram_counts.items()
    }

# Demo: fit the model on a tiny two-sentence corpus.
text = "The cat sat on the mat. The dog chased the cat."
bigram_model = train_bigram_model(text)

# Report every bigram with its estimated probability, one per line,
# in the order the bigrams first appear in the text.
for bigram, prob in bigram_model.items():
    line = f"{bigram}: {prob}"
    print(line)

('the', 'cat'): 0.5
('cat', 'sat'): 0.5
('sat', 'on'): 1.0
('on', 'the'): 1.0
('the', 'mat'): 0.25
('mat', 'the'): 1.0
('the', 'dog'): 0.25
('dog', 'chased'): 1.0
('chased', 'the'): 1.0
