In [8]:
#SarinaKasaiyan
from collections import defaultdict, Counter

class StupidBackoffLanguageModel:
    """N-gram count model scored with a Stupid Backoff-style scheme.

    Counts of every n-gram of order 1..n are collected by :meth:`train`.
    :meth:`stupid_backoff_probability` scores an n-gram by its relative
    frequency among the n-grams of the same order; when the n-gram was never
    seen, it drops the leftmost word and retries, multiplying the result by
    ``alpha`` once for every word dropped.

    NOTE(review): the score of a seen n-gram is ``count / total n-grams of
    that order``, not the context-conditional ratio
    ``count(ngram) / count(context)`` of the published Stupid Backoff
    formulation. This is kept as-is so existing results do not change.

    Attributes:
        n: Maximum n-gram order that is counted during training.
        alpha: Multiplicative penalty applied per backoff step.
        ngrams: Maps an order (int) to a ``Counter`` of n-gram tuples.
    """

    def __init__(self, n, alpha=0.4):
        """Create an empty model.

        Args:
            n: Maximum n-gram order (1 = unigrams, 2 = bigrams, ...).
            alpha: Backoff scaling factor; defaults to 0.4.
        """
        self.n = n
        self.alpha = alpha
        self.ngrams = defaultdict(Counter)

    def train(self, sentences):
        """Add the n-gram counts of ``sentences`` to the model.

        Each sentence is split on whitespace and every contiguous window of
        1..n tokens is counted. Calling this method again accumulates on top
        of the existing counts.

        Args:
            sentences: Iterable of strings, one sentence each.
        """
        for sentence in sentences:
            tokens = sentence.split()
            for order in range(1, self.n + 1):
                if order > len(tokens):
                    # No window of this size (or any larger one) fits.
                    break
                self.ngrams[order].update(
                    tuple(tokens[start:start + order])
                    for start in range(len(tokens) - order + 1)
                )

    def stupid_backoff_probability(self, ngram):
        """Return the backoff score of ``ngram``.

        The longest suffix of ``ngram`` that was seen in training is located
        by repeatedly dropping the leftmost word. Its relative frequency
        within its own order is returned, multiplied by ``alpha`` once per
        dropped word.

        Args:
            ngram: Sequence of tokens (a tuple, or any iterable of hashable
                tokens).

        Returns:
            The score as a float. ``0.0`` when no suffix of ``ngram`` was
            seen, when ``ngram`` is empty, or when the model is untrained.
        """
        ngram = tuple(ngram)
        penalty = 1.0

        for start in range(len(ngram)):
            suffix = ngram[start:]
            # .get() keeps a query from inserting empty Counters into the
            # defaultdict; an empty/missing table also means "nothing seen".
            counts = self.ngrams.get(len(suffix))
            if counts:
                seen = counts[suffix]  # Counter yields 0 for unknown keys
                if seen > 0:
                    return penalty * (seen / sum(counts.values()))
            # Not found at this order: shorten the n-gram and pay the penalty.
            penalty *= self.alpha

        return 0.0

# Example usage: build a small trigram model and score a few n-grams.
sentences = ["I want to go", "I want to eat", "I want to sleep"]

# Maximum order n=3, so unigrams, bigrams and trigrams are all counted.
stupid_model = StupidBackoffLanguageModel(n=3)
stupid_model.train(sentences)

# Score one n-gram of each order; all three occur in the training sentences.
print("Stupid Backoff Probabilities:")
for query in (
    ("want", "to", "go"),  # trigram
    ("to", "go"),          # bigram
    ("go",),               # unigram
):
    print(stupid_model.stupid_backoff_probability(query))

Stupid Backoff Probabilities:
0.16666666666666666
0.1111111111111111
0.08333333333333333
