In [11]:
import math
import nltk
from tabulate import tabulate

class Dictlist(dict):
    """A ``dict`` whose item assignment accumulates values in a list.

    ``d[key] = value`` never overwrites: the first assignment to a key
    stores ``[value]`` and every later assignment appends to that list.
    """

    def __setitem__(self, key, value):
        """Append *value* to the list stored under *key*, creating it if absent."""
        # dict.setdefault inserts the empty list directly, without going
        # back through this overridden __setitem__.
        self.setdefault(key, []).append(value)


class ProductionRule(object):
    """One entry of a parse-table cell: a symbol, its two children and a probability.

    Attributes:
        result: the symbol this entry stands for.
        p1: the left child (``None`` when there is none).
        p2: the right child (``None`` when there is none).
        probability: the probability attached to this entry.
    """

    def __init__(self, result, p1, p2, probability):
        self.result, self.p1, self.p2 = result, p1, p2
        self.probability = probability

    @property
    def get_type(self):
        """The symbol stored in ``result``."""
        return self.result

    @property
    def get_left(self):
        """The left child stored in ``p1``."""
        return self.p1

    @property
    def get_right(self):
        """The right child stored in ``p2``."""
        return self.p2

    @property
    def get_probability(self):
        """The value stored in ``probability``."""
        return self.probability


class Cell(object):
    """A single parse-table cell holding a list of ``ProductionRule`` entries."""

    # Class-level fallback only; __init__ always binds a per-instance list.
    productions = []

    def __init__(self, productions=None):
        """Start with *productions*, or with a fresh empty list when omitted."""
        self.productions = [] if productions is None else productions

    def add_production(self, result, p1, p2, probability):
        """Wrap the arguments in a ``ProductionRule`` and append it to the cell."""
        rule = ProductionRule(result, p1, p2, probability)
        self.productions.append(rule)

    def set_productions(self, p):
        """Replace the cell's list of productions with *p* (not copied)."""
        self.productions = p

    @property
    def get_types(self):
        """A new list with the ``result`` of every production, in stored order."""
        return [rule.result for rule in self.productions]

    @property
    def get_rules(self):
        """The cell's own list of productions (the live list, not a copy)."""
        return self.productions


class Grammar(object):
    """Probabilistic CYK parser driven by a grammar read from a text file.

    Every non-blank line of the grammar file must look like
    ``LHS -> RHS [probability]``. Rules are indexed by their right-hand
    side, so looking up a word or a pair ``"A B"`` yields every
    ``(LHS, probability)`` that can produce it. The parser only ever looks
    up single tokens and pairs of adjacent symbols, so only rules whose
    right-hand side is one word or two symbols can take part in a parse.

    Attributes:
        grammar_rules: ``Dictlist`` mapping a right-hand side to a list of
            ``(LHS, probability)`` tuples.
        parse_table: ``None`` until ``parse`` runs; afterwards a triangular
            list of rows where ``parse_table[n - 1][i]`` is the ``Cell`` for
            the span of ``n`` tokens starting at token index ``i``.
        length: number of tokens in the last parsed sentence.
        tokens: the tokens of the last parsed sentence.
        number_of_trees: number of productions in the cell spanning the
            whole sentence after the last ``parse``.
    """

    # Class-level defaults; each instance rebinds its own values in __init__.
    grammar_rules = Dictlist()
    parse_table = None
    length = 0
    tokens = []
    number_of_trees = 0

    def __init__(self, filename):
        """Load the grammar rules from *filename*.

        Blank lines are skipped.

        Raises:
            ValueError: if a line is not of the form
                ``LHS -> RHS [probability]`` or the file holds no rules.
            OSError: if the file cannot be opened.
        """
        self.grammar_rules = Dictlist()
        self.parse_table = None
        self.length = 0
        self.tokens = []
        self.number_of_trees = 0

        # The context manager guarantees the file handle is closed.
        with open(filename) as grammar_file:
            for line_number, line in enumerate(grammar_file, start=1):
                if not line.strip():
                    continue
                try:
                    production, prob = line.split("[")
                    prob = float(prob.rstrip().strip("]"))
                    lhs, rhs = production.split("->")
                except ValueError as err:
                    raise ValueError(
                        f"Malformed grammar rule on line {line_number}: "
                        f"{line.strip()!r}"
                    ) from err
                # Collapse runs of whitespace so the key matches the
                # single-space "A B" lookups performed by parse().
                self.grammar_rules[" ".join(rhs.split())] = (lhs.strip(), prob)

        if not self.grammar_rules:
            raise ValueError("No rules found in the grammar file")

    def apply_rules(self, t):
        """Return the ``(LHS, probability)`` list for right-hand side *t*, or ``None``."""
        return self.grammar_rules.get(t)

    def parse(self, sentence):
        """Fill the CYK table for *sentence* and print the outcome.

        The sentence is split on whitespace. Each cell receives one
        production per way of deriving a symbol over its span, with the
        probability ``left * right * rule``. The verdict, the number of
        productions in the top cell and their probabilities are printed.

        Raises:
            ValueError: if the sentence has no tokens or contains a word
                that is not the right-hand side of any rule.
        """
        self.number_of_trees = 0
        self.tokens = sentence.split()
        self.length = len(self.tokens)
        if self.length < 1:
            raise ValueError("The sentence could not be read")
        self.parse_table = [
            [Cell() for _ in range(self.length - row)]
            for row in range(self.length)
        ]

        # Bottom row: one entry per lexical rule that produces the token.
        for position, token in enumerate(self.tokens):
            rules = self.apply_rules(token)
            if rules is None:
                raise ValueError(f"The word {token} is not in the grammar")
            for lhs, rule_prob in rules:
                leaf = ProductionRule(token, None, None, rule_prob)
                self.parse_table[0][position].add_production(
                    lhs, leaf, None, rule_prob
                )

        # Longer spans: try every split point and every pair of entries
        # from the two sub-spans.
        for span in range(2, self.length + 1):
            for start in range(self.length - span + 1):
                target = self.parse_table[span - 1][start]
                for split in range(1, span):
                    left_rules = self.parse_table[split - 1][start].get_rules
                    right_rules = self.parse_table[span - split - 1][
                        start + split
                    ].get_rules

                    for left in left_rules:
                        for right in right_rules:
                            rules = self.apply_rules(
                                f"{left.get_type} {right.get_type}"
                            )
                            if rules is None:
                                continue
                            for lhs, rule_prob in rules:
                                combined = (
                                    left.get_probability
                                    * right.get_probability
                                    * rule_prob
                                )
                                target.add_production(lhs, left, right, combined)

        # NOTE(review): every production in the top cell is counted,
        # whatever its symbol; no start-symbol check is made here.
        self.number_of_trees = len(self.parse_table[self.length - 1][0].get_types)
        if self.number_of_trees > 0:
            print('The sentence IS accepted in the language')
            print('Number of possible trees: ' + str(self.number_of_trees))
        else:
            print('The sentence IS NOT accepted in the language')

        self.print_probabilities()

    def print_probabilities(self):
        """Print symbol and probability of each production in the top cell.

        ``parse`` must have been called first; ``parse_table`` is ``None``
        until then.
        """
        print("\nProbabilities:")
        for production in self.parse_table[-1][0].productions:
            print(f"{production.get_type} Probability: {production.get_probability}")

    def print_parse_table(self):
        """Print the parse table, longest span first, above the tokens.

        Each row is left-padded with empty cells so the table is drawn as
        an upper triangle. ``parse`` must have been called first.
        """
        row_count = len(self.parse_table)
        lines = []
        for i, row in enumerate(reversed(self.parse_table)):
            padding = [''] * (row_count - i - 1)
            lines.append(padding + [cell.get_types for cell in row])

        lines.append(self.tokens)
        print('')
        print(tabulate(lines, showindex=False, tablefmt='plain'))
        print('')


# Demo run: build the parser from the rules in 'pcfg_grammar.txt', parse one
# sentence (parse() prints the verdict and the top-cell probabilities), then
# print the filled parse table.
g = Grammar('pcfg_grammar.txt')
g.parse('astronomers saw stars with telescope')
g.print_parse_table()

The sentence IS accepted in the language
Number of possible trees: 2

Probabilities:
S Probability: 0.000504
S Probability: 0.000378

                                         ['S', 'S']
                                  []     ['VP', 'VP']
                          ['S']   []     ['NP']
             []           ['VP']  []     ['PP']
['NP']       ['NP', 'V']  ['NP']  ['P']  ['NP']
astronomers  saw          stars   with   telescope

