In [1]:
# CYK Used defined
import numpy as pd
import nltk
# Define a Probabilistic Context-Free Grammar (PCFG)
pcfg = nltk.PCFG.fromstring("""
 S -> NP VP [0.9]
 S -> VP [0.1]
 VP -> V NP [0.5]
 VP -> V [0.5]
 NP -> Det N [0.3]
 NP -> N [0.7]
 N -> 'cat' [0.2]
 N -> 'book' [0.2]
 N -> 'bird' [0.2]
 N -> 'dog' [0.4]
 V -> 'read' [0.1]
 V -> 'chased' [0.6]
 V -> 'ate' [0.3]
 Det -> 'the' [0.5]
 Det -> 'a' [0.5]
""")
sentences=[
"the cat chased the dog",
"the dog chased the cat",
"the dog chased the bird",
"Book read"
]
# Input sentence to parse
#sentence = "the cat chased the dog".split()
def cyk_parse_with_probability(pcfg, sentence):
    n = len(sentence)
    table = [[[] for _ in range(n)] for _ in range(n)]
 # Initialize the table with production probabilities
    for i in range(n):
        for prod in pcfg.productions(rhs=sentence[i]):
            table[i][i] = [(prod.lhs(), prod.prob())]
 # Fill the table for longer substrings
    for length in range(2, n + 1):
        for i in range(n - length + 1):
            j = i + length - 1
            for k in range(i, j):
                for prod in pcfg.productions():
                    for left, left_prob in table[i][k]:
                        for right, right_prob in table[k + 1][j]:
                            if prod.rhs() == (left, right):
                                prob = left_prob * right_prob *prod.prob()
                                if table[i][j] is None:
                                    table[i][j] = [(prod.lhs(), prob)]
                                else:
                                    table[i][j].append((prod.lhs(),prob))
 # Check if the start symbol is in the top-right cell
    if pcfg.start() in [lhs for lhs, prob in table[0][n - 1]]:
 # Find the probability for the full sentence
        for lhs, prob in table[0][n - 1]:
            if lhs == pcfg.start():
                return prob
    return 0.0
# Parse the sentence and calculate the probability
for sent in sentences:
    probability = cyk_parse_with_probability(pcfg,sent.lower().split())
    if probability > 0:
        print(f'The sentence "{sent}" is grammatically correct with a probability of:{probability}')
    else:
        print(f"The sentence \"{sent}\" is not grammatically correct.")

The sentence "the cat chased the dog" is grammatically correct with a probability of:0.00048599999999999994
The sentence "the dog chased the cat" is grammatically correct with a probability of:0.00048599999999999994
The sentence "the dog chased the bird" is grammatically correct with a probability of:0.00048599999999999994
The sentence "Book read" is not grammatically correct.
