In [20]:
# CYK (user-defined): inside probability summed over all parses, checked against NLTK's InsideChartParser
import nltk
from nltk import PCFG,InsideChartParser
import numpy as np
import pandas as pd
# Read the Probabilistic Context-Free Grammar (PCFG) rules from a text file
# and build the grammar object from them.
with open("pcfg_input.txt") as f:
    r = f.read()
pcfg = PCFG.fromstring(r)

# Sentence whose probability under the grammar is computed below
sent = "astronomers saw stars with ears"


# Input sentence to parse

#sentence = "the cat chased the dog".split()

def cyk_parse_with_probability(pcfg, sentence):
  """Return the inside probability of `sentence` under `pcfg` using a CYK chart.

  The result is the sum, over every derivation of the whole sentence that is
  rooted in `pcfg.start()`, of that derivation's probability.

  Args:
    pcfg: Grammar object providing `productions()`, `productions(rhs=token)`
      and `start()`. Each production must provide `lhs()`, `rhs()` and
      `prob()`. Only the productions returned for a token and productions
      with exactly two right-hand-side symbols are ever applied, so the
      grammar is presumably expected to be in Chomsky normal form.
    sentence: Sequence of tokens to parse.

  Returns:
    The summed probability as a float; 0.0 when the sentence is empty or
    cannot be derived from the start symbol.

  Side effects:
    Prints the completed chart.
  """
  n = len(sentence)
  if n == 0:
    # There is no cell table[0][n - 1] to read for an empty sentence.
    return 0.0

  # table[i][j] collects one (non-terminal, probability) entry per derivation
  # of the tokens i..j (inclusive).
  table = [[[] for _ in range(n)] for _ in range(n)]

  # Diagonal: rules that rewrite directly to each token.
  for i, token in enumerate(sentence):
    for prod in pcfg.productions(rhs=token):
      table[i][i].append((prod.lhs(), prod.prob()))

  # Only rules with two right-hand-side symbols can equal a (left, right)
  # pair, so pick them out once instead of rescanning every rule per split.
  binary_rules = [prod for prod in pcfg.productions() if len(prod.rhs()) == 2]

  # Fill the cells for spans of increasing length.
  for length in range(2, n + 1):
    for i in range(n - length + 1):
      j = i + length - 1  # last token of the span starting at i
      for k in range(i, j):  # split into i..k and k+1..j
        for prod in binary_rules:
          for left, left_prob in table[i][k]:
            for right, right_prob in table[k + 1][j]:
              if prod.rhs() == (left, right):
                prob = left_prob * right_prob * prod.prob()
                table[i][j].append((prod.lhs(), prob))
  print(table)

  # Add up every full-sentence derivation rooted in the start symbol.
  start = pcfg.start()
  P = 0.0
  for lhs, prob in table[0][n - 1]:
    if lhs == start:
      P += prob
  return P

# Tokenise once so the hand-written CYK and the NLTK parser receive exactly
# the same (lower-cased) tokens.
tokens = sent.lower().split()

probability = cyk_parse_with_probability(pcfg, tokens)
if probability > 0:
  print(f'The sentence "{sent}" is grammatically correct with a probability of:', probability)
else:
  print(f"The sentence \"{sent}\" is not grammatically correct.")

# Cross-check with NLTK's InsideChartParser and print every parse it yields.
parser = InsideChartParser(pcfg)
for tree in parser.parse(tokens):
    print(tree)

[[[(NP, 0.1)], [], [(S, 0.0126)], [], [(S, 0.0009071999999999999), (S, 0.0006804)]], [[], [(V, 1.0), (NP, 0.04)], [(VP, 0.126)], [], [(VP, 0.009071999999999998), (VP, 0.006803999999999999)]], [[], [], [(NP, 0.18)], [], [(NP, 0.01296)]], [[], [], [], [(P, 1.0)], [(PP, 0.18)]], [[], [], [], [], [(NP, 0.18)]]]
The sentence "astronomers saw stars with ears" is grammatically correct with a probability of: 0.0015875999999999998
(S
  (NP astronomers)
  (VP (V saw) (NP (NP stars) (PP (P with) (NP ears))))) (p=0.0009072)
(S
  (NP astronomers)
  (VP (VP (V saw) (NP stars)) (PP (P with) (NP ears)))) (p=0.0006804)


In [11]:
# CYK (user-defined): probability of the most likely parse, checked against NLTK's ViterbiParser
import nltk
from nltk import PCFG,ViterbiParser
import numpy as np
import pandas as pd
# Read the Probabilistic Context-Free Grammar (PCFG) rules from a text file
# and build the grammar object from them.
with open("pcfg_input.txt") as f:
    r = f.read()
pcfg = PCFG.fromstring(r)

# Sentence whose probability under the grammar is computed below
sent = "astronomers saw stars with ears"


# Input sentence to parse

#sentence = "the cat chased the dog".split()

def cyk_parse_with_probability(pcfg, sentence):
  """Return the probability of the most likely parse of `sentence` under `pcfg`.

  A CYK chart is filled with one entry per derivation of every span; the
  result is the largest probability among the derivations of the whole
  sentence that are rooted in `pcfg.start()`.

  Args:
    pcfg: Grammar object providing `productions()`, `productions(rhs=token)`
      and `start()`. Each production must provide `lhs()`, `rhs()` and
      `prob()`. Only the productions returned for a token and productions
      with exactly two right-hand-side symbols are ever applied, so the
      grammar is presumably expected to be in Chomsky normal form.
    sentence: Sequence of tokens to parse.

  Returns:
    The best parse probability as a float; 0.0 when the sentence is empty or
    cannot be derived from the start symbol.
  """
  n = len(sentence)
  if n == 0:
    # There is no cell table[0][n - 1] to read for an empty sentence.
    return 0.0

  # table[i][j] collects one (non-terminal, probability) entry per derivation
  # of the tokens i..j (inclusive).
  table = [[[] for _ in range(n)] for _ in range(n)]

  # Diagonal: rules that rewrite directly to each token.
  for i, token in enumerate(sentence):
    for prod in pcfg.productions(rhs=token):
      table[i][i].append((prod.lhs(), prod.prob()))

  # Only rules with two right-hand-side symbols can equal a (left, right)
  # pair, so pick them out once instead of rescanning every rule per split.
  binary_rules = [prod for prod in pcfg.productions() if len(prod.rhs()) == 2]

  # Fill the cells for spans of increasing length.
  for length in range(2, n + 1):
    for i in range(n - length + 1):
      j = i + length - 1  # last token of the span starting at i
      for k in range(i, j):  # split into i..k and k+1..j
        for prod in binary_rules:
          for left, left_prob in table[i][k]:
            for right, right_prob in table[k + 1][j]:
              if prod.rhs() == (left, right):
                prob = left_prob * right_prob * prod.prob()
                table[i][j].append((prod.lhs(), prob))

  # Only derivations rooted in the start symbol are parses of the sentence;
  # other constituents that happen to span every token must not be counted.
  start = pcfg.start()
  return max((prob for lhs, prob in table[0][n - 1] if lhs == start), default=0.0)

# Tokenise once so the hand-written CYK and the NLTK parser receive exactly
# the same (lower-cased) tokens.
tokens = sent.lower().split()

probability = cyk_parse_with_probability(pcfg, tokens)
if probability > 0:
  print(f'The sentence "{sent}" is grammatically correct with a probability of:', probability)
else:
  print(f"The sentence \"{sent}\" is not grammatically correct.")

# Cross-check with NLTK's ViterbiParser and print the parse it yields.
parser = ViterbiParser(pcfg)
for tree in parser.parse(tokens):
    print(tree)

The sentence "astronomers saw stars with ears" is grammatically correct with a probability of: 0.0009071999999999999
(S
  (NP astronomers)
  (VP (V saw) (NP (NP stars) (PP (P with) (NP ears))))) (p=0.0009072)
