In [1]:
from nltk import PCFG, ViterbiParser, InsideChartParser
import numpy as np
import pandas as pd

In [3]:
grammar = PCFG.fromstring("""
S -> NP VP [1.0]
VP -> V NP [0.7] | VP PP [0.3]
NP -> Det N [0.5] | NP PP [0.2] | 'John' [0.3]
PP -> P NP [1.0]
V -> 'saw' [0.2] | 'ate' [0.8]
Det -> 'the' [0.6] | 'a' [0.4]
N -> 'man' [0.5] | 'telescope' [0.5]
P -> 'with' [0.4] | 'in' [0.6]
""")
parser = ViterbiParser(grammar)

sentence = "John saw a man with a telescope"
tokens = sentence.split()

In [5]:
for tree in parser.parse(tokens):
    print(tree)

(S
  (NP John)
  (VP
    (VP (V saw) (NP (Det a) (N man)))
    (PP (P with) (NP (Det a) (N telescope))))) (p=5.04e-05)


In [7]:
import nltk
from nltk.grammar import PCFG
from nltk.parse.viterbi import ViterbiParser
from nltk.tree import Tree

grammar = PCFG.fromstring("""
S -> NP VP [1.0]
PP -> P NP [1.0]
VP -> V NP [0.7] | VP PP [0.3]
P -> 'with' [1.0]
V -> 'saw' [1.0]
NP -> NP PP [0.4] | 'astronomers' [0.1] | 'ears' [0.18] | 'saw' [0.04] | 'stars' [0.28]
""")
parser = ViterbiParser(grammar)
sentence = "astronomers saw stars with ears"
tokens = sentence.split()
try:
    for tree in parser.parse(tokens):
        tree.pretty_print()
except:
    print("No Parse tree found")

                  S                    
      ____________|____                 
     |                 VP              
     |        _________|___             
     |       |             NP          
     |       |     ________|____        
     |       |    |             PP     
     |       |    |         ____|___    
     NP      V    NP       P        NP 
     |       |    |        |        |   
astronomers saw stars     with     ears



In [9]:
pcfg = nltk.PCFG.fromstring("""
S -> NP VP [0.9] | VP [0.1]
VP -> V NP [0.5] | V [0.5]
NP -> Det N [0.3] | N [0.7]
N -> 'cat' [0.2] | 'book' [0.2] | 'bird' [0.2] | 'dog' [0.4]
V -> 'read' [0.1] | 'chased' [0.6] | 'ate' [0.3]
Det -> 'the' [0.5] | 'a' [0.5]
""")

sentences = [
    'the cat chased the dog',
    'the dog chased the cat',
    'the dog chased the bird',
    'book read'
]

def CYK(pcfg, sentence):
    n = len(sentence)
    table = [[[] for _ in range(n)] for _ in range(n)]
    for i in range(n):
        for prod in pcfg.productions(rhs=sentence[i]):
            table[i][i] = [(prod.lhs(), prod.prob())]
    for length in range(2, n+1):
        for i in range(n-length+1):
            j = i+length-1
            for k in range(i,j):
                for prod in pcfg.productions():
                    for left, left_prob in table[i][k]:
                        for right, right_prob in table[k+1][j]:
                            if prod.rhs() == (left,right):
                                prob = left_prob*right_prob*prod.prob()
                                if table[i][j] is None:
                                    table[i][j] = [(prod.lhs(),prob)]
                                else:
                                    table[i][j].append((prod.lhs(),prob))
    if pcfg.start() in [lhs for lhs, prob in table[0][n-1]]:
        for lhs, prob in table[0][n-1]:
            if lhs == pcfg.start():
                return prob
    return 0.0

for sent in sentences:
    prob = CYK(pcfg,sent.lower().split())
    if prob > 0:
        print(f'The sentence "{sent}" is grammatically correct with a probability of: {prob}')
    else:
        print(f'NOT GRAMMATICALLY CORRECT')

The sentence "the cat chased the dog" is grammatically correct with a probability of: 0.00048599999999999994
The sentence "the dog chased the cat" is grammatically correct with a probability of: 0.00048599999999999994
The sentence "the dog chased the bird" is grammatically correct with a probability of: 0.00048599999999999994
NOT GRAMMATICALLY CORRECT
