In [None]:
import random

def generate_sentence(grammar, start_symbol):
    if start_symbol not in grammar:
        return [start_symbol]

    expansions = random.choice(grammar[start_symbol])
    sentence = []
    for symbol in expansions:
        sentence.extend(generate_sentence(grammar, symbol))
    return sentence

grammar = {
    "NP": [["Det", "Nom"]],
    "Nom": [["AP", "Nom"], ["book"],
            ["orange"], ["man"]],
    "AP": [["Adv", "A"], ["heavy"],
            ["orange"], ["tall"]],
    "Det": [["a"]],
    "Adv": [["very"], ["extremely"]],
    "A": [["heavy"], ["orange"], ["tall"],
        ["muscular"]]
    }

sentence = generate_sentence(grammar, 'NP')
print('\t'.join(sentence))


a	tall	man


In [None]:
non_terminals = ["NP", "Nom", "Det", "AP",
                "Adv", "A"]
terminals = ["book", "orange", "man",
             "tall", "heavy",
             "very", "muscular"]

def cykParse(w):
    n = len(w)
    T = [[set([]) for j in range(n)] for i in range(n)]
    for j in range(0, n):

        for lhs, rule in R.items():
            for rhs in rule:

                if len(rhs) == 1 and \
                rhs[0] == w[j]:
                    T[j][j].add(lhs)

        for i in range(j, -1, -1):
            for k in range(i, j + 1):
                for lhs, rule in R.items():
                    for rhs in rule:

                        if len(rhs) == 2 and \
                        rhs[0] in T[i][k] and \
                        rhs[1] in T[k + 1][j]:
                            T[i][j].add(lhs)

    for row in T:
        print(row)

    if len(T[0][n-1]) != 0:
        print("True")
    else:
        print("False")

w = "a very heavy orange book".split()
cykParse(w)

[{'Det'}, set(), set(), {'NP'}, {'NP'}]
[set(), {'Adv'}, {'AP'}, {'Nom'}, {'Nom'}]
[set(), set(), {'AP', 'A'}, {'Nom'}, {'Nom'}]
[set(), set(), set(), {'Nom', 'AP', 'A'}, {'Nom'}]
[set(), set(), set(), set(), {'Nom'}]
True


In [2]:
from nltk import Nonterminal

# CKY Algorithm
def cky_parse(sentence, grammar):
    words = sentence.split()
    n = len(words)
    table = [[set() for _ in range(n+1)] for _ in range(n+1)]

    # Initialize table with lexical rules
    for i, word in enumerate(words):
        productions = grammar.productions(rhs=word)
        for prod in productions:
            table[i][i+1].add(prod.lhs())

    # CYK Algorithm
    for length in range(2, n+1):
        for start in range(n-length+1):
            end = start + length
            for mid in range(start+1, end):
                for prod in grammar.productions():
                    if len(prod.rhs()) == 2:
                        B, C = prod.rhs()
                        if B in table[start][mid] and C in table[mid][end]:
                            table[start][end].add(prod.lhs())

    # Check if the start symbol is in the final cell
    start_symbol = grammar.start()
    if start_symbol in table[0][n]:
        return True
    else:
        return False

# Example usage
sentence_to_parse = "the man runs"
if cky_parse(sentence_to_parse, grammar):
    print(f"The sentence '{sentence_to_parse}' is grammatically correct.")
else:
    print(f"The sentence '{sentence_to_parse}' is not grammatically correct.")


The sentence 'the man runs' is not grammatically correct.


In [3]:
from nltk import Nonterminal
from nltk import CFG

def cky_parse(sentence, grammar):
    words = sentence.split()
    n = len(words)
    table = [[set() for _ in range(n+1)] for _ in range(n+1)]

    # Fill table with lexical rules
    for i, word in enumerate(words):
        productions = grammar.productions(rhs=word)
        for prod in productions:
            table[i][i+1].add(prod.lhs())

    # CKY Algorithm
    for length in range(2, n+1):
        for start in range(n-length+1):
            end = start + length
            for mid in range(start+1, end):
                for prod in grammar.productions():
                    if len(prod.rhs()) == 2:
                        B, C = prod.rhs()
                        if B in table[start][mid] and C in table[mid][end]:
                            table[start][end].add(prod.lhs())

    # Print parsing steps
    for length in range(1, n+1):
        for start in range(n-length+1):
            end = start + length
            print(f"T [{start+1}, {end}] =", table[start][end])

    # Check if the start symbol is in the final cell
    start_symbol = grammar.start()
    if start_symbol in table[0][n]:
        return True
    else:
        return False

# Define a Context-Free Grammar (CFG)
grammar = CFG.fromstring("""
    S -> NP VP
    NP -> Det Nom | Det Adj Nom
    VP -> V
    Nom -> Adj Nom | N
    Det -> 'the' | 'a'
    Adj -> 'orange' | 'very'
    N -> 'book'
    V -> 'is'
""")

# Example usage
sentence_to_parse = "the very orange book is"
if cky_parse(sentence_to_parse, grammar):
    print(f"The sentence '{sentence_to_parse}' is grammatically correct.")
else:
    print(f"The sentence '{sentence_to_parse}' is not grammatically correct.")


T [1, 1] = {Det}
T [2, 2] = {Adj}
T [3, 3] = {Adj}
T [4, 4] = {N}
T [5, 5] = {V}
T [1, 2] = set()
T [2, 3] = set()
T [3, 4] = set()
T [4, 5] = set()
T [1, 3] = set()
T [2, 4] = set()
T [3, 5] = set()
T [1, 4] = set()
T [2, 5] = set()
T [1, 5] = set()
The sentence 'the very orange book is' is not grammatically correct.
