In [None]:
!pip install nltk



In [None]:
# Import required libraries
import nltk  # Natural Language Toolkit for language processing
import sys

# Define a context-free grammar for a simple language.
# NOTE: every terminal must be lowercase, because each input sentence is
# lowercased before it is tokenized (see the parsing loop below). A terminal
# containing an uppercase letter could never match any token.
# The empty alternative after '|' in Phrase_prime is the epsilon production
# that terminates a chain of 'po'-joined terms.
grammar_string = """
  Sentence -> Phrase
  Phrase   -> Term Phrase_prime
  Phrase_prime -> 'po' Term Phrase_prime |
  Term     -> Action
  Action   -> Greeting | Farewell | Request
  Request  -> 'me' 'want' Noun
  Greeting -> 'bello'
  Farewell -> 'papaya'
  Noun     -> 'banana' | 'bapple'
"""

# Convert the grammar string into an NLTK Context-Free Grammar object.
# fromstring() raises on a malformed grammar, so no further validity check
# is needed before using the result.
grammar = nltk.CFG.fromstring(grammar_string)
print("Grammar loaded")
print(grammar)  # Display the full grammar rules
print(f"Start symbol: {grammar.start()}")  # Display the starting symbol of the grammar

# Test sentences to evaluate against our grammar
original_sentences = [
    # These sentences should be parsable according to our grammar
    "Bello",                           # Simple greeting
    "Papaya",                         # Simple farewell
    "Me want banana",                  # Simple request
    "Me want bapple",                  # Alternative request
    "Bello po Me want banana",         # Greeting followed by request
    "Me want banana po Papaya",       # Request followed by farewell
    "Bello po Me want banana po Papaya", # Greeting, request, and farewell

    # These sentences should NOT be parsable according to our grammar
    "banana want Me",                  # Incorrect word order
    "po Bello",                        # 'po' cannot start a phrase
    "Me want",                         # Incomplete request (missing noun)
    "Bello po Papaya po",             # Trailing 'po' is invalid
    "banana",                          # Single noun is not a valid sentence
    ""                                 # Empty string is invalid
]

# Create a recursive descent parser for our grammar.
# trace=0 means no debug output during parsing.
parser = nltk.RecursiveDescentParser(grammar, trace=0)
print(f"Using parser: {type(parser).__name__}")

print("Parsing")
# Iterate through each test sentence (numbered from 1 for display)
for number, sentence in enumerate(original_sentences, start=1):
    print(f"\n{number}. Sentence: '{sentence}'")

    # Tokenize the sentence by splitting on whitespace and converting to lowercase
    tokens = sentence.lower().split()
    print(f"   Tokens: {tokens}")

    try:
        # Attempt to parse the tokenized sentence using our grammar.
        # Only this call is guarded: NLTK raises ValueError when a token
        # does not appear as a terminal anywhere in the grammar.
        parse_trees = list(parser.parse(tokens))
    except ValueError as e:
        # Report the actual cause instead of hiding it behind a fixed message
        print(f"   Failed to parse (token not covered by the grammar): {e}")
        continue

    # Check if any valid parse trees were found
    if parse_trees:
        print("Result: Successfully parsed!")
        print("Parse Tree(s):")
        # Display the first parse tree with Unicode box-drawing characters
        parse_trees[0].pretty_print(unicodelines=True, nodedist=4)
    else:
        print("Failed to parse (No valid structure found according to the grammar).")



Grammar loaded
Grammar with 13 productions (start state = Sentence)
    Sentence -> Phrase
    Phrase -> Term Phrase_prime
    Phrase_prime -> 'po' Term Phrase_prime
    Phrase_prime -> 
    Term -> Action
    Action -> Greeting
    Action -> Farewell
    Action -> Request
    Request -> 'me' 'want' Noun
    Greeting -> 'bello'
    Farewell -> 'poopaye'
    Noun -> 'banana'
    Noun -> 'bapple'
Start symbol: Sentence
Using parser: RecursiveDescentParser
Parsing

1. Sentence: 'Bello'
   Tokens: ['bello']
Result: Successfully parsed!
Parse Tree(s):
            Sentence                
               │                        
             Phrase                 
   ┌───────────┴─────────────┐          
  Term                       │      
   │                         │          
 Action                      │      
   │                         │          
Greeting                Phrase_prime
   │                         │          
 bello                      ...     


2. Sentence: 'Poop