<a href="https://colab.research.google.com/github/M4urici02002/TC2037-Evidence2/blob/main/Evidence2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
# Import necessary modules from nltk for natural language processing
import nltk
from nltk import CFG  # Context-Free Grammar module for defining grammars
from nltk.tokenize import word_tokenize  # Tokenization function to split text into tokens
from nltk.tree import Tree  # Import Tree for enhanced visual representation of parse trees
# Fetch the tokenizer data that word_tokenize needs. Older NLTK releases load the
# 'punkt' models, while newer releases look for 'punkt_tab' instead, so request both.
# nltk.download() reports a package it cannot find and returns False instead of
# raising, so asking for both is harmless on versions that only know one of them.
for tokenizer_resource in ('punkt', 'punkt_tab'):
    nltk.download(tokenizer_resource)

# Define the grammar using a multi-line string.
# This grammar defines how sentences in Spanish can be structured syntactically:
#   - S is a noun phrase and a verb phrase followed by SP, an optional continuation.
#   - SP is either a conjunction plus another sentence, or empty (the trailing '|').
#   - NP and VP contain left-recursive rules (e.g. NP -> NP Conj NP), and 'la' is
#     both a determiner and a pronoun, so one sentence may yield several parse trees.
#   - Every terminal is written in lowercase.
grammar = CFG.fromstring("""
  S -> NP VP SP
  SP -> Conj S |
  NP -> Det N | Det AP N | Pron | NP PP | NP Conj NP
  VP -> V NP | V PP | VP Conj VP | V Adv | V S | V NP PP
  PP -> P NP
  AP -> Adj AP | Adj
  Det -> 'el' | 'la' | 'los' | 'las' | 'un' | 'una'
  N -> 'niño' | 'niña' | 'libro' | 'ciudad' | 'parque' | 'mesa' | 'cine'
  P -> 'en' | 'sobre' | 'bajo' | 'contra' | 'al'
  Pron -> 'él' | 'ella' | 'ellos' | 'ellas' | 'lo' | 'la' | 'le'
  V -> 'corre' | 'salta' | 'piensa' | 'cree' | 'ha' | 'están' | 'van' | 'es' | 'lee'
  Adj -> 'alegre' | 'triste' | 'grande' | 'pequeño' | 'interesante'
  Adv -> 'rápidamente' | 'lentamente' | 'ayer' | 'hoy'
  Conj -> 'y' | 'o' | 'pero' | 'porque' | 'que'
""")

# Create a parser instance from the defined grammar using NLTK's ChartParser
parser = nltk.ChartParser(grammar)

# Function to parse and validate a sentence according to the defined grammar
def parse_and_validate(sentence):
    """Parse a Spanish sentence with the module-level `parser` and report the result.

    Prints the sentence, every parse tree the parser produces, and a final
    verdict line.

    Args:
        sentence: The raw sentence text to check against the grammar.

    Returns:
        True if at least one parse tree was produced, False otherwise.
    """
    # Tokenize with the Spanish tokenizer models, then lowercase each token:
    # every terminal in the grammar is lowercase, so a capitalised word
    # (e.g. a sentence-initial "La") would otherwise be rejected as unknown.
    tokens = [token.lower() for token in word_tokenize(sentence, language='spanish')]
    parsed = False  # Becomes True once at least one parse tree is produced
    print("\nSpanish sentence that is parsing:", sentence)
    try:
        # The grammar is ambiguous, so there may be more than one tree; show each.
        for tree in parser.parse(tokens):
            tree.pretty_print()
            parsed = True
    except ValueError as error:
        # The parser raises ValueError when a token is not covered by the grammar;
        # show the message and fall through to the "invalid" verdict.
        print(error)

    if parsed:
        print("The sentence is valid.")  # Sentence conforms to the grammar
    else:
        print("Sentence is invalid.")  # No valid parse tree could be generated
    return parsed

# Interactive loop: keep validating sentences until the user types 'exit'.
# Note: this cell blocks on input(), so it waits for the user during "Run All".
while True:
    try:
        user_input = input("Please enter a sentence 'exit' to quit: ")
    except (EOFError, KeyboardInterrupt):
        # stdin was closed or the kernel was interrupted: stop without a traceback
        break
    command = user_input.strip()
    if command.lower() == 'exit':  # tolerate surrounding whitespace and any casing
        break
    if not command:
        continue  # blank line: nothing to parse, ask again
    parse_and_validate(user_input)  # Process the user input through the parse and validate function

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


Please enter a sentence 'exit' to quit: él y ella están en la ciudad

Spanish sentence that is parsing: él y ella están en la ciudad
                 S                             
       __________|___________________________   
      |                   VP                 | 
      |           ________|___               |  
      NP         |            PP             | 
  ____|____      |     _______|___           |  
 NP   |    NP    |    |           NP         | 
 |    |    |     |    |        ___|____      |  
Pron Conj Pron   V    P      Det       N     SP
 |    |    |     |    |       |        |     |  
 él   y   ella están  en      la     ciudad ...

The sentence is valid.
Please enter a sentence 'exit' to quit: la niña lee el libro

Spanish sentence that is parsing: la niña lee el libro
              S                   
      ________|_________________   
     |            VP            | 
     |         ___|___          |  
     NP       |       NP        | 
  ___|___     | 

# Sección nueva