**Name:** Chaitanya Bhude

**Reg No:** 21BAI1445

**Topic:** Parsing and Chunking

In [None]:
!pip install nltk
import nltk
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')
nltk.download('maxent_ne_chunker')
nltk.download('words')




[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger.zip.
[nltk_data] Downloading package maxent_ne_chunker to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping chunkers/maxent_ne_chunker.zip.
[nltk_data] Downloading package words to /root/nltk_data...
[nltk_data]   Unzipping corpora/words.zip.


True

In [None]:
import pandas as pd
import nltk
from nltk import word_tokenize, pos_tag
from nltk.chunk import RegexpParser

def process_csv(csv_file_path):
    """Load the sentences to analyse from a CSV file.

    Parameters
    ----------
    csv_file_path : str or path-like
        Location of a CSV file containing a ``Sentences`` column.

    Returns
    -------
    list of str
        The non-missing values of the ``Sentences`` column, in file order.

    Raises
    ------
    KeyError
        If the file has no ``Sentences`` column.
    """
    df = pd.read_csv(csv_file_path)
    # Empty cells are read as NaN (a float); passing one to the tokenizer
    # raises TypeError, so missing rows are dropped here.
    sentences = df['Sentences'].dropna()
    # Coerce to str so a purely numeric cell is still tokenizable text.
    return sentences.astype(str).tolist()

def parse_and_chunk(sentences, grammar=None, loop=1):
    """Tokenize, POS-tag and chunk each sentence with a regexp chunk grammar.

    Parameters
    ----------
    sentences : iterable of str, or str
        Sentences to process. A single string is treated as one sentence
        rather than being iterated character by character. Entries that are
        not strings (e.g. None or NaN from an empty CSV cell) are skipped.
    grammar : str, optional
        Chunk grammar in ``RegexpParser`` syntax. When omitted, a four-stage
        NP / PP / VP / CLAUSE cascade is used.
    loop : int, optional
        Passed to ``RegexpParser``: how many times the whole cascade is
        applied. Defaults to 1, the parser's own default.

    Returns
    -------
    list of dict
        One dict per processed sentence with the keys:
        ``'sentence'`` - the input string,
        ``'parsed'``   - the tree returned by the chunk parser,
        ``'chunked'``  - the tree's top-level children, one per line.
    """
    if grammar is None:
        grammar = """
    NP: {<DT>?<JJ>*<NN.*>}   # Noun Phrase
    PP: {<IN><NP>}           # Prepositional Phrase
    VP: {<VB.*><NP|PP|CLAUSE>+$}  # Verb Phrase
    CLAUSE: {<NP><VP>}      # Clause
    """

    # A bare string would otherwise be "parsed" one character at a time.
    if isinstance(sentences, str):
        sentences = [sentences]

    # Create a chunk parser
    cp = RegexpParser(grammar, loop=loop)

    results = []
    for sentence in sentences:
        # Missing values cannot be tokenized; skip them instead of failing
        # part-way through the corpus.
        if not isinstance(sentence, str):
            continue

        # Tokenize and tag parts of speech
        tagged = pos_tag(word_tokenize(sentence))

        # Parse and chunk
        parsed = cp.parse(tagged)

        # One line per top-level child: chunks print in tree notation,
        # unchunked tokens print as (word, tag) tuples.
        chunked_str = "\n".join(str(subtree) for subtree in parsed)

        results.append({
            'sentence': sentence,
            'parsed': parsed,
            'chunked': chunked_str
        })

    return results

# Load the corpus from the CSV export and run the chunker over it.
csv_file_path = '/content/hai - Sheet1.csv'
corpus = process_csv(csv_file_path)
results = parse_and_chunk(corpus)

# Report each sentence: the full tree, then its top-level chunks, then a rule.
for result in results:
    print(
        f"Sentence: {result['sentence']}",
        "Parsed:",
        result['parsed'],
        "\nChunked:",
        result['chunked'],
        "-" * 50,
        sep="\n",
    )


Sentence: The Sun is the largest celestial body in the solar system
Parsed:
(S
  (NP The/DT Sun/NNP)
  is/VBZ
  the/DT
  largest/JJS
  (NP celestial/JJ body/NN)
  (PP in/IN (NP the/DT solar/JJ system/NN)))

Chunked:
(NP The/DT Sun/NNP)
('is', 'VBZ')
('the', 'DT')
('largest', 'JJS')
(NP celestial/JJ body/NN)
(PP in/IN (NP the/DT solar/JJ system/NN))
--------------------------------------------------
Sentence: The solar system consists of the Sun and eight revolving planets
Parsed:
(S
  (NP The/DT solar/JJ system/NN)
  consists/VBZ
  (PP of/IN (NP the/DT Sun/NNP))
  and/CC
  eight/CD
  (VP revolving/VBG (NP planets/NNS)))

Chunked:
(NP The/DT solar/JJ system/NN)
('consists', 'VBZ')
(PP of/IN (NP the/DT Sun/NNP))
('and', 'CC')
('eight', 'CD')
(VP revolving/VBG (NP planets/NNS))
--------------------------------------------------
Sentence: Ra was the Egyptian Sun God
Parsed:
(S
  (CLAUSE
    (NP Ra/NNP)
    (VP was/VBD (NP the/DT Egyptian/JJ Sun/NNP) (NP God/NNP))))

Chunked:
(CLAUSE
  (NP 