In [1]:
import nltk
# Fetch the NLTK data packages used by the tokenization and POS-tagging cells:
# 'punkt' is tokenizer data and 'averaged_perceptron_tagger' is tagger data.
# The final call is the cell's last expression, so its return value is shown as the cell output.
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger.zip.


True

In [2]:
from nltk.chunk import RegexpParser
from nltk.tokenize import word_tokenize

In [3]:
# Sample input for the whole pipeline (tokenize -> POS-tag -> chunk).
sentence = "Vidyavardhini's college of Engineering and Technology VASAI"

### Tokenization

In [4]:
# Split the sentence into word-level tokens; the result feeds the POS tagger.
tokens = word_tokenize(sentence)

In [5]:
# Inspect the token list; note that the possessive "'s" comes out as its own token.
tokens

['Vidyavardhini',
 "'s",
 'college',
 'of',
 'Engineering',
 'and',
 'Technology',
 'VASAI']

### POS tagging

In [6]:
# Tag every token with its part of speech; yields a list of (token, tag) tuples
# that the chunk parser consumes.
pos_tags = nltk.pos_tag(tokens)

In [7]:
# Inspect the tagged tokens; most words in this sentence are tagged NNP (proper noun),
# which matters when writing the noun-phrase chunk rule.
pos_tags

[('Vidyavardhini', 'NNP'),
 ("'s", 'POS'),
 ('college', 'NN'),
 ('of', 'IN'),
 ('Engineering', 'NNP'),
 ('and', 'CC'),
 ('Technology', 'NNP'),
 ('VASAI', 'NNP')]

### Chunking patterns

In [8]:
chunk_patterns = r"""
    NP: {<DT>?<JJ>*<NN>}  # Chunk noun phrases
    VP: {<VB.*><NP|PP>}  # Chunk verb phrases
"""

In [9]:
chunk_patterns

'\n    NP: {<DT>?<JJ>*<NN>}  # Chunk noun phrases\n    VP: {<VB.*><NP|PP>}  # Chunk verb phrases\n'

### Create a chunk parser

In [10]:
chunk_parser = RegexpParser(chunk_patterns)

In [11]:
chunk_parser

<chunk.RegexpParser with 2 stages>

### Perform chunking

In [12]:
result = chunk_parser.parse(pos_tags)

In [13]:
print(result)

(S
  Vidyavardhini/NNP
  's/POS
  (NP college/NN)
  of/IN
  Engineering/NNP
  and/CC
  Technology/NNP
  VASAI/NNP)
