## Chunking
Grouping words together that show descriptive group of words. This will be done using POS tagging and Regular expressions.

In [4]:
import nltk
from nltk.corpus import state_union
from nltk.tokenize import PunktSentenceTokenizer

In [2]:
sample_text = state_union.raw('2006-GWBush.txt')
sample_text

'PRESIDENT GEORGE W. BUSH\'S ADDRESS BEFORE A JOINT SESSION OF THE CONGRESS ON THE STATE OF THE UNION\n \nJanuary 31, 2006\n\nTHE PRESIDENT: Thank you all. Mr. Speaker, Vice President Cheney, members of Congress, members of the Supreme Court and diplomatic corps, distinguished guests, and fellow citizens: Today our nation lost a beloved, graceful, courageous woman who called America to its founding ideals and carried on a noble dream. Tonight we are comforted by the hope of a glad reunion with the husband who was taken so long ago, and we are grateful for the good life of Coretta Scott King. (Applause.)\n\nPresident George W. Bush reacts to applause during his State of the Union Address at the Capitol, Tuesday, Jan. 31, 2006. White House photo by Eric DraperEvery time I\'m invited to this rostrum, I\'m humbled by the privilege, and mindful of the history we\'ve seen together. We have gathered under this Capitol dome in moments of national mourning and national achievement. We have serv

In [3]:
custom_sent_tokenizer = PunktSentenceTokenizer(sample_text)
tokenized = custom_sent_tokenizer.tokenize(sample_text)
tokenized

["PRESIDENT GEORGE W. BUSH'S ADDRESS BEFORE A JOINT SESSION OF THE CONGRESS ON THE STATE OF THE UNION\n \nJanuary 31, 2006\n\nTHE PRESIDENT: Thank you all.",
 'Mr. Speaker, Vice President Cheney, members of Congress, members of the Supreme Court and diplomatic corps, distinguished guests, and fellow citizens: Today our nation lost a beloved, graceful, courageous woman who called America to its founding ideals and carried on a noble dream.',
 'Tonight we are comforted by the hope of a glad reunion with the husband who was taken so long ago, and we are grateful for the good life of Coretta Scott King.',
 '(Applause.)',
 "President George W. Bush reacts to applause during his State of the Union Address at the Capitol, Tuesday, Jan. 31, 2006. White House photo by Eric DraperEvery time I'm invited to this rostrum, I'm humbled by the privilege, and mindful of the history we've seen together.",
 'We have gathered under this Capitol dome in moments of national mourning and national achievement

In [12]:
# This will create a tuple part word and part of speech tag and chunks
for i in tokenized:
    words = nltk.word_tokenize(i)
    tagged = nltk.pos_tag(words)
    
    # Regex
    chunkGram = r"""Chunk: {<RB.?>*<VB.?>*<NNP>}"""
    
    #Chunking
    chunkParser = nltk.RegexpParser(chunkGram)
    chunked = chunkParser.parse(tagged)
    #chunked.draw()
    print(chunked)

(S
  (Chunk PRESIDENT/NNP)
  (Chunk GEORGE/NNP)
  (Chunk W./NNP)
  (Chunk BUSH/NNP)
  'S/POS
  (Chunk ADDRESS/NNP)
  BEFORE/IN
  (Chunk A/NNP)
  (Chunk JOINT/NNP)
  (Chunk SESSION/NNP)
  OF/IN
  (Chunk THE/NNP)
  (Chunk CONGRESS/NNP)
  (Chunk ON/NNP)
  (Chunk THE/NNP)
  (Chunk STATE/NNP)
  OF/IN
  (Chunk THE/NNP)
  (Chunk UNION/NNP)
  (Chunk January/NNP)
  31/CD
  ,/,
  2006/CD
  (Chunk THE/NNP)
  (Chunk PRESIDENT/NNP)
  :/:
  (Chunk Thank/NNP)
  you/PRP
  all/DT
  ./.)
(S
  (Chunk Mr./NNP)
  (Chunk Speaker/NNP)
  ,/,
  (Chunk Vice/NNP)
  (Chunk President/NNP)
  (Chunk Cheney/NNP)
  ,/,
  members/NNS
  of/IN
  (Chunk Congress/NNP)
  ,/,
  members/NNS
  of/IN
  the/DT
  (Chunk Supreme/NNP)
  (Chunk Court/NNP)
  and/CC
  diplomatic/JJ
  corps/NN
  ,/,
  distinguished/JJ
  guests/NNS
  ,/,
  and/CC
  fellow/JJ
  citizens/NNS
  :/:
  Today/VB
  our/PRP$
  nation/NN
  lost/VBD
  a/DT
  beloved/VBN
  ,/,
  graceful/JJ
  ,/,
  courageous/JJ
  woman/NN
  who/WP
  (Chunk called/VBD America/NNP)

(S
  Do/VBP
  n't/RB
  hesitate/VB
  to/TO
  honor/VB
  and/CC
  support/VB
  those/DT
  of/IN
  us/PRP
  who/WP
  have/VBP
  the/DT
  honor/NN
  of/IN
  protecting/VBG
  that/DT
  which/WDT
  is/VBZ
  worth/JJ
  protecting/VBG
  ./.
  ''/'')
(S
  (Chunk Staff/NNP)
  (Chunk Sergeant/NNP)
  (Chunk Dan/NNP)
  (Chunk Clay/NNP)
  's/POS
  wife/NN
  ,/,
  (Chunk Lisa/NNP)
  ,/,
  and/CC
  his/PRP$
  mom/NN
  and/CC
  dad/NN
  ,/,
  (Chunk Sara/NNP)
  (Chunk Jo/NNP)
  and/CC
  (Chunk Bud/NNP)
  ,/,
  are/VBP
  with/IN
  us/PRP
  this/DT
  evening/NN
  ./.)
(S (Chunk Welcome/NNP) ./.)
(S (/( (Chunk Applause/NNP) ./. )/))
(S
  Our/PRP$
  nation/NN
  is/VBZ
  grateful/JJ
  to/TO
  the/DT
  fallen/VBN
  ,/,
  who/WP
  live/VBP
  in/IN
  the/DT
  memory/NN
  of/IN
  our/PRP$
  country/NN
  ./.)
(S
  We/PRP
  're/VBP
  grateful/JJ
  to/TO
  all/DT
  who/WP
  volunteer/VBP
  to/TO
  wear/VB
  our/PRP$
  nation/NN
  's/POS
  uniform/NN
  --/:
  and/CC
  as/IN
  we/PRP
  honor/VBP
  our/PRP$
  brave/

(S
  And/CC
  that/DT
  will/MD
  present/VB
  future/JJ
  Congresses/NNS
  with/IN
  impossible/JJ
  choices/NNS
  --/:
  staggering/VBG
  tax/NN
  increases/NNS
  ,/,
  immense/JJ
  deficits/NNS
  ,/,
  or/CC
  deep/JJ
  cuts/NNS
  in/IN
  every/DT
  category/NN
  of/IN
  spending/NN
  ./.)
(S
  (Chunk Congress/NNP)
  did/VBD
  not/RB
  act/VB
  last/JJ
  year/NN
  on/IN
  my/PRP$
  proposal/NN
  to/TO
  (Chunk save/VB Social/NNP)
  (Chunk Security/NNP)
  --/:
  (/(
  applause/NN
  )/)
  --/:
  yet/RB
  the/DT
  rising/VBG
  cost/NN
  of/IN
  entitlements/NNS
  is/VBZ
  a/DT
  problem/NN
  that/WDT
  is/VBZ
  not/RB
  going/VBG
  away/RB
  ./.)
(S (/( (Chunk Applause/NNP) ./. )/))
(S
  And/CC
  every/DT
  year/NN
  we/PRP
  fail/VBP
  to/TO
  act/VB
  ,/,
  the/DT
  situation/NN
  gets/VBZ
  worse/JJR
  ./.)
(S
  So/RB
  tonight/JJ
  ,/,
  I/PRP
  ask/VBP
  you/PRP
  to/TO
  join/VB
  me/PRP
  in/IN
  creating/VBG
  a/DT
  commission/NN
  to/TO
  examine/VB
  the/DT
  full/JJ
  impac

(S
  Yet/RB
  as/IN
  we/PRP
  meet/VBP
  these/DT
  immediate/JJ
  needs/NNS
  ,/,
  we/PRP
  must/MD
  also/RB
  address/VB
  deeper/JJR
  challenges/NNS
  that/WDT
  existed/VBD
  before/IN
  the/DT
  storm/NN
  arrived/VBD
  ./.)
(S
  In/IN
  (Chunk New/NNP)
  (Chunk Orleans/NNP)
  and/CC
  in/IN
  other/JJ
  places/NNS
  ,/,
  many/JJ
  of/IN
  our/PRP$
  fellow/JJ
  citizens/NNS
  have/VBP
  felt/VBN
  excluded/VBN
  from/IN
  the/DT
  promise/NN
  of/IN
  our/PRP$
  country/NN
  ./.)
(S
  The/DT
  answer/NN
  is/VBZ
  not/RB
  only/RB
  temporary/JJ
  relief/NN
  ,/,
  but/CC
  schools/NNS
  that/WDT
  teach/VBP
  every/DT
  child/NN
  ,/,
  and/CC
  job/NN
  skills/NNS
  that/IN
  bring/VBG
  upward/JJ
  mobility/NN
  ,/,
  and/CC
  more/JJR
  opportunities/NNS
  to/TO
  own/VB
  a/DT
  home/NN
  and/CC
  start/VB
  a/DT
  business/NN
  ./.)
(S
  As/IN
  we/PRP
  recover/VBP
  from/IN
  a/DT
  disaster/NN
  ,/,
  let/VB
  us/PRP
  also/RB
  work/NN
  for/IN
  the/DT
  day/NN
  

## Chinking
Chinking is a lot like chunking, it is basically a way for you to remove a chunk from a chunk. The chunk that you remove from your chunk is your chink.

In [13]:
for i in tokenized:
    words = nltk.word_tokenize(i)
    tagged = nltk.pos_tag(words)
    
    # Regex
    # Chinking is represented in "}{" in regexp
    chunkGram = r"""Chunk: {<.*>+}
                            }<VB.?|IN|DT>+{"""
    
    #Chunking
    chunkParser = nltk.RegexpParser(chunkGram)
    chunked = chunkParser.parse(tagged)
    #chunked.draw()
    print(chunked)

(S
  (Chunk PRESIDENT/NNP GEORGE/NNP W./NNP BUSH/NNP 'S/POS ADDRESS/NNP)
  BEFORE/IN
  (Chunk A/NNP JOINT/NNP SESSION/NNP)
  OF/IN
  (Chunk THE/NNP CONGRESS/NNP ON/NNP THE/NNP STATE/NNP)
  OF/IN
  (Chunk
    THE/NNP
    UNION/NNP
    January/NNP
    31/CD
    ,/,
    2006/CD
    THE/NNP
    PRESIDENT/NNP
    :/:
    Thank/NNP
    you/PRP)
  all/DT
  (Chunk ./.))
(S
  (Chunk
    Mr./NNP
    Speaker/NNP
    ,/,
    Vice/NNP
    President/NNP
    Cheney/NNP
    ,/,
    members/NNS)
  of/IN
  (Chunk Congress/NNP ,/, members/NNS)
  of/IN
  the/DT
  (Chunk
    Supreme/NNP
    Court/NNP
    and/CC
    diplomatic/JJ
    corps/NN
    ,/,
    distinguished/JJ
    guests/NNS
    ,/,
    and/CC
    fellow/JJ
    citizens/NNS
    :/:)
  Today/VB
  (Chunk our/PRP$ nation/NN)
  lost/VBD
  a/DT
  beloved/VBN
  (Chunk ,/, graceful/JJ ,/, courageous/JJ woman/NN who/WP)
  called/VBD
  (Chunk America/NNP to/TO its/PRP$ founding/NN ideals/NNS and/CC)
  carried/VBD
  on/IN
  a/DT
  (Chunk noble/JJ dream/NN 

(S (Chunk (/( Applause/NNP ./. )/)))
(S
  (Chunk Our/PRP$ nation/NN)
  is/VBZ
  (Chunk grateful/JJ to/TO)
  the/DT
  fallen/VBN
  (Chunk ,/, who/WP)
  live/VBP
  in/IN
  the/DT
  (Chunk memory/NN)
  of/IN
  (Chunk our/PRP$ country/NN ./.))
(S
  (Chunk We/PRP)
  're/VBP
  (Chunk grateful/JJ to/TO)
  all/DT
  (Chunk who/WP)
  volunteer/VBP
  (Chunk to/TO)
  wear/VB
  (Chunk our/PRP$ nation/NN 's/POS uniform/NN --/: and/CC)
  as/IN
  (Chunk we/PRP)
  honor/VBP
  (Chunk our/PRP$ brave/NN troops/NNS ,/,)
  let/VB
  (Chunk us/PRP never/RB)
  forget/VBP
  the/DT
  (Chunk sacrifices/NNS)
  of/IN
  (Chunk America/NNP 's/POS military/JJ families/NNS ./.))
(S (Chunk (/( Applause/NNP ./. )/)))
(S
  (Chunk Our/PRP$ offensive/JJ)
  against/IN
  (Chunk terror/NN)
  involves/VBZ
  (Chunk more/JJR)
  than/IN
  (Chunk military/JJ action/NN ./.))
(S
  (Chunk Ultimately/RB ,/,)
  the/DT
  (Chunk only/JJ way/NN to/TO)
  defeat/VB
  the/DT
  (Chunk terrorists/NNS)
  is/VBZ
  (Chunk to/TO)
  defeat/VB
  (Chu

(S
  This/DT
  (Chunk commission/NN should/MD)
  include/VB
  (Chunk members/NNS)
  of/IN
  (Chunk Congress/NNP)
  of/IN
  both/DT
  (Chunk parties/NNS ,/, and/CC)
  offer/VBP
  (Chunk bipartisan/JJ solutions/NNS ./.))
(S
  (Chunk We/PRP)
  need/VBP
  (Chunk to/TO)
  put/VB
  (Chunk
    aside/RP
    partisan/JJ
    politics/NNS
    and/CC
    work/NN
    together/RB
    and/CC)
  get/VB
  this/DT
  (Chunk problem/NN)
  solved/VBD
  (Chunk ./.))
(S (Chunk (/( Applause/NNP ./. )/)))
(S
  Keeping/VBG
  (Chunk America/NNP competitive/JJ)
  requires/VBZ
  (Chunk us/PRP to/TO)
  open/VB
  (Chunk more/JJR markets/NNS)
  for/IN
  all/DT
  that/DT
  (Chunk Americans/NNPS)
  make/VBP
  (Chunk and/CC)
  grow/VB
  (Chunk ./.))
(S
  (Chunk One/CD out/NN)
  of/IN
  every/DT
  (Chunk five/CD factory/NN jobs/NNS)
  in/IN
  (Chunk America/NNP)
  is/VBZ
  related/VBN
  (Chunk to/TO global/JJ trade/NN ,/, and/CC we/PRP)
  want/VBP
  (Chunk people/NNS everywhere/RB to/TO)
  buy/VB
  (Chunk American/NNP ./

(S
  A/DT
  (Chunk hopeful/JJ society/NN acts/NNS boldly/RB to/TO)
  fight/VB
  (Chunk diseases/NNS)
  like/IN
  (Chunk HIV/AIDS/NNP ,/, which/WDT can/MD)
  be/VB
  prevented/VBN
  (Chunk ,/, and/CC)
  treated/VBD
  (Chunk ,/, and/CC)
  defeated/VBD
  (Chunk ./.))
(S
  (Chunk More/JJR)
  than/IN
  a/DT
  (Chunk million/CD Americans/NNPS)
  live/VBP
  with/IN
  (Chunk HIV/NNP ,/, and/CC half/NN)
  of/IN
  all/DT
  (Chunk AIDS/NNP cases/NNS)
  occur/VBP
  among/IN
  (Chunk African/JJ Americans/NNPS ./.))
(S
  (Chunk I/PRP)
  ask/VBP
  (Chunk Congress/NNP to/TO)
  reform/VB
  (Chunk and/CC)
  reauthorize/VB
  the/DT
  (Chunk Ryan/NNP White/NNP Act/NNP ,/, and/CC)
  provide/VB
  (Chunk new/JJ funding/NN to/TO states/NNS ,/,)
  so/IN
  (Chunk we/PRP)
  end/VBP
  the/DT
  (Chunk waiting/NN lists/NNS)
  for/IN
  (Chunk AIDS/NNP medicines/NNS)
  in/IN
  (Chunk America/NNP ./.))
(S (Chunk (/( Applause/NNP ./. )/)))
(S
  (Chunk We/PRP will/MD also/RB)
  lead/VB
  a/DT
  (Chunk nationwide/JJ effo