In [1]:
import re
from nltk.corpus import wordnet

# Word Replacement

In [2]:
R_patterns = [(r'won\'t', 'will not'),(r'can\'t', 'cannot'),(r'i\'m', 'i am'),(r'(\w+)\'ll', 'g<1> will'),(r'(\w+)n\'t', 'g<1> not'),(r'(\w+)\'ve', 'g<1> have'),(r'(\w+)\'s', 'g<1> is'),(r'(\w+)\'re', 'g<1> are'),]

In [3]:
class REReplacer(object):
    def __init__ (self, pattern = R_patterns):
        self.pattern = [(re.compile(regex), repl) for (regex, repl) in pattern]
    def replace(self, text):
        s = text
        for (pattern, repl) in self.pattern:
            s = re.sub(pattern, repl, s)
        return s

In [4]:
rep_word = REReplacer()
rep_word.replace("I won't do it")

'I will not do it'

## Synonym replacement

In [5]:
class word_syn_replacer(object):
    def __init__(self, word_map):
        self.word_map = word_map
    def replace(self, word):
        return self.word_map.get(word, word)
rep_syn = word_syn_replacer({'bday' :'birthday'})
rep_syn.replace('bday')

'birthday'

## Antonym replacement

In [6]:
class word_antonym_replacer(object):
    def replace(self, word, pos = None):
        antonyms = set()
        for syn in wordnet.synsets(word, pos = pos):
            for lemma in syn.lemmas():
                for antonym in lemma.antonyms():
                    antonyms.add(antonym.name())
        if len(antonyms) == 1:
            return antonyms.pop()
        else:
            return None
    def replace_negations(self, sent):
        i, l = 0, len(sent)
        words = []
        while i < l:
            word = sent[i]
            if word == 'not' and i+1 < l:
                ant = self.replace(sent[i+1])
                if ant:
                    words.append(ant)
                    i += 2
                    continue
            words.append(word)
            i += 1
        return words

rep_antonym = word_antonym_replacer()
rep_antonym.replace('uglify')

'beautify'

In [7]:
sentence = ["Let us", "not", "uglify", "our", "country"]
rep_antonym.replace_negations(sentence)

['Let us', 'beautify', 'our', 'country']

# Parse Tree

In [8]:
from nltk.tree import *
left = Tree('NP', [Tree('A', ['The']), Tree('NN', ['sun'])])
right = Tree('VP', [Tree('V', ['moves']), Tree('NN',['eastward'])])
tree = Tree('S', [left, right])
print(tree)

(S (NP (A The) (NN sun)) (VP (V moves) (NN eastward)))


In [9]:
tree.label(), left.label(), right.label()

('S', 'NP', 'VP')

In [10]:
print(tree.pformat_latex_qtree())

\Tree [.S
        [.NP [.A The ] [.NN sun ] ]
        [.VP [.V moves ] [.NN eastward ] ] ]


In [11]:
tree.pretty_print()

             S                    
      _______|_________            
     NP                VP         
  ___|___          ____|_____      
 A       NN       V          NN   
 |       |        |          |     
The     sun     moves     eastward



In [12]:
tree.pretty_print(unicodelines=True, nodedist=2)

                S                       
      ┌─────────┴───────────┐             
      NP                    VP          
 ┌────┴────┐          ┌─────┴──────┐      
 A         NN         V            NN   
 │         │          │            │      
The       sun       moves       eastward



In [13]:
print(tree.treepositions())

[(), (0,), (0, 0), (0, 0, 0), (0, 1), (0, 1, 0), (1,), (1, 0), (1, 0, 0), (1, 1), (1, 1, 0)]


In [14]:
print(tree[0,1,0])

sun


In [15]:
tree2 = Tree.fromstring('(S (NP (A The) (NN sun)) (VP(V moves) (NN eastward)))')
print(tree2)

(S (NP (A The) (NN sun)) (VP (V moves) (NN eastward)))


In [16]:
tree2==tree

True

In [17]:
tree3 = ImmutableTree.fromstring('(S (NP (A The) (NNmoon)) (VP (V moves) (NN northward)))')
print(tree3)
type(tree3)

(S (NP (A The) (NNmoon )) (VP (V moves) (NN northward)))


nltk.tree.immutable.ImmutableTree

In [18]:
tree4 = Tree.fromstring('(S (NP (A The) (NN moon))(VP (V moves) (NN northward)))')
tree4.pretty_print()

              S                     
      ________|_________             
     NP                 VP          
  ___|___           ____|______      
 A       NN        V           NN   
 |       |         |           |     
The     moon     moves     northward



In [19]:
tree5 = Tree.fromstring('(S (NP (A The) (NN car)) (VP (V moves) (NN straight)))', read_node=lambda s:'<%s>'%s, read_leaf=lambda s: ' "%s" ' %s )
print(tree5)
tree5.pretty_print()

(<S>
  (<NP> (<A>  "The" ) (<NN>  "car" ))
  (<VP> (<V>  "moves" ) (<NN>  "straight" )))
                     <S>                            
          ____________|_____________                 
        <NP>                       <VP>             
    _____|______              ______|________        
  <A>          <NN>         <V>             <NN>    
   |            |            |               |       
 "The"        "car"       "moves"        "straight" 



## Basic Parts of Speech (POS) tagging

In [28]:
import nltk
from nltk import word_tokenize
sentence = "I am going to school"
print (nltk.pos_tag(word_tokenize(sentence)))

[('I', 'PRP'), ('am', 'VBP'), ('going', 'VBG'), ('to', 'TO'), ('school', 'NN')]


## Named Entity Recognition

In [22]:
import nltk
from nltk import word_tokenize, pos_tag, ne_chunk
nltk.download('punkt')
nltk.download('maxent_ne_chunker')
nltk.download('words')
nltk.download('averaged_perceptron_tagger')

[nltk_data] Downloading package punkt to C:\Users\Shahana
[nltk_data]     S\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package maxent_ne_chunker to C:\Users\Shahana
[nltk_data]     S\AppData\Roaming\nltk_data...
[nltk_data]   Unzipping chunkers\maxent_ne_chunker.zip.
[nltk_data] Downloading package words to C:\Users\Shahana
[nltk_data]     S\AppData\Roaming\nltk_data...
[nltk_data]   Unzipping corpora\words.zip.
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     C:\Users\Shahana S\AppData\Roaming\nltk_data...
[nltk_data]   Unzipping taggers\averaged_perceptron_tagger.zip.


True

In [23]:
text = "Mark Zuckerberg is one of the founders of Facebook, a company from the United States"

In [24]:
# Tokenize the text
words = word_tokenize(text)

In [25]:
tags = pos_tag(words)
tags

[('Mark', 'NNP'),
 ('Zuckerberg', 'NNP'),
 ('is', 'VBZ'),
 ('one', 'CD'),
 ('of', 'IN'),
 ('the', 'DT'),
 ('founders', 'NNS'),
 ('of', 'IN'),
 ('Facebook', 'NNP'),
 (',', ','),
 ('a', 'DT'),
 ('company', 'NN'),
 ('from', 'IN'),
 ('the', 'DT'),
 ('United', 'NNP'),
 ('States', 'NNPS')]

In [29]:
ner_tags = ne_chunk(tags)
ner_tags

ModuleNotFoundError: No module named 'svgling'

Tree('S', [Tree('PERSON', [('Mark', 'NNP')]), Tree('ORGANIZATION', [('Zuckerberg', 'NNP')]), ('is', 'VBZ'), ('one', 'CD'), ('of', 'IN'), ('the', 'DT'), ('founders', 'NNS'), ('of', 'IN'), Tree('GPE', [('Facebook', 'NNP')]), (',', ','), ('a', 'DT'), ('company', 'NN'), ('from', 'IN'), ('the', 'DT'), Tree('GPE', [('United', 'NNP'), ('States', 'NNPS')])])

In [27]:
for entity in ner_tags:
    if isinstance(entity, nltk.Tree):
        entity_words = [word for word, tag in entity.leaves()]
        entity_name = " ".join(entity_words)
        entity_label = entity.label()
        print(f"Entity: {entity_name}, Label: {entity_label}")

Entity: Mark, Label: PERSON
Entity: Zuckerberg, Label: ORGANIZATION
Entity: Facebook, Label: GPE
Entity: United States, Label: GPE
