In [1]:
import nltk
#nltk.download()
##### python -m nltk.downloader all
##### python -m nltk.downloader -d /usr/local/share/nltk_data all


In [2]:
# Display the version string of the NLTK package imported above.
nltk.__version__

'3.2.4'

## WordNet
WordNet is a lexical database of English (a dictionary organised by meaning) designed specifically for natural language processing.
A Synset is the simple interface NLTK provides for looking up words in WordNet. Each Synset instance groups synonymous words that express the same concept. Some words belong to only one Synset, while others belong to several.


# Find synonyms and antonyms

In [3]:
#nltk.download('wordnet')
from nltk.corpus import wordnet
# Fetch every synset WordNet lists for "happy".
syn = wordnet.synsets('happy')
print("Synonyms of happy .. ", syn)

# Describe the first synset returned: its gloss, usage examples and lemmas.
first_sense = syn[0]
for label, value in (
    ("Definition:", first_sense.definition()),
    ("Example:", first_sense.examples()),
    ("Lemma:", first_sense.lemmas()),
):
    print(label, value)

Synonyms of happy ..  [Synset('happy.a.01'), Synset('felicitous.s.02'), Synset('glad.s.02'), Synset('happy.s.04')]
Definition: enjoying or showing or marked by joy or pleasure
Example: ['a happy smile', 'spent many happy days on the beach', 'a happy marriage']
Lemma: [Lemma('happy.a.01.happy')]


In [15]:
# Same lookup as for "happy", this time for a word with noun and verb senses.
syn = wordnet.synsets('bike')
print("Synonyms of bike .. ", syn)

# Only the first synset in the returned list is inspected further.
top_sense = syn[0]
print("Definition:", top_sense.definition())
print("Lemma:", top_sense.lemmas())

Synonyms of bike ..  [Synset('motorcycle.n.01'), Synset('bicycle.n.01'), Synset('bicycle.v.01')]
Definition: a motor vehicle with two wheels and a strong frame
Lemma: [Lemma('motorcycle.n.01.motorcycle'), Lemma('motorcycle.n.01.bike')]


<img src = "./images/synset.PNG">

In [4]:
# Every synset for "dog", with no part-of-speech restriction.
dog_senses_all = wordnet.synsets('dog')
print(dog_senses_all)

# The `pos` argument constrains the lookup to one part of speech (verbs here).
dog_senses_verb = wordnet.synsets('dog', pos=wordnet.VERB)
print(dog_senses_verb)

[Synset('dog.n.01'), Synset('frump.n.01'), Synset('dog.n.03'), Synset('cad.n.01'), Synset('frank.n.02'), Synset('pawl.n.01'), Synset('andiron.n.01'), Synset('chase.v.01')]
[Synset('chase.v.01')]


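Synset names have the form `lemma.pos.number`:
* lemma: the word’s morphological stem
* pos: the part-of-speech tag, one of `n` (NOUN), `v` (VERB), `a` (ADJ), `s` (ADJ_SAT, satellite adjective) or `r` (ADV)
* number: the two-digit sense number, counting from 01 (e.g. `dog.n.01` is the first noun sense of "dog")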

****************************************************************************************

The WordNet corpus reader gives access to the Open Multilingual WordNet, using ISO-639 language codes.

In [5]:
# Language codes known to this WordNet reader, displayed in sorted order.
language_codes = wordnet.langs()
sorted(language_codes)

['als',
 'arb',
 'bul',
 'cat',
 'cmn',
 'dan',
 'ell',
 'eng',
 'eus',
 'fas',
 'fin',
 'fra',
 'glg',
 'heb',
 'hrv',
 'ind',
 'ita',
 'jpn',
 'nld',
 'nno',
 'nob',
 'pol',
 'por',
 'qcn',
 'slv',
 'spa',
 'swe',
 'tha',
 'zsm']

In [8]:
# U+72AC (犬) is the character whose UTF-8 encoding is the bytes E7 8A AC.
japanese_word = u'\u72ac'
# Query the Japanese wordnet; results come back as (English-named) synsets.
wordnet.synsets(japanese_word, lang='jpn')

[Synset('dog.n.01'), Synset('spy.n.01')]

## Hypernyms and Hyponyms

* Hypernyms: more abstract (general) terms, e.g. `greeting` is a hypernym of `hello`.
* Hyponyms: more specific terms, e.g. `poodle` is a hyponym of `dog`.

In [12]:
# Take the first synset returned for "hello" and inspect its place in the
# hypernym/hyponym hierarchy. Label strings are kept exactly as recorded in
# the cell output.
syn = wordnet.synsets('hello')[0]
print("Synset name :  ", syn.name())

# Hypernyms are the more abstract parents of this synset; look them up once
# and reuse the result below.
parents = syn.hypernyms()
print("\nSynset abstract term :  ", parents)

# Hyponyms of the first parent are the more specific terms under that parent,
# so this list holds the siblings of `syn` (and `syn` itself).
print("\nSynset specific term :  ", parents[0].hyponyms())

# Root hypernyms: the top-most ancestor(s) reachable from this synset.
print("\nSynset root hypernerm :  ", syn.root_hypernyms())

Synset name :   hello.n.01

Synset abstract term :   [Synset('greeting.n.01')]

Synset specific term :   [Synset('calling_card.n.02'), Synset('good_afternoon.n.01'), Synset('good_morning.n.01'), Synset('hail.n.03'), Synset('hello.n.01'), Synset('pax.n.01'), Synset('reception.n.01'), Synset('regard.n.03'), Synset('salute.n.02'), Synset('salute.n.03'), Synset('welcome.n.02'), Synset('well-wishing.n.01')]

Synset root hypernerm :   [Synset('entity.n.01')]


In [10]:
# Fetch two synsets directly by name and explore the relations of the dog one.
dog = wordnet.synset('dog.n.01')
cat = wordnet.synset('cat.n.01')

# Each entry pairs the printed label with the Synset method that produces it.
relations = (
    ("hypernyms : ", dog.hypernyms),
    ("hyponyms: ", dog.hyponyms),
    ("member_holonyms :", dog.member_holonyms),
    ("root_hypernyms : ", dog.root_hypernyms),
)
for label, lookup in relations:
    print(label, lookup())

# Most specific ancestor(s) shared by the dog and cat synsets.
print("lowest_common_hypernyms : ", dog.lowest_common_hypernyms(cat))

hypernyms :  [Synset('canine.n.02'), Synset('domestic_animal.n.01')]
hyponyms:  [Synset('basenji.n.01'), Synset('corgi.n.01'), Synset('cur.n.01'), Synset('dalmatian.n.02'), Synset('great_pyrenees.n.01'), Synset('griffon.n.02'), Synset('hunting_dog.n.01'), Synset('lapdog.n.01'), Synset('leonberg.n.01'), Synset('mexican_hairless.n.01'), Synset('newfoundland.n.01'), Synset('pooch.n.01'), Synset('poodle.n.01'), Synset('pug.n.01'), Synset('puppy.n.01'), Synset('spitz.n.01'), Synset('toy_dog.n.01'), Synset('working_dog.n.01')]
member_holonyms : [Synset('canis.n.01'), Synset('pack.n.06')]
root_hypernyms :  [Synset('entity.n.01')]
lowest_common_hypernyms :  [Synset('carnivore.n.01')]


In [13]:

# Print the part-of-speech tag of the first synset returned for each sample
# word. WordNet tags: n = noun, v = verb, a = adjective, r = adverb.
# One loop replaces four copy-pasted lookup/print pairs; `syn` ends up bound
# to the synset of the last word, exactly as before.
for word in ('hello', 'doing', 'beautiful', 'quickly'):
    syn = wordnet.synsets(word)[0]
    print("Syn tag : ", syn.pos())

Syn tag :  n
Syn tag :  v
Syn tag :  a
Syn tag :  r


In [11]:
# Antonyms
# Antonyms hang off individual lemmas, not synsets, so walk every lemma of
# every synset and check whether it has any.
syn = []   # lemma names of all synsets found for the word (duplicates kept)
ant = []   # first antonym name of each lemma that has at least one
for sense in wordnet.synsets("Worse"):
    for lem in sense.lemmas():
        syn.append(lem.name())
        opposites = lem.antonyms()
        if opposites:
            # Only the first antonym of each lemma is recorded.
            ant.append(opposites[0].name())

print('Synonyms:', syn)
print('Antonyms:', ant)

Synonyms: ['worse', 'worse', 'worse', 'worsened', 'bad', 'bad', 'big', 'bad', 'tough', 'bad', 'spoiled', 'spoilt', 'regretful', 'sorry', 'bad', 'bad', 'uncollectible', 'bad', 'bad', 'bad', 'risky', 'high-risk', 'speculative', 'bad', 'unfit', 'unsound', 'bad', 'bad', 'bad', 'forged', 'bad', 'defective', 'worse']
Antonyms: ['better', 'better', 'good', 'unregretful']
