# NLTK WordNet tutorial

In [1]:
from nltk.corpus import wordnet as wn

In [2]:
# A synset ("synonym set") groups the words that share one meaning;
# wn.synsets(word) lists every synset that contains the given word.

wn.synsets('surfboard')

[Synset('surfboard.n.01'), Synset('surfboard.v.01')]

Each synset name has the form `word.pos.nn`. The part-of-speech tag shows that 'surfboard' can be used as a noun (`n`) or as a verb (`v`), and `nn` is the sense number.

In [3]:
# Only one synset comes back: WordNet lists a single sense for 'pizza'.
wn.synsets('pizza')

[Synset('pizza.n.01')]

Only one synset is returned here, because WordNet records a single sense for 'pizza',
namely the food.

In [4]:
# 'pizza.n.01' is a synset name of the form <word>.<part of speech>.<sense number>;
# lemma_names() lists the synonyms (lemmas) grouped under that synset.

wn.synset('pizza.n.01').lemma_names()

['pizza', 'pizza_pie']

Other words are more ambiguous...

In [5]:
# 'printer' is more ambiguous: the lookup yields three noun synsets.
wn.synsets('printer')

[Synset('printer.n.01'), Synset('printer.n.02'), Synset('printer.n.03')]

In [6]:
# Synonyms grouped under the first sense of 'printer'.
wn.synset('printer.n.01').lemma_names()

['printer', 'pressman']

In [7]:
# Synonyms grouped under the second sense of 'printer'.
wn.synset('printer.n.02').lemma_names()

['printer']

In [8]:
# Synonyms grouped under the third sense of 'printer'.
wn.synset('printer.n.03').lemma_names()

['printer', 'printing_machine']

In [9]:
# Report every sense of 'printer': synset name, gloss and usage examples.
# NOTE: the "Lemma" label actually shows the synset name (e.g. printer.n.01),
# and "Example" shows the whole list returned by examples().
for synset in wn.synsets('printer'):
    print(
        "\tLemma: {}".format(synset.name()),
        "\tDefinition: {}".format(synset.definition()),
        "\tExample: {}".format(synset.examples()),
        sep="\n",
    )
 

	Lemma: printer.n.01
	Definition: someone whose occupation is printing
	Example: []
	Lemma: printer.n.02
	Definition: (computer science) an output device that prints the results of data processing
	Example: []
	Lemma: printer.n.03
	Definition: a machine that prints
	Example: []


# Lexical Relations...

WordNet organises synsets into a hierarchy that runs from very general concepts, such as *thing* or *event*, down to specific ones, such as *soccer*. The two basic relations in this hierarchy are hyponymy and hypernymy.

Hyponym — a more specific concept

Hypernym — a more general concept

In [11]:
# Fetch one specific synset by name; reused by the relation queries in later cells.
pizza = wn.synset('pizza.n.01')

In [12]:
# Flatten the lemma names of every direct hyponym of pizza and sort them.
sorted(
    lemma.name()
    for hyponym in pizza.hyponyms()
    for lemma in hyponym.lemmas()
)

['Sicilian_pizza',
 'anchovy_pizza',
 'cheese_pizza',
 'pepperoni_pizza',
 'sausage_pizza']

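In other words, each of these is a hyponym (a more specific kind) of `pizza`:

- `Sicilian_pizza` is a hyponym of `pizza`
- `anchovy_pizza` is a hyponym of `pizza`
- `cheese_pizza` is a hyponym of `pizza`
- `pepperoni_pizza` is a hyponym of `pizza`
- `sausage_pizza` is a hyponym of `pizza`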

In [13]:
# Lemma names of pizza's direct hypernyms (the next more general concept).
[
    name
    for parent in pizza.hypernyms()
    for name in parent.lemma_names()
]

['dish']

In [14]:
# root_hypernyms() returns the most general ancestor(s) reached by
# following hypernym links upwards from this synset.

pizza.root_hypernyms()

[Synset('entity.n.01')]

In [20]:
# Noun sense of 'surfboard'; it shares the same root hypernym as pizza.
surfboard = wn.synset('surfboard.n.01')
surfboard.root_hypernyms()

[Synset('entity.n.01')]

`hypernym_paths()` lists every path from the root hypernym down to the given synset:

In [19]:
# Each inner list is one chain of synsets, ordered from the root down to pizza.
pizza.hypernym_paths()

[[Synset('entity.n.01'),
  Synset('physical_entity.n.01'),
  Synset('matter.n.03'),
  Synset('substance.n.07'),
  Synset('food.n.01'),
  Synset('nutriment.n.01'),
  Synset('dish.n.02'),
  Synset('pizza.n.01')]]

In [21]:
# Same query for surfboard: its path leaves pizza's right after physical_entity.
surfboard.hypernym_paths()

[[Synset('entity.n.01'),
  Synset('physical_entity.n.01'),
  Synset('object.n.01'),
  Synset('whole.n.02'),
  Synset('artifact.n.01'),
  Synset('sheet.n.06'),
  Synset('board.n.03'),
  Synset('surfboard.n.01')]]

In [18]:
# Synset names along pizza's first hypernym path, root first.
[
    ancestor.name()
    for ancestor in pizza.hypernym_paths()[0]
]

['entity.n.01',
 'physical_entity.n.01',
 'matter.n.03',
 'substance.n.07',
 'food.n.01',
 'nutriment.n.01',
 'dish.n.02',
 'pizza.n.01']

In [28]:
# Most specific synset(s) that both pizza and surfboard have as a hypernym.
pizza.lowest_common_hypernyms(surfboard)

[Synset('physical_entity.n.01')]

In [36]:
# Synonyms in food.n.01, one of the synsets on pizza's hypernym path.
wn.synset('food.n.01').lemma_names()

['food', 'nutrient']

In [41]:
# Without a pos filter the result mixes noun and verb senses of 'dish'.
wn.synsets('dish')

[Synset('dish.n.01'),
 Synset('dish.n.02'),
 Synset('dish.n.03'),
 Synset('smasher.n.02'),
 Synset('dish.n.05'),
 Synset('cup_of_tea.n.01'),
 Synset('serve.v.06'),
 Synset('dish.v.02')]

In [43]:
# Similarity score based on the shortest path linking the two senses in the
# hypernym/hyponym taxonomy; a low value means the senses are far apart.
pizza.path_similarity(surfboard)

0.07692307692307693

In [47]:
# pos='n' keeps only the noun senses (the verb senses disappear).
wn.synsets('dish', pos='n')

[Synset('dish.n.01'),
 Synset('dish.n.02'),
 Synset('dish.n.03'),
 Synset('smasher.n.02'),
 Synset('dish.n.05'),
 Synset('cup_of_tea.n.01')]

# Language is arbitrary

In [48]:
# One line per noun sense of 'dish', listing the synonyms of that sense.
for dish in wn.synsets('dish', pos='n'):
    print(dish.lemma_names())

['dish']
['dish']
['dish', 'dishful']
['smasher', 'stunner', 'knockout', 'beauty', 'ravisher', 'sweetheart', 'peach', 'lulu', 'looker', 'mantrap', 'dish']
['dish', 'dish_aerial', 'dish_antenna', 'saucer']
['cup_of_tea', 'bag', 'dish']


In [50]:
# One line per noun sense of 'food', listing the synonyms of that sense.
for food in wn.synsets('food', pos='n'):
    print(food.lemma_names())

['food', 'nutrient']
['food', 'solid_food']
['food', 'food_for_thought', 'intellectual_nourishment']


In [51]:
# One line per noun sense of 'sheet', listing the synonyms of that sense.
for sheet in wn.synsets('sheet', pos='n'):
    print(sheet.lemma_names())

['sheet']
['sheet', 'piece_of_paper', 'sheet_of_paper']
['sheet', 'bed_sheet']
['plane', 'sheet']
['tabloid', 'rag', 'sheet']
['sheet', 'flat_solid']
['sheet', 'tack', 'mainsheet', 'weather_sheet', 'shroud']
['sail', 'canvas', 'canvass', 'sheet']


Someone who surfs with a surfboard is a person, so let's look up 'person' as well.



In [56]:
# Note: `person` is a list holding every noun synset for 'person', not a single synset.
person = wn.synsets('person', pos='n')
person

[Synset('person.n.01'), Synset('person.n.02'), Synset('person.n.03')]

'person' has several senses, and a single sense can have more than one hypernym path.

In [58]:
# Hypernym paths for each noun sense of 'person' (one entry per sense).
[
    sense.hypernym_paths()
    for sense in person
]

[[[Synset('entity.n.01'),
   Synset('physical_entity.n.01'),
   Synset('causal_agent.n.01'),
   Synset('person.n.01')],
  [Synset('entity.n.01'),
   Synset('physical_entity.n.01'),
   Synset('object.n.01'),
   Synset('whole.n.02'),
   Synset('living_thing.n.01'),
   Synset('organism.n.01'),
   Synset('person.n.01')]],
 [[Synset('entity.n.01'),
   Synset('physical_entity.n.01'),
   Synset('object.n.01'),
   Synset('whole.n.02'),
   Synset('natural_object.n.01'),
   Synset('body.n.01'),
   Synset('human_body.n.01'),
   Synset('person.n.02')]],
 [[Synset('entity.n.01'),
   Synset('abstraction.n.06'),
   Synset('group.n.01'),
   Synset('collection.n.01'),
   Synset('class.n.01'),
   Synset('grammatical_category.n.01'),
   Synset('person.n.03')]]]

Another example is 'dining table'. Multi-word terms raise a problem: the spelling with a space returns nothing.

In [62]:
# Space-separated spelling: the recorded lookup finds nothing.
dining_table = wn.synsets('dining table', pos='n')
dining_table

[]

In [63]:
# Underscore-joined spelling: this one matches a synset.
dining_table = wn.synsets('dining_table', pos='n')
dining_table

[Synset('dining_table.n.01')]

In [64]:
# Hypernym paths for every 'dining_table' synset found above.
[
    table_sense.hypernym_paths()
    for table_sense in dining_table
]

[[[Synset('entity.n.01'),
   Synset('physical_entity.n.01'),
   Synset('object.n.01'),
   Synset('whole.n.02'),
   Synset('artifact.n.01'),
   Synset('instrumentality.n.03'),
   Synset('furnishing.n.02'),
   Synset('furniture.n.01'),
   Synset('table.n.03'),
   Synset('dining_table.n.01')]]]

Therefore the multi-word term 'dining table' has to be looked up as `dining_table`, with an underscore in place of the space.

In [65]:
# Attempt 1, space-separated spelling: no match in the recorded run.
wine_glass = wn.synsets('wine glass', pos='n')
wine_glass

[]

In [75]:
# Attempt 2, underscore-joined spelling: still no match.
wine_glass = wn.synsets('wine_glass', pos='n')
wine_glass

[]

Neither the spaced nor the underscored spelling is found. The single-word spelling, however, works:

In [76]:
# Attempt 3, written as one word: this spelling matches a synset.
wine_glass = wn.synsets('wineglass', pos='n')
wine_glass

[Synset('wineglass.n.01')]

In [77]:
# Hypernym paths for every 'wineglass' synset found above.
[
    glass_sense.hypernym_paths()
    for glass_sense in wine_glass
]

[[[Synset('entity.n.01'),
   Synset('physical_entity.n.01'),
   Synset('object.n.01'),
   Synset('whole.n.02'),
   Synset('artifact.n.01'),
   Synset('instrumentality.n.03'),
   Synset('container.n.01'),
   Synset('glass.n.02'),
   Synset('wineglass.n.01')]]]

In [67]:
# The component word 'wine' on its own has two noun senses.
wine = wn.synsets('wine', pos='n')
wine

[Synset('wine.n.01'), Synset('wine.n.02')]

In [68]:
# Hypernym paths for each noun sense of 'wine'; one sense may have several paths.
[
    wine_sense.hypernym_paths()
    for wine_sense in wine
]

[[[Synset('entity.n.01'),
   Synset('physical_entity.n.01'),
   Synset('matter.n.03'),
   Synset('substance.n.07'),
   Synset('food.n.01'),
   Synset('beverage.n.01'),
   Synset('alcohol.n.01'),
   Synset('wine.n.01')],
  [Synset('entity.n.01'),
   Synset('physical_entity.n.01'),
   Synset('matter.n.03'),
   Synset('substance.n.01'),
   Synset('fluid.n.01'),
   Synset('liquid.n.01'),
   Synset('beverage.n.01'),
   Synset('alcohol.n.01'),
   Synset('wine.n.01')],
  [Synset('entity.n.01'),
   Synset('abstraction.n.06'),
   Synset('relation.n.01'),
   Synset('part.n.01'),
   Synset('substance.n.01'),
   Synset('fluid.n.01'),
   Synset('liquid.n.01'),
   Synset('beverage.n.01'),
   Synset('alcohol.n.01'),
   Synset('wine.n.01')],
  [Synset('entity.n.01'),
   Synset('physical_entity.n.01'),
   Synset('causal_agent.n.01'),
   Synset('agent.n.03'),
   Synset('drug.n.01'),
   Synset('drug_of_abuse.n.01'),
   Synset('alcohol.n.01'),
   Synset('wine.n.01')],
  [Synset('entity.n.01'),
   Synset('

In [69]:
# Space-separated spelling: no match in the recorded run.
hot_dog = wn.synsets('hot dog', pos='n')
hot_dog

[]

In [72]:
# Underscore-joined spelling matches three synsets; they are named after
# other lemmas (hotdog, frank) that belong to the same synsets.
hot_dog = wn.synsets('hot_dog', pos='n')
hot_dog

[Synset('hotdog.n.01'), Synset('hotdog.n.02'), Synset('frank.n.02')]

In [73]:
# Hypernym paths for each 'hot_dog' sense (person, sandwich, sausage senses differ).
[
    hot_dog_sense.hypernym_paths()
    for hot_dog_sense in hot_dog
]

[[[Synset('entity.n.01'),
   Synset('physical_entity.n.01'),
   Synset('causal_agent.n.01'),
   Synset('person.n.01'),
   Synset('unwelcome_person.n.01'),
   Synset('unpleasant_person.n.01'),
   Synset('egotist.n.01'),
   Synset('exhibitionist.n.02'),
   Synset('hotdog.n.01')],
  [Synset('entity.n.01'),
   Synset('physical_entity.n.01'),
   Synset('object.n.01'),
   Synset('whole.n.02'),
   Synset('living_thing.n.01'),
   Synset('organism.n.01'),
   Synset('person.n.01'),
   Synset('unwelcome_person.n.01'),
   Synset('unpleasant_person.n.01'),
   Synset('egotist.n.01'),
   Synset('exhibitionist.n.02'),
   Synset('hotdog.n.01')]],
 [[Synset('entity.n.01'),
   Synset('physical_entity.n.01'),
   Synset('matter.n.03'),
   Synset('substance.n.07'),
   Synset('food.n.01'),
   Synset('nutriment.n.01'),
   Synset('dish.n.02'),
   Synset('snack_food.n.01'),
   Synset('sandwich.n.01'),
   Synset('hotdog.n.02')]],
 [[Synset('entity.n.01'),
   Synset('physical_entity.n.01'),
   Synset('matter.n.03

In [78]:
# Attempt 1, space-separated spelling: no match in the recorded run.
sports_ball = wn.synsets('sports ball', pos='n')
sports_ball

[]

In [79]:
# Attempt 2, underscore-joined spelling: no match either.
sports_ball = wn.synsets('sports_ball', pos='n')
sports_ball

[]

In [80]:
# Attempt 3, written as one word: still no match.
sports_ball = wn.synsets('sportsball', pos='n')
sports_ball

[]

None of the three spellings of 'sports ball' is in WordNet, so fall back to the head noun 'ball':

In [83]:
# Fallback: look up the head noun 'ball'. The variable keeps the name
# sports_ball even though it now holds every noun sense of 'ball'.
sports_ball = wn.synsets('ball', pos='n')
sports_ball

[Synset('ball.n.01'),
 Synset('musket_ball.n.01'),
 Synset('ball.n.03'),
 Synset('ball.n.04'),
 Synset('testis.n.01'),
 Synset('ball.n.06'),
 Synset('ball.n.07'),
 Synset('ball.n.08'),
 Synset('ball.n.09'),
 Synset('ball.n.10'),
 Synset('ball.n.11'),
 Synset('ball.n.12')]

Idea: flatten the hypernym path(s) of a term into the lemma names of each ancestor synset:

In [93]:
# For every sense of wine_glass, walk each hypernym path (root first) and
# collect the lemma names of every synset on that path.
[
    ancestor.lemma_names()
    for sense in wine_glass
    for path in sense.hypernym_paths()
    for ancestor in path
]

[['entity'],
 ['physical_entity'],
 ['object', 'physical_object'],
 ['whole', 'unit'],
 ['artifact', 'artefact'],
 ['instrumentality', 'instrumentation'],
 ['container'],
 ['glass', 'drinking_glass'],
 ['wineglass']]

In [94]:
# Attempt 1, space-separated spelling: no match in the recorded run.
cell_phone = wn.synsets('cell phone', pos='n')
cell_phone

[]

In [95]:
# Attempt 2, underscore-joined spelling: no match either.
cell_phone = wn.synsets('cell_phone', pos='n')
cell_phone

[]

In [96]:
# Attempt 3, written as one word: matches the cellular_telephone synset.
cell_phone = wn.synsets('cellphone', pos='n')
cell_phone

[Synset('cellular_telephone.n.01')]

In [97]:
# Hypernym paths for every 'cellphone' synset found above.
[
    phone_sense.hypernym_paths()
    for phone_sense in cell_phone
]

[[[Synset('entity.n.01'),
   Synset('physical_entity.n.01'),
   Synset('object.n.01'),
   Synset('whole.n.02'),
   Synset('artifact.n.01'),
   Synset('instrumentality.n.03'),
   Synset('equipment.n.01'),
   Synset('electronic_equipment.n.01'),
   Synset('telephone.n.01'),
   Synset('radiotelephone.n.02'),
   Synset('cellular_telephone.n.01')]]]

In [98]:
# Attempt 1, space-separated spelling: no match in the recorded run.
potted_plant = wn.synsets('potted plant', pos='n')
potted_plant

[]

In [99]:
# Attempt 2, underscore-joined spelling: no match either.
potted_plant = wn.synsets('potted_plant', pos='n')
potted_plant

[]

In [100]:
# Attempt 3, written as one word: still no match.
potted_plant = wn.synsets('pottedplant', pos='n')
potted_plant

[]

In [101]:
# Fallback: look up the head noun 'plant'. The variable keeps the name
# potted_plant even though it now holds every noun sense of 'plant'.
potted_plant = wn.synsets('plant', pos='n')
potted_plant

[Synset('plant.n.01'),
 Synset('plant.n.02'),
 Synset('plant.n.03'),
 Synset('plant.n.04')]