## 1. WordNet senses and synonyms

In [5]:
from nltk.corpus import wordnet as wn
import nltk
# Make sure the 'wordnet' and 'omw-1.4' NLTK data packages are available
# locally before the corpus reader is used. The value of the last call is
# what the cell displays.
nltk.download('wordnet')
nltk.download('omw-1.4')

[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package omw-1.4 to /root/nltk_data...


True

In [6]:
# Words are grouped conceptually into synsets (synonym sets).
# wn.synsets(word) lists the synsets a word belongs to; 'motorcar' has just one.
wn.synsets('motorcar')

[Synset('car.n.01')]

In [7]:
# An ambiguous word belongs to many synsets: 'spell' appears in several
# noun (.n.) and verb (.v.) synsets.
wn.synsets('spell')

[Synset('enchantment.n.02'),
 Synset('go.n.01'),
 Synset('while.n.01'),
 Synset('spell.n.04'),
 Synset('spell.v.01'),
 Synset('spell.v.02'),
 Synset('spell.v.03'),
 Synset('spell.v.04'),
 Synset('spell.v.05'),
 Synset('spell.v.06')]

In [8]:
# Synset members are referred to as 'lemmas'; lemma_names() returns them
# as plain strings.
wn.synset('car.n.01').lemma_names()

['car', 'auto', 'automobile', 'machine', 'motorcar']

In [9]:
# definition() returns the textual definition attached to the synset.
wn.synset('car.n.01').definition()

'a motor vehicle with four wheels; usually propelled by an internal combustion engine'

In [10]:
# examples() returns a list of example sentences for the synset.
wn.synset('car.n.01').examples()

['he needs a car to get to work']

In [11]:
# Get all the lemmas for a given synset as Lemma objects
# (lemma_names() gives the same members as strings).
wn.synset('car.n.01').lemmas()

[Lemma('car.n.01.car'),
 Lemma('car.n.01.auto'),
 Lemma('car.n.01.automobile'),
 Lemma('car.n.01.machine'),
 Lemma('car.n.01.motorcar')]

In [12]:
# Look up one specific lemma by its '<synset name>.<lemma name>' identifier.
wn.lemma('car.n.01.automobile')

Lemma('car.n.01.automobile')

In [13]:
# A lemma can be mapped back to the synset it belongs to.
wn.lemma('car.n.01.automobile').synset()

Synset('car.n.01')

In [14]:
# name() returns the bare word form of the lemma.
wn.lemma('car.n.01.automobile').name()

'automobile'

In [15]:
# Multiple synsets correspond to multiple senses: 'car' belongs to five
# different synsets.
wn.synsets('car')

[Synset('car.n.01'),
 Synset('car.n.02'),
 Synset('car.n.03'),
 Synset('car.n.04'),
 Synset('cable_car.n.01')]

In [16]:
# Walk through every sense of 'car' and print the lemma names grouped
# under that sense, one list per line.
car_senses = wn.synsets('car')
for synset in car_senses:
    print(synset.lemma_names())

['car', 'auto', 'automobile', 'machine', 'motorcar']
['car', 'railcar', 'railway_car', 'railroad_car']
['car', 'gondola']
['car', 'elevator_car']
['cable_car', 'car']


In [17]:
# wn.lemmas(word) returns the Lemma for 'car' in each synset that contains it.
wn.lemmas('car')

[Lemma('car.n.01.car'),
 Lemma('car.n.02.car'),
 Lemma('car.n.03.car'),
 Lemma('car.n.04.car'),
 Lemma('cable_car.n.01.car')]

## 2. The WordNet hierarchy

In [18]:
# Keep a handle on the 'car.n.01' synset; the hierarchy cells below start from it.
motorcar = wn.synset('car.n.01')

In [19]:
# Find more specific concepts: the hyponyms of a synset are its "kinds of"
# (here, kinds of motorcar).
types_of_motorcar = motorcar.hyponyms()

In [20]:
# Display the hyponym synsets of car.n.01.
types_of_motorcar

[Synset('ambulance.n.01'),
 Synset('beach_wagon.n.01'),
 Synset('bus.n.04'),
 Synset('cab.n.03'),
 Synset('compact.n.03'),
 Synset('convertible.n.01'),
 Synset('coupe.n.01'),
 Synset('cruiser.n.01'),
 Synset('electric.n.01'),
 Synset('gas_guzzler.n.01'),
 Synset('hardtop.n.01'),
 Synset('hatchback.n.01'),
 Synset('horseless_carriage.n.01'),
 Synset('hot_rod.n.01'),
 Synset('jeep.n.01'),
 Synset('limousine.n.01'),
 Synset('loaner.n.02'),
 Synset('minicar.n.01'),
 Synset('minivan.n.01'),
 Synset('model_t.n.01'),
 Synset('pace_car.n.01'),
 Synset('racer.n.02'),
 Synset('roadster.n.01'),
 Synset('sedan.n.01'),
 Synset('sport_utility.n.01'),
 Synset('sports_car.n.01'),
 Synset('stanley_steamer.n.01'),
 Synset('stock_car.n.01'),
 Synset('subcompact.n.01'),
 Synset('touring_car.n.01'),
 Synset('used-car.n.01')]

In [21]:
# Flatten the lemma names of every hyponym synset into a single sorted list
# of the words WordNet records for kinds of motorcar.
sorted(
    name
    for hyponym in types_of_motorcar
    for name in hyponym.lemma_names()
)

['Model_T',
 'S.U.V.',
 'SUV',
 'Stanley_Steamer',
 'ambulance',
 'beach_waggon',
 'beach_wagon',
 'bus',
 'cab',
 'compact',
 'compact_car',
 'convertible',
 'coupe',
 'cruiser',
 'electric',
 'electric_automobile',
 'electric_car',
 'estate_car',
 'gas_guzzler',
 'hack',
 'hardtop',
 'hatchback',
 'heap',
 'horseless_carriage',
 'hot-rod',
 'hot_rod',
 'jalopy',
 'jeep',
 'landrover',
 'limo',
 'limousine',
 'loaner',
 'minicar',
 'minivan',
 'pace_car',
 'patrol_car',
 'phaeton',
 'police_car',
 'police_cruiser',
 'prowl_car',
 'race_car',
 'racer',
 'racing_car',
 'roadster',
 'runabout',
 'saloon',
 'secondhand_car',
 'sedan',
 'sport_car',
 'sport_utility',
 'sport_utility_vehicle',
 'sports_car',
 'squad_car',
 'station_waggon',
 'station_wagon',
 'stock_car',
 'subcompact',
 'subcompact_car',
 'taxi',
 'taxicab',
 'tourer',
 'touring_car',
 'two-seater',
 'used-car',
 'waggon',
 'wagon']

In [22]:
# Find more generic concepts: hypernyms() returns the parent synset(s);
# car.n.01 has a single one.
motorcar.hypernyms()

[Synset('motor_vehicle.n.01')]

In [23]:
# A polyhierarchy means a synset can have multiple parents, so there can be
# more than one chain of hypernyms linking the root to a given synset.
# hypernym_paths() returns each such chain as a list of synsets.
paths = motorcar.hypernym_paths()

In [24]:
# Number of distinct hypernym paths found for car.n.01.
len(paths)

2

In [25]:
# Let's look at the first path, listed from the root down to car.n.01;
# this one passes through container.n.01.
[synset.name() for synset in paths[0]]

['entity.n.01',
 'physical_entity.n.01',
 'object.n.01',
 'whole.n.02',
 'artifact.n.01',
 'instrumentality.n.03',
 'container.n.01',
 'wheeled_vehicle.n.01',
 'self-propelled_vehicle.n.01',
 'motor_vehicle.n.01',
 'car.n.01']

In [26]:
# Let's look at the second path; it passes through conveyance.n.03 and
# vehicle.n.01 instead of container.n.01.
[synset.name() for synset in paths[1]]

['entity.n.01',
 'physical_entity.n.01',
 'object.n.01',
 'whole.n.02',
 'artifact.n.01',
 'instrumentality.n.03',
 'conveyance.n.03',
 'vehicle.n.01',
 'wheeled_vehicle.n.01',
 'self-propelled_vehicle.n.01',
 'motor_vehicle.n.01',
 'car.n.01']

In [27]:
# What's the root node? root_hypernyms() returns the top-most synset(s),
# i.e. where the hypernym paths start.
motorcar.root_hypernyms()

[Synset('entity.n.01')]

## 3. Measuring semantic similarity

In [28]:
# Let's instantiate some terms: look up the synsets that the similarity
# examples compare (three whales, a tortoise, plankton and a novel).
right = wn.synset('right_whale.n.01')
minke = wn.synset('minke_whale.n.01')
orca = wn.synset('orca.n.01')
tortoise = wn.synset('tortoise.n.01')
plankton = wn.synset('plankton.n.01')
novel = wn.synset('novel.n.01')

In [29]:
# Find the common parent: the lowest hypernym shared by both synsets.
# For a right whale and a tortoise that is vertebrate.n.01.
right.lowest_common_hypernyms(tortoise)

[Synset('vertebrate.n.01')]

In [30]:
# Calculate similarity between two synsets with path_similarity();
# a right whale and a novel score very low (about 0.04).
right.path_similarity(novel)

0.043478260869565216

### Your turn:
Rank the following words by their degree of similarity to the word ‘university’ (e.g. using `path_similarity`): college, school, factory, supermarket, turtle.

In [31]:
# Reference synset for the exercise: every other word is compared against it.
university = wn.synset('university.n.01')

In [32]:
# Path similarity between 'university' and 'college'.
# NOTE(review): the exercise also lists school, factory, supermarket and
# turtle; those comparisons are not visible here — confirm they follow.
university.path_similarity(wn.synset('college.n.01'))

0.3333333333333333