In [1]:
from nltk.corpus import wordnet as wn


In [3]:
wn.synsets('motorcar') # synsets (synonym sets) that contain the word 'motorcar'

[Synset('car.n.01')]

In [6]:
# Words (lemma names) that make up the synset car.n.01.
wn.synset('car.n.01').lemma_names()

['car', 'auto', 'automobile', 'machine', 'motorcar']

In [8]:
# Definition text (gloss) of car.n.01.
wn.synset('car.n.01').definition()

'a motor vehicle with four wheels; usually propelled by an internal combustion engine'

In [9]:
# Example sentences recorded for car.n.01.
wn.synset('car.n.01').examples()

['he needs a car to get to work']

In [10]:
# Lemma objects of car.n.01; the output shows one per word in the synset.
wn.synset('car.n.01').lemmas()

[Lemma('car.n.01.car'),
 Lemma('car.n.01.auto'),
 Lemma('car.n.01.automobile'),
 Lemma('car.n.01.machine'),
 Lemma('car.n.01.motorcar')]

#### 为了消除歧义，我们把一个同义词集与其中某个词的配对称为词条（lemma）。

In [12]:
# Look up a single lemma by its full '<synset name>.<word>' identifier.
wn.lemma('car.n.01.automobile')

Lemma('car.n.01.automobile')

In [14]:
# The synset this lemma belongs to.
wn.lemma('car.n.01.automobile').synset()

Synset('car.n.01')

In [16]:
# The bare word form of the lemma.
wn.lemma('car.n.01.automobile').name()

'automobile'

In [17]:
## The word 'car' is ambiguous: it appears in several synsets
wn.synsets('car')

[Synset('car.n.01'),
 Synset('car.n.02'),
 Synset('car.n.03'),
 Synset('car.n.04'),
 Synset('cable_car.n.01')]

In [20]:
# Print the lemma names of every synset that contains 'car', one synset per line.
for synset in wn.synsets('car'):
    print(synset.lemma_names())

['car', 'auto', 'automobile', 'machine', 'motorcar']
['car', 'railcar', 'railway_car', 'railroad_car']
['car', 'gondola']
['car', 'elevator_car']
['cable_car', 'car']


In [21]:
## Usually we access all lemmas that involve a given word like this
wn.lemmas('car')

[Lemma('car.n.01.car'),
 Lemma('car.n.02.car'),
 Lemma('car.n.03.car'),
 Lemma('car.n.04.car'),
 Lemma('cable_car.n.01.car')]

#### 探索dish


In [23]:
# All lemmas involving 'dish'; the output includes both noun (.n.) and verb (.v.) senses.
wn.lemmas('dish')

[Lemma('dish.n.01.dish'),
 Lemma('dish.n.02.dish'),
 Lemma('dish.n.03.dish'),
 Lemma('smasher.n.02.dish'),
 Lemma('dish.n.05.dish'),
 Lemma('cup_of_tea.n.01.dish'),
 Lemma('serve.v.06.dish'),
 Lemma('dish.v.02.dish')]

In [29]:
# All synsets that contain the word 'dish'.
wn.synsets('dish')

[Synset('dish.n.01'),
 Synset('dish.n.02'),
 Synset('dish.n.03'),
 Synset('smasher.n.02'),
 Synset('dish.n.05'),
 Synset('cup_of_tea.n.01'),
 Synset('serve.v.06'),
 Synset('dish.v.02')]

In [31]:
# Words in the first noun sense of 'dish'.
wn.synset('dish.n.01').lemma_names()

['dish']

In [32]:
# Definition text (gloss) of dish.n.01.
wn.synset('dish.n.01').definition()

'a piece of dishware normally used as a container for holding or serving food'

In [33]:
# Example sentences recorded for dish.n.01.
wn.synset('dish.n.01').examples()

['we gave them a set of dishes for a wedding present']

In [34]:
wn.synset('dish.n.01').lemmas() # a lemma is the pairing of a synset ('dish.n.01') with one of its words ('dish')

[Lemma('dish.n.01.dish')]

### WordNet 层次结构

In [44]:
# Bind the car.n.01 synset and its hyponyms (more specific kinds of motorcar);
# both names are reused by later cells.
motorcar = wn.synset('car.n.01')
types_of_motorcar = motorcar.hyponyms()
# Peek at the first hyponym only, rather than dumping the whole list.
types_of_motorcar[0]

Synset('ambulance.n.01')

In [55]:
# Flatten the hyponym synsets into one alphabetically sorted list of their lemma names.
sorted(
    word.name()
    for hyponym in types_of_motorcar
    for word in hyponym.lemmas()
)


['Model_T',
 'S.U.V.',
 'SUV',
 'Stanley_Steamer',
 'ambulance',
 'beach_waggon',
 'beach_wagon',
 'bus',
 'cab',
 'compact',
 'compact_car',
 'convertible',
 'coupe',
 'cruiser',
 'electric',
 'electric_automobile',
 'electric_car',
 'estate_car',
 'gas_guzzler',
 'hack',
 'hardtop',
 'hatchback',
 'heap',
 'horseless_carriage',
 'hot-rod',
 'hot_rod',
 'jalopy',
 'jeep',
 'landrover',
 'limo',
 'limousine',
 'loaner',
 'minicar',
 'minivan',
 'pace_car',
 'patrol_car',
 'phaeton',
 'police_car',
 'police_cruiser',
 'prowl_car',
 'race_car',
 'racer',
 'racing_car',
 'roadster',
 'runabout',
 'saloon',
 'secondhand_car',
 'sedan',
 'sport_car',
 'sport_utility',
 'sport_utility_vehicle',
 'sports_car',
 'squad_car',
 'station_waggon',
 'station_wagon',
 'stock_car',
 'subcompact',
 'subcompact_car',
 'taxi',
 'taxicab',
 'tourer',
 'touring_car',
 'two-seater',
 'used-car',
 'waggon',
 'wagon']

In [57]:
motorcar.hypernyms()## access the hypernyms (the more general synsets above car.n.01)

[Synset('motor_vehicle.n.01')]

In [60]:
# Collect every hypernym path for car.n.01; `paths` is reused by the next cells.
paths = motorcar.hypernym_paths()
# Number of distinct paths found.
len(paths)

2

In [62]:
# Synset names along the first hypernym path, in the order the path stores them.
[node.name() for node in paths[0]]

['entity.n.01',
 'physical_entity.n.01',
 'object.n.01',
 'whole.n.02',
 'artifact.n.01',
 'instrumentality.n.03',
 'container.n.01',
 'wheeled_vehicle.n.01',
 'self-propelled_vehicle.n.01',
 'motor_vehicle.n.01',
 'car.n.01']

In [63]:
# Synset names along the second hypernym path, in the order the path stores them.
[node.name() for node in paths[1]]

['entity.n.01',
 'physical_entity.n.01',
 'object.n.01',
 'whole.n.02',
 'artifact.n.01',
 'instrumentality.n.03',
 'conveyance.n.03',
 'vehicle.n.01',
 'wheeled_vehicle.n.01',
 'self-propelled_vehicle.n.01',
 'motor_vehicle.n.01',
 'car.n.01']

In [64]:
## Get the most general (root) hypernym synsets
motorcar.root_hypernyms()

[Synset('entity.n.01')]

上位词和下位词被称为**词汇关系**。WordNet 网络中另一种重要的导航方式，是从条目到它们的部件（部分，meronyms），或到包含它们的东西（整体，holonyms）。

In [67]:
wn.synset('tree.n.01').part_meronyms()# part meronyms: the parts of a tree

[Synset('burl.n.02'),
 Synset('crown.n.07'),
 Synset('limb.n.02'),
 Synset('stump.n.01'),
 Synset('trunk.n.01')]

In [68]:
wn.synset('tree.n.01').substance_meronyms()# substance meronyms: what a tree is made of

[Synset('heartwood.n.01'), Synset('sapwood.n.01')]

In [69]:
wn.synset('tree.n.01').member_holonyms()# member holonyms: the whole that trees are members of

[Synset('forest.n.01')]

In [73]:
# Print "<synset name>:<definition>" for every noun synset of 'mint'.
for synset in wn.synsets('mint',wn.NOUN):
    print('{}:{}'.format(synset.name(),synset.definition()))

batch.n.02:(often followed by `of') a large number or amount or extent
mint.n.02:any north temperate plant of the genus Mentha with aromatic leaves and small mauve flowers
mint.n.03:any member of the mint family of plants
mint.n.04:the leaves of a mint plant used fresh or candied
mint.n.05:a candy that is flavored with a mint oil
mint.n.06:a plant where money is coined by authority of the government


In [74]:
wn.synset('mint.n.04').part_holonyms()# part holonyms (part-whole relation): the whole that mint.n.04 is a part of

[Synset('mint.n.02')]

In [75]:
# Substance holonyms: what mint.n.04 is a substance of (the output lists mint.n.05).
wn.synset('mint.n.04').substance_holonyms()

[Synset('mint.n.05')]

In [77]:
## Relations exist between verbs as well
wn.synset('walk.v.01').entailments()# walking entails stepping

[Synset('step.v.01')]

In [78]:
# Entailments of eat.v.01.
wn.synset('eat.v.01').entailments()

[Synset('chew.v.01'), Synset('swallow.v.01')]

In [80]:
# Entailments of tease.v.03.
wn.synset('tease.v.03').entailments()

[Synset('arouse.v.07'), Synset('disappoint.v.01')]

In [81]:
## Look up antonyms (queried here on a lemma, not on a synset)
wn.lemma('supply.n.02.supply').antonyms()

[Lemma('demand.n.02.demand')]

In [82]:
# Antonyms of the verb lemma 'rush' in rush.v.01.
wn.lemma('rush.v.01.rush').antonyms()

[Lemma('linger.v.04.linger')]

In [84]:
# Antonyms of the adjective lemma 'horizontal'; the output shows more than one.
wn.lemma('horizontal.a.01.horizontal').antonyms()

[Lemma('vertical.a.01.vertical'), Lemma('inclined.a.02.inclined')]

In [85]:
## Use dir() to list the lexical relations and the other methods defined on a synset
dir(wn.synset('harmony.n.02'))

['__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__slots__',
 '__str__',
 '__subclasshook__',
 '__unicode__',
 '__weakref__',
 '_all_hypernyms',
 '_definition',
 '_examples',
 '_frame_ids',
 '_hypernyms',
 '_instance_hypernyms',
 '_iter_hypernym_lists',
 '_lemma_names',
 '_lemma_pointers',
 '_lemmas',
 '_lexname',
 '_max_depth',
 '_min_depth',
 '_name',
 '_needs_root',
 '_offset',
 '_pointers',
 '_pos',
 '_related',
 '_shortest_hypernym_paths',
 '_wordnet_corpus_reader',
 'also_sees',
 'attributes',
 'causes',
 'closure',
 'common_hypernyms',
 'definition',
 'entailments',
 'examples',
 'frame_ids',
 'hypernym_distances',
 'hypernym_paths',
 'hypernyms',
 'hyponyms',
 'instance_hypernyms',
 'instance_hyponyms',
 'jcn

In [87]:
## Explore semantic similarity
# Bind a handful of synsets to compare. orca, tortoise and novel are not used
# in this cell — presumably kept for later comparisons.
right = wn.synset('right_whale.n.01')
orca =  wn.synset('orca.n.01')
minke = wn.synset('minke_whale.n.01')
tortoise = wn.synset('tortoise.n.01')
novel = wn.synset('novel.n.01')
# Lowest hypernym(s) shared by right whale and minke whale.
right.lowest_common_hypernyms(minke)

[Synset('baleen_whale.n.01')]

In [89]:
wn.synset('baleen_whale.n.01').min_depth() # use depth in the hierarchy to quantify how general or specific a synset is

14

In [90]:
# Path similarity: a score in the range 0-1.
# NOTE(review): the original comment said -1 is returned when no path exists;
# NLTK 3 documents None for that case — confirm against the installed version.
right.path_similarity(minke)

0.25