# Wordnet

## concepts of wordnet

In [1]:
from nltk.corpus import wordnet as wn
import pandas as pd

In [2]:
term = "fruit"
synsets = wn.synsets(term)

In [3]:
for synset in synsets:
    print("Synset: ", synset)
    print("POS: ", synset.lexname())
    print("Definition: ", synset.definition())
    print("Lemmas: ", synset.lemma_names())
    print("Examples: ", synset.examples())

('Synset: ', Synset('fruit.n.01'))
('POS: ', u'noun.plant')
('Definition: ', u'the ripened reproductive body of a seed plant')
('Lemmas: ', [u'fruit'])
('Examples: ', [])
('Synset: ', Synset('yield.n.03'))
('POS: ', u'noun.artifact')
('Definition: ', u'an amount of a product')
('Lemmas: ', [u'yield', u'fruit'])
('Examples: ', [])
('Synset: ', Synset('fruit.n.03'))
('POS: ', u'noun.event')
('Definition: ', u'the consequence of some effort or action')
('Lemmas: ', [u'fruit'])
('Examples: ', [u'he lived long enough to see the fruit of his policies'])
('Synset: ', Synset('fruit.v.01'))
('POS: ', u'verb.creation')
('Definition: ', u'cause to bear fruit')
('Lemmas: ', [u'fruit'])
('Examples: ', [])
('Synset: ', Synset('fruit.v.02'))
('POS: ', u'verb.creation')
('Definition: ', u'bear fruit')
('Lemmas: ', [u'fruit'])
('Examples: ', [u'the trees fruited early this year'])


In [4]:
for action in ["walk", "eat", "digest"]:
    action_syn = wn.synsets(action, pos="v")[0]
    print(action_syn, "-- entails -->", action_syn.entailments())

(Synset('walk.v.01'), '-- entails -->', [Synset('step.v.01')])
(Synset('eat.v.01'), '-- entails -->', [Synset('chew.v.01'), Synset('swallow.v.01')])
(Synset('digest.v.01'), '-- entails -->', [Synset('consume.v.02')])


In [5]:
for synset in wn.synsets("bank"):
    print(synset.name(),"-", synset.definition())

(u'bank.n.01', '-', u'sloping land (especially the slope beside a body of water)')
(u'depository_financial_institution.n.01', '-', u'a financial institution that accepts deposits and channels the money into lending activities')
(u'bank.n.03', '-', u'a long ridge or pile')
(u'bank.n.04', '-', u'an arrangement of similar objects in a row or in tiers')
(u'bank.n.05', '-', u'a supply or stock held in reserve for future use (especially in emergencies)')
(u'bank.n.06', '-', u'the funds held by a gambling house or the dealer in some gambling games')
(u'bank.n.07', '-', u'a slope in the turn of a road or track; the outside is higher than the inside in order to reduce the effects of centrifugal force')
(u'savings_bank.n.02', '-', u'a container (usually with a slot in the top) for keeping money at home')
(u'bank.n.09', '-', u'a building in which the business of banking transacted')
(u'bank.n.10', '-', u'a flight maneuver; aircraft tips laterally about its longitudinal axis (especially in turning

In [8]:
term = "large"
synsets = wn.synsets(term)
print(synsets)
adj = synsets[1]
adj = adj.lemmas()[0]
adj_synonym = adj.synset()
adj_antonym = adj.antonyms()[0].synset()
print("Synonym: ", adj_synonym.name())
print("Defintion: ", adj_synonym.definition())
print("Antonym: ", adj_antonym.name())
print("Defintion: ", adj_antonym.definition())

[Synset('large.n.01'), Synset('large.a.01'), Synset('large.s.02'), Synset('bombastic.s.01'), Synset('big.s.11'), Synset('big.s.05'), Synset('large.s.06'), Synset('big.s.13'), Synset('large.r.01'), Synset('large.r.02'), Synset('boastfully.r.01')]
('Synonym: ', u'large.a.01')
('Defintion: ', u'above average in size or number or quantity or magnitude or extent')
('Antonym: ', u'small.a.01')
('Defintion: ', u'limited or below average in number or quantity or magnitude or extent')


In [9]:
term = "rich"
synsets = wn.synsets(term)[:3]
for adj in synsets:
    adj = adj.lemmas()[0]
    adj_synonym = adj.synset()
    adj_antonym = adj.antonyms()[0].synset()
    print("Synonym: ", adj_synonym.name())
    print("Defintion: ", adj_synonym.definition())
    print("Antonym: ", adj_antonym.name())
    print("Defintion: ", adj_antonym.definition())

('Synonym: ', u'rich_people.n.01')
('Defintion: ', u'people who have possessions and wealth (considered as a group)')
('Antonym: ', u'poor_people.n.01')
('Defintion: ', u'people without possessions or wealth (considered as a group)')
('Synonym: ', u'rich.a.01')
('Defintion: ', u'possessing material wealth')
('Antonym: ', u'poor.a.02')
('Defintion: ', u'having little money or few possessions')
('Synonym: ', u'rich.a.02')
('Defintion: ', u'having an abundant supply of desirable qualities or substances (especially natural resources)')
('Antonym: ', u'poor.a.04')
('Defintion: ', u'lacking in specific resources, qualities or substances')


In [10]:
term = "conscinc"
synsets = wn.synsets(term)
word = synsets[0]
print("Name: ", word.name())
print("Definition: ", word.definition)
hyponyms = word.hyponyms()
print("Total hyponyms: ", len(hyponyms))
print("Sample hyponyms: ")
for hyponym in hyponyms[:10]:
    print(hyponym.name(), " - ", hyponym.definition())

('Name: ', u'tree.n.01')
('Definition: ', <bound method Synset.definition of Synset('tree.n.01')>)
('Total hyponyms: ', 180)
Sample hyponyms: 
(u'aalii.n.01', ' - ', u'a small Hawaiian tree with hard dark wood')
(u'acacia.n.01', ' - ', u'any of various spiny trees or shrubs of the genus Acacia')
(u'african_walnut.n.01', ' - ', u'tropical African timber tree with wood that resembles mahogany')
(u'albizzia.n.01', ' - ', u'any of numerous trees of the genus Albizia')
(u'alder.n.02', ' - ', u'north temperate shrubs or trees having toothed leaves and conelike fruit; bark is used in tanning and dyeing and the wood is rot-resistant')
(u'angelim.n.01', ' - ', u'any of several tropical American trees of the genus Andira')
(u'angiospermous_tree.n.01', ' - ', u'any tree having seeds and ovules contained in the ovary')
(u'anise_tree.n.01', ' - ', u'any of several evergreen shrubs and small trees of the genus Illicium')
(u'arbor.n.01', ' - ', u'tree (as opposed to shrub)')
(u'aroeira_blanca.n.01', 

In [11]:
hypernyms = word.hypernyms()
print(hypernyms)

[Synset('woody_plant.n.01')]


In [12]:
#get total hierarchy
hypernym_paths = word.hypernym_paths()
print(" -> ".join(synset.name() for synset in hypernym_paths[0]))

entity.n.01 -> physical_entity.n.01 -> object.n.01 -> whole.n.02 -> living_thing.n.01 -> organism.n.01 -> plant.n.02 -> vascular_plant.n.01 -> woody_plant.n.01 -> tree.n.01


In [13]:
member_holonyms = word.member_holonyms()
print("Number of holonyms: ", len(member_holonyms))
print("Member holonyms for", [word.name()],":-")
for holonym in member_holonyms:
    print(holonym.name(), " - ", holonym.definition())

('Number of holonyms: ', 1)
('Member holonyms for', [u'tree.n.01'], ':-')
(u'forest.n.01', ' - ', u'the trees and other plants in a large densely wooded area')


In [14]:
part_meronyms = word.part_meronyms()
print("Number of meronyms: ", len(part_meronyms))
print("Member meronyms for [word]:-")
for meronym in part_meronyms:
    print(meronym.name(), " - ", meronym.definition())

('Number of meronyms: ', 5)
Member meronyms for [word]:-
(u'burl.n.02', ' - ', u'a large rounded outgrowth on the trunk or branch of a tree')
(u'crown.n.07', ' - ', u'the upper branches and leaves of a tree or other plant')
(u'limb.n.02', ' - ', u'any of the main branches arising from the trunk or a bough of a tree')
(u'stump.n.01', ' - ', u'the base part of a tree that remains standing after the tree has been felled')
(u'trunk.n.01', ' - ', u'the main stem of a tree; usually covered with bark; the bole is usually the part that is commercially useful for lumber')


In [15]:
substance_meronyms = word.substance_meronyms()
print("Number of meronyms: ", len(substance_meronyms))
print("Member meronyms for [word]:-")
for meronym in substance_meronyms:
    print(meronym.name(), " - ", meronym.definition())

('Number of meronyms: ', 2)
Member meronyms for [word]:-
(u'heartwood.n.01', ' - ', u'the older inactive central wood of a tree or woody plant; usually darker and denser than the surrounding sapwood')
(u'sapwood.n.01', ' - ', u'newly formed outer wood lying between the cambium and the heartwood of a tree or woody plant; usually light colored; active in water conduction')


## Semantic Relationships and Similarities

In [17]:
word1 = wn.synset("tree.n.01")
word2 = wn.synset("lion.n.01")
word3 = wn.synset("tiger.n.02")
word4 = wn.synset("cat.n.01")
word5 = wn.synset("dog.n.01")

In [18]:
# create enteties and extract names and definitions
entities = [word1, word2, word3, word4, word5]
entity_names = [entity.name().split(".")[0] for entity in entities]
entity_definitions = [entity.definition() for entity in entities]

for entity, definition in zip(entity_names, entity_definitions):
    print(entity, "-", definition)

(u'tree', '-', u'a tall perennial woody plant having a main trunk and branches forming a distinct elevated crown; includes both gymnosperms and angiosperms')
(u'lion', '-', u'large gregarious predatory feline of Africa and India having a tawny coat with a shaggy mane in the male')
(u'tiger', '-', u'large feline of forests in most of Asia having a tawny coat with black stripes; endangered')
(u'cat', '-', u'feline mammal usually having thick soft fur and no ability to roar: domestic cats; wildcats')
(u'dog', '-', u'a member of the genus Canis (probably descended from the common wolf) that has been domesticated by man since prehistoric times; occurs in many breeds')


In [19]:
common_hypernyms = []
for entity in entities: 
    #get pairwise lowes common hypernyms
    common_hypernyms.append([entity.lowest_common_hypernyms(compared_entity)[0].name().split(".")[0] 
                             for compared_entity in entities])

# build pairwise matrix
common_hypernym_frame = pd.DataFrame(common_hypernyms,
                                   index=entity_names,
                                   columns=entity_names)
print(common_hypernym_frame)

           tree       lion      tiger        cat        dog
tree       tree   organism   organism   organism   organism
lion   organism       lion    big_cat     feline  carnivore
tiger  organism    big_cat      tiger     feline  carnivore
cat    organism     feline     feline        cat  carnivore
dog    organism  carnivore  carnivore  carnivore        dog


In [20]:
# path similarities
similarities = []
for entity in entities:
    similarities.append([round(entity.path_similarity(compared_entity), 2)
                        for compared_entity in entities])
similarity_frame = pd.DataFrame(similarities,
                               index=entity_names,
                               columns=entity_names)
print(similarity_frame)

       tree  lion  tiger   cat   dog
tree   1.00  0.07   0.07  0.08  0.13
lion   0.07  1.00   0.33  0.25  0.17
tiger  0.07  0.33   1.00  0.25  0.17
cat    0.08  0.25   0.25  1.00  0.20
dog    0.13  0.17   0.17  0.20  1.00


## Word Sense Disambiguation

In [21]:
from nltk.wsd import lesk
from nltk import word_tokenize

In [22]:
samples = [("The fruits on that plant have ripened", "n"),
          ("He finally reaped the fruit of his hard work as he won the race", "n")]
word = "fruit"

In [23]:
def getDisamb(samples, word):
    for sentence, pos_tag in samples:
        word_syn = lesk(word_tokenize(sentence.lower()), word, pos_tag)
        print("Sentence: ", sentence)
        print("Word synset: ", word_syn)
        print("Corresponding definition: ", word_syn.definition())
        print()

In [24]:
getDisamb(samples, word)

('Sentence: ', 'The fruits on that plant have ripened')
('Word synset: ', Synset('fruit.n.01'))
('Corresponding definition: ', u'the ripened reproductive body of a seed plant')
()
('Sentence: ', 'He finally reaped the fruit of his hard work as he won the race')
('Word synset: ', Synset('fruit.n.03'))
('Corresponding definition: ', u'the consequence of some effort or action')
()


In [25]:
samples = [("Lead is a very soft malleable metal", "n"),
          ("John is the actor who plays the lead in that movie", "n"),
          ("This road leads to nowhere", "v")]
word = "lead"

getDisamb(samples,word)

('Sentence: ', 'Lead is a very soft malleable metal')
('Word synset: ', Synset('lead.n.02'))
('Corresponding definition: ', u'a soft heavy toxic malleable metallic element; bluish white when freshly cut but tarnishes readily to dull grey')
()
('Sentence: ', 'John is the actor who plays the lead in that movie')
('Word synset: ', Synset('star.n.04'))
('Corresponding definition: ', u'an actor who plays a principal role')
()
('Sentence: ', 'This road leads to nowhere')
('Word synset: ', Synset('run.v.23'))
('Corresponding definition: ', u'cause something to pass or lead somewhere')
()
