In [1]:
# Install a pinned NLTK release; the WordNet cells below import nltk and its wordnet corpus.
!pip install nltk==3.5.0





# 1. WordNet

In [14]:
# Import wordnet
import nltk
nltk.download('wordnet')
from nltk.corpus import wordnet as wn

[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\kaurnavdeep1\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


In [13]:
# Get distractors for the word "lion": take the first noun sense of "lion" and use its co-hyponyms (the other hyponyms of its hypernym) as distractors.

# Distractors from Wordnet
def get_distractors_wordnet(syn,word):
    """Return the co-hyponyms of ``syn`` as title-cased distractors for ``word``.

    The siblings are the hyponyms of the *first* hypernym of ``syn``; the
    first lemma of each sibling supplies its name.

    Args:
        syn: A WordNet synset (anything exposing ``hypernyms()``; its
            hypernym must expose ``hyponyms()`` whose items expose
            ``lemmas()[0].name()``).
        word: The answer word the distractors are generated for. It may
            contain spaces and any mix of upper/lower case.

    Returns:
        A list of unique distractor strings in encounter order, with
        underscores turned into spaces and every token capitalized. The list
        is empty when ``syn`` has no hypernym.
    """
    distractors = []
    # Lemma names use "_" where the surface word has spaces, so normalise the
    # target the same way before comparing; otherwise a multi-word target
    # (e.g. "snow leopard") would be returned as its own distractor.
    target = word.lower().replace(" ", "_")

    hypernyms = syn.hypernyms()
    if not hypernyms:
        return distractors

    for sibling in hypernyms[0].hyponyms():
        lemma_name = sibling.lemmas()[0].name()
        # Case-insensitive so a capitalised lemma still matches the target.
        if lemma_name.lower() == target:
            continue
        display_name = " ".join(
            token.capitalize() for token in lemma_name.replace("_", " ").split()
        )
        if display_name not in distractors:
            distractors.append(display_name)
    return distractors
# Demo: distractors for the first noun sense of "lion".
original_word = "lion"
synset_to_use = wn.synsets(original_word, pos='n')[0]
distractors_calculated = get_distractors_wordnet(
    synset_to_use,
    original_word,
)

print('Original Word : ', original_word)
print('Distractors Calculated : ', distractors_calculated)


Original Word :  lion
Distractors Calculated :  ['Cheetah', 'Jaguar', 'Leopard', 'Liger', 'Saber-toothed Tiger', 'Snow Leopard', 'Tiger', 'Tiglon']


In [10]:
# Similarly, for the word cricket, which has two senses (one for insect and one for the game) we get different distractors for each depending on which sense we use.

#  An example of a word with two different senses
original_word = "cricket"

# List every noun sense together with its gloss.
syns = wn.synsets(original_word, pos='n')
for syn in syns:
  print (syn, ": ",syn.definition(),"\n" )

# Same surface word, different sense -> different distractors.
for sense_index in (0, 1):
  synset_to_use = syns[sense_index]
  distractors_calculated = get_distractors_wordnet(synset_to_use, original_word)
  print ("\noriginal word: ",original_word.capitalize())
  print (distractors_calculated)

Synset('cricket.n.01') :  leaping insect; male makes chirping noises by rubbing the forewings together 

Synset('cricket.n.02') :  a game played with a ball and bat by two teams of 11 players; teams take turns trying to score runs 


original word:  Cricket
['Grasshopper']

original word:  Cricket
['Ball Game', 'Field Hockey', 'Football', 'Hurling', 'Lacrosse', 'Polo', 'Pushball', 'Ultimate Frisbee']


# 2. ConceptNet

In [16]:
import requests
import json
import re
import random
import pprint


In [32]:
# Distractors from http://conceptnet.io/
def get_distractors_conceptnet(word, timeout=10):
    """Collect distractors for ``word`` from the ConceptNet ``PartOf`` relation.

    Two rounds of queries are made against ``api.conceptnet.io``:

    1. Find up to 5 concepts that ``word`` (as a noun) is a part of.
    2. For each such concept, fetch up to 10 other things that are part of it;
       their labels become the distractors.

    Args:
        word: The answer word; case is ignored and spaces become underscores.
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        A list of unique labels in encounter order, excluding any label that
        contains ``word`` (case-insensitive). Empty when nothing is found.

    Raises:
        requests.RequestException: On network failure, timeout, or a non-2xx
            response.
    """
    from urllib.parse import quote  # local import keeps the cell self-contained

    original_word = word.lower()
    # Percent-encode so characters such as "&", "#" or "?" inside the word
    # cannot alter the query string.
    term = quote(original_word.replace(" ", "_"), safe="")
    distractor_list = []

    url = "http://api.conceptnet.io/query?node=/c/en/%s/n&rel=/r/PartOf&start=/c/en/%s&limit=5"%(term,term)
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()

    for whole_edge in response.json().get('edges', []):
        link = whole_edge['end']['term']

        url2 = "http://api.conceptnet.io/query?node=%s&rel=/r/PartOf&end=%s&limit=10"%(link,link)
        response2 = requests.get(url2, timeout=timeout)
        response2.raise_for_status()
        # Distinct loop variable: the original reused ``edge`` and shadowed
        # the outer loop's variable.
        for part_edge in response2.json().get('edges', []):
            word2 = part_edge['start']['label']
            if word2 not in distractor_list and original_word not in word2.lower():
                distractor_list.append(word2)

    return distractor_list

original_word = "Contract"
distractors = get_distractors_conceptnet(original_word)

print ("Original word: ",original_word)
print ("\nDistractors: ",distractors)

Original word:  Contract

Distractors:  ['slam', 'ruff', 'nasal', 'arch', 'bid', 'trestle', 'pilothouse', 'pier', 'A suspension cable']


# 3. Sense2vec 

In [19]:
# NOTE(review): the install log below shows this pulling in spaCy 2.3.7, which pip reports as
# incompatible with the installed pke and en-core-web-sm packages. A later cell installs
# sense2vec==1.0.3 instead — confirm which pin is intended and keep only one.
!pip install sense2vec==1.0.2

Collecting sense2vec==1.0.2

ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
pke 2.0.0 requires spacy>=3.2.3, but you have spacy 2.3.7 which is incompatible.
en-core-web-sm 3.2.0 requires spacy<3.3.0,>=3.2.0, but you have spacy 2.3.7 which is incompatible.



  Downloading sense2vec-1.0.2.tar.gz (54 kB)
     ---------------------------------------- 54.7/54.7 kB 1.4 MB/s eta 0:00:00
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'
Collecting spacy<3.0.0,>=2.2.3
  Downloading spacy-2.3.7-cp37-cp37m-win_amd64.whl (9.6 MB)
     ---------------------------------------- 9.6/9.6 MB 7.5 MB/s eta 0:00:00
Collecting catalogue>=0.0.4
  Using cached catalogue-1.0.0-py2.py3-none-any.whl (7.7 kB)
Building wheels for collected packages: sense2vec
  Building wheel for sense2vec (setup.py): started
  Building wheel for sense2vec (setup.py): finished with status 'done'
  Created wheel for sense2vec: filename=sense2vec-1.0.2-py2.py3-none-any.whl size=34994 sha256=187f76f65ac42fad03aa724b43b52e0892706135a6ca42d9ced6d0603c374565
  Stored in directory: c:\users\kaurnavdeep1\appdata\local\pip\cache\wheels\cf\d3\93\fe8e871b410c5456a7b06be0f154ad6bab298462471551f39d
Successfully built sense2vec
Installing collec

In [24]:
!pip install --quiet sense2vec==1.0.3
!wget https://github.com/explosion/sense2vec/releases/download/v1.0.0/s2v_reddit_2015_md.tar.gz
!tar -xvf  s2v_reddit_2015_md.tar.gz
!ls s2v_old

# load sense2vec vectors
from sense2vec import Sense2Vec
s2v = Sense2Vec().from_disk('s2v_old')

'wget' is not recognized as an internal or external command,
operable program or batch file.
tar: Error opening archive: Failed to open 's2v_reddit_2015_md.tar.gz'
'ls' is not recognized as an internal or external command,
operable program or batch file.


ValueError: Can't read file: s2v_old\cfg