In [1]:
import requests
import queue

In [2]:
'''
    A basic function that returns the words related to a keyword,
    as reported by the ConceptNet API.
    This function is used as a baseline only.
'''
def get_related(key_word):
    """Return the English terms that ConceptNet reports as related to ``key_word``.

    Sends one GET request to the ConceptNet ``/related`` endpoint with the
    ``/c/en`` filter and collects the returned terms with their weights.

    Args:
        key_word: term to look up; it is concatenated into the URL path as-is.

    Returns:
        dict mapping each related term (its ``@id`` with the leading
        ``/c/en/`` removed) to the ``weight`` value from the response.

    Raises:
        KeyError: if the decoded JSON has no ``'related'`` entry, or an item
            lacks ``'@id'`` / ``'weight'``.
    """
    prefix = '/c/en/'
    obj = requests.get('http://api.conceptnet.io/related/c/en/'+key_word+'?filter=/c/en').json()
    result = {}
    for relate in obj['related']:
        term_id = relate['@id']
        # str.lstrip() takes a *set of characters*, not a prefix: it would turn
        # '/c/en/cancer' into 'ancer'. Cut the exact prefix off instead.
        if term_id.startswith(prefix):
            related_word = term_id[len(prefix):]
        else:
            related_word = term_id
        result[related_word] = relate['weight']
    return result

In [3]:
'''
    Use a priority queue to collect related words transitively
    (related words of related words), strongest weight first
    limit: upper limit on the number of related words to collect
'''
def get_all_related(key_word,limit=100):
    """Collect terms transitively related to ``key_word`` via the ConceptNet API.

    Starting from ``key_word`` (weight 1), the term with the highest weight
    that has not been expanded yet is taken from a priority queue, its related
    English terms are fetched, and each term not seen before is recorded with
    the product of its own weight and the weight of the term it was reached
    from. A term keeps the weight of the first path that discovered it.

    Args:
        key_word: seed term; it is concatenated into the URL path as-is.
        limit: maximum number of related terms to collect, not counting the
            seed itself. With ``limit <= 0`` no request is made.

    Returns:
        dict mapping term -> weight, ordered by descending weight. It always
        contains ``key_word`` with weight 1 and at most ``limit`` other terms.

    Raises:
        KeyError: if a decoded response has no ``'related'`` entry, or an item
            lacks ``'@id'`` / ``'weight'``.
    """
    prefix = '/c/en/'
    result = {key_word:1}
    q = queue.PriorityQueue()
    # PriorityQueue pops the smallest item first, so weights are stored negated
    # to get the strongest term out first.
    q.put((-1,key_word))
    count = 0
    while not q.empty() and count < limit:
        neg_weight, word = q.get()
        weight = -neg_weight
        obj = requests.get('http://api.conceptnet.io/related/c/en/'+word+'?filter=/c/en').json()
        for relate in obj['related']:
            # Enforce the limit per added term; checking only between requests
            # lets a single response overshoot it by a whole page of results.
            if count >= limit:
                break
            term_id = relate['@id']
            # str.lstrip() strips a character set, not a prefix ('/c/en/cancer'
            # would become 'ancer'), so remove the exact prefix instead.
            if term_id.startswith(prefix):
                related_word = term_id[len(prefix):]
            else:
                related_word = term_id
            if related_word in result:
                continue
            combined_weight = relate['weight'] * weight
            result[related_word] = combined_weight
            q.put((-combined_weight, related_word))
            count += 1
    # Rank the result by weight, strongest first.
    return dict(sorted(result.items(), key=lambda item: item[1], reverse=True))

In [4]:
"""
    store the result into a txt file
"""
def store_in_txt(result, filepath):
    """Append the ``word weight`` pairs of ``result`` to a text file.

    Args:
        result: dict mapping word (str) to weight; each entry is written as
            one line of the form ``"<word> <weight>\\n"``.
        filepath: path of the file to write. It is opened in ``"a+"`` mode, so
            existing content is kept and new lines are added at the end.
    """
    # The context manager guarantees the handle is flushed and closed, even if
    # a write fails; the original left the file open.
    with open(filepath, "a+") as f:
        for word, weight in result.items():
            f.write(word+" "+str(weight)+"\n")

In [7]:
if __name__ == "__main__":
    # Gather the terms related to "smoking" (limit argument: 150) ...
    result = get_all_related("smoking", limit=150)
    # ... and append them, one "word weight" line each, to smoking.txt.
    store_in_txt(result, filepath="smoking.txt")
    print("done")

done
