In [38]:
import requests
import queue

In [64]:
'''
    A basic function that gets the related words
    derived from concept net API.
    This function is used as a baseline only.
'''
def get_related(key_word):
    result = {}
    obj = requests.get('http://api.conceptnet.io/related/c/en/'+key_word+'?filter=/c/en').json()
    for relate in obj['related']:
        related_word = relate['@id'].lstrip('/c/en/')
        related_weight = relate['weight']
        result.update({related_word:related_weight})
    return result

In [98]:
'''
    Using queue to get the related words recursively
    limit: set the upper limit of the related word number
'''
def get_all_related(key_word,limit=100):
    result = {key_word:1}
    q = queue.PriorityQueue()
    q.put((-1,key_word))
    count = 0
    while (not q.empty() and count <= limit):
        ele = q.get()
        word = ele[1]
        weight = -ele[0]
        obj = requests.get('http://api.conceptnet.io/related/c/en/'+word+'?filter=/c/en').json()
        for relate in obj['related']:
            related_word = relate['@id'].lstrip('/c/en/')
            if related_word not in result:
                #print("%s can be added" %related_word)
                related_weight = relate['weight']
                result.update({related_word:related_weight * weight})
                q.put((-related_weight * weight,related_word))
                count += 1
            else:
                #print("%s cannot be added since it's duplicated" %related_word)
                pass
    #rank the result by weight
    result_order_list=sorted(result.items(),key=lambda x:x[1],reverse=True)
    result_order_dict = dict(result_order_list)
    return result_order_dict

In [108]:
result = get_all_related("fast_food",50)

In [109]:
result

{'fast_food': 1,
 'flip_burgers': 0.971,
 'burger_king': 0.777,
 'mickey_d': 0.766,
 'golden_arches': 0.74,
 'mcdonalds': 0.74,
 'burger_flipper': 0.689,
 "macca's": 0.687,
 "mcdonald's": 0.661,
 'hamburgers': 0.612,
 'burgers': 0.606,
 'burger': 0.602,
 'taco_bell': 0.595,
 'drive_thru': 0.583,
 'mcdonald': 0.578,
 'hamburger': 0.568,
 'sandwich_shop': 0.568,
 'maccas': 0.563,
 'heeseburgers': 0.558,
 'junk_food': 0.557,
 'kfc': 0.55,
 'onvenience_stores': 0.548,
 'slow_food': 0.545,
 'vada_pav': 0.54,
 'hicken_nuggets': 0.525,
 'heeseburger': 0.52,
 'atery': 0.515,
 'restaurants': 0.512,
 'fatburger': 0.512,
 'ateries': 0.509,
 'restaurant': 0.505,
 'greasy_spoon': 0.505,
 'bar_and_grill': 0.502,
 'fast_casual': 0.502,
 'ground_beef': 0.498,
 'veggie_burger': 0.496,
 'tater_tot': 0.49,
 'arby': 0.48,
 'takeout': 0.479,
 'fries': 0.477,
 'steak_house': 0.473,
 'onvenience_store': 0.472,
 'hamburger_steak': 0.47,
 'special_sauce': 0.467,
 'diners': 0.464,
 'tater_tots': 0.463,
 'pizza_

In [115]:
"""
    store the result into a txt file
"""
def store_in_txt(result, filepath):
    f = open(filepath, "a+")
    for word,weight in result.items():
        f.write(word+" "+str(weight)+"\n")

In [116]:
if __name__ == "__main__":
    result = get_all_related("fast_food",50)
    store_in_txt(result,"result.txt")
    print("done")

done
