In [1]:
import logging
import time

import gensim
import nltk.stem
import spacy
from bokeh.io import output_notebook
from bokeh.models import HoverTool, Range1d, LabelSet, Label
from bokeh.plotting import figure, output_file, show, ColumnDataSource

from ifai import *

# logging
logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)

# loading model
model = load_model('models/GoogleNews-vectors-negative300.bin')

# set up bokeh
output_notebook()

2018-01-30 09:03:56,471 : INFO : loading EuclideanKeyedVectors object from models/GoogleNews-vectors-negative300.cache


[nltk_data] Downloading package wordnet to /Users/Lisa/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


2018-01-30 09:04:04,669 : INFO : loading syn0norm from models/GoogleNews-vectors-negative300.cache.syn0norm.npy with mmap=None
2018-01-30 09:04:11,270 : INFO : loading syn0 from models/GoogleNews-vectors-negative300.cache.syn0.npy with mmap=None
2018-01-30 09:04:22,609 : INFO : loaded models/GoogleNews-vectors-negative300.cache


## Utility Functions and Sample Tests


### `w2v_get_verbs_for_noun` 

    This function takes in a noun and compute the possible verbs by using word2vec model. The function uses a list of conons to computes a average vector in between noun and verbs. The vector is then used to fetch possible verbs from the model. The words returned by model are then lemmatized, compared to the top 1000 frequently use english verbs and the ones that in both set are saved. The verbs then are unioned with commonly used verbs in interactive fiction. There are currently three possible sets can be returns.

    Please use the next cell to run for samples. Note that you can uncomment test lines in the function to inspect different verb sets.

    PS: this algorithm is a replication of Fulda.

#### `w2v_get_adjectives_for_noun`
    
    This function takes in a noun and compute the possible adjectives by using word2vec model. The process of fetching possible adj are same as that in the get_verbs_for_noun function. The words are then lemmatized and return as a list.

In [3]:
test_nouns = ["book", "sword", "horse", "key", "prison", "sky", "steel", "apple", "sun"]

# get_verbs_for_noun tests
print("-" * 5, "w2v get_verbs_for_noun function tests", "-" * 5)
for noun in test_nouns:
    print(noun, ":", w2v_get_verbs_for_noun(model, noun))
print()

# get_adjectives_for_noun tests
print("-" * 5, "w2v_get_adjectives_for_noun function tests", "-" * 5)
for noun in test_nouns:
    print(noun, ":", w2v_get_adjectives_for_noun(model, noun))
print()

# balance #not quite working due to limited cannon 
for noun in test_nouns:
    print(noun, ":")
    for i in range(5):
        adj_ls = w2v_get_adjectives_for_noun(model, noun)
        adj = adj_ls[0]
        print(i, noun, adj)
        print("adj: ", adj_ls)
        noun_ls = w2v_get_nouns_for_adjective(model, adj)
        noun = noun_ls[0]
        print("noun: ", noun_ls)
        print()
        

book :
0 book paperback
adj:  ['paperback']
noun:  ['hardback', 'hardcover', 'paperbacks', 'tome', 'book']

1 hardback paperback
adj:  ['paperback', 'hardcover', 'hardbound']
noun:  ['hardback', 'hardcover', 'paperbacks', 'tome', 'book']

2 hardback paperback
adj:  ['paperback', 'hardcover', 'hardbound']
noun:  ['hardback', 'hardcover', 'paperbacks', 'tome', 'book']

3 hardback paperback
adj:  ['paperback', 'hardcover', 'hardbound']
noun:  ['hardback', 'hardcover', 'paperbacks', 'tome', 'book']

4 hardback paperback
adj:  ['paperback', 'hardcover', 'hardbound']
noun:  ['hardback', 'hardcover', 'paperbacks', 'tome', 'book']

sword :
0 sword dark
adj:  ['dark', 'bright', 'sharp', 'gray', 'dull', 'dry', 'lustrous']
noun:  ['darkness', 'rope', 'knife', 'sword']

1 darkness dark
adj:  ['dark', 'bright', 'gray', 'darkening', 'cloudy', 'dim', 'dreary', 'drear', 'darkest', 'darker']
noun:  ['darkness', 'rope', 'knife', 'sword']

2 darkness dark
adj:  ['dark', 'bright', 'gray', 'darkening', 'cl

### `possible_actions`

    The function take in a sentence and return a list of possible actions. 

    The algorithm uses Spacy to find nouns in the sentence. It then calls get_verbs_for_noun function to obtain a list of actions. The result first get stored in a dictionary with key being noun and value being possible actions. The function will return a list of possible actions combining keys and values of the dictionary. 


In [5]:
# possible_actions tests
s0 = "Soon you’ll be able to send and receive money from friends and family right in Messages."
s1 = "This is an open field west of a white house, with a boarded front door. There is a small mailbox here."
s2 = "This is a forest, with trees in all directions around you."
s3 = "This is a dimly lit forest, with large trees all around.  One particularly large tree with some low branches stands here."

for sentence in [s0, s1, s2, s3]:
    print()
    print(sentence)
    print(possible_actions(model, sentence))


Soon you’ll be able to send and receive money from friends and family right in Messages.
['wear friend', 'eat friend', 'go friend', 'hang friend', 'forget friend', 'confide friend', 'come friend', 'lend friend', 'talk friend', 'give friend', 'return friend', 'keep friend', 'meet friend', 'needle friend', 'rely friend', 'wear family', 'stay family', 'eat family', 'relax family', 'go family', 'enjoy family', 'include family', 'visit family', 'play family', 'move family', 'plan family', 'share family', 'be family', 'branch family', 'buy family', 'call family', 'go you', 'get you', 'know you', 'forget you', 'invest money', 'go money', 'buy money', 'raise money', 'pay money', 'spend money', 'purchase money', 'last money', 'increase money', 'obtain money', 'save money', 'be money', 'further money', 'make money', 'wear Messages', 'respond Messages', 'connect Messages', 'speak Messages']

This is an open field west of a white house, with a boarded front door. There is a small mailbox here.
['

### `w2v_get_tools_for_verb`

    This function take in a verb and return a list of tools that can afford the verb. 

    Please use the following cell to run for samples. 

In [6]:
test_verbs = ["climb", "use", "open", "lift", "kill", "murder", "drive", "ride", "cure", "type", "sing"]
for verb in test_verbs:
    print(verb, ":", w2v_get_tools_for_verb(model, verb))

climb : ['climbing', 'climbs', 'sandstone_slab', 'climbed', 'rocky_ridge', 'icy_ledge', 'bramble_bushes', 'Nepalese_sherpa', 'mesquite_bushes', 'hemlock_tree']
use : ['using', 'scissors_clippers', 'uses', 'used', 'reusing_rainwater', 'custom_crating', 'useof', 'hay_stacks', 'photogram', 'GEDCOM_file']
open : ['door', 'evergreen_wreath', 'Dresser_drawers', 'fanlight', 'Chestnuts_roasting', 'window', 'wooden_crate', 'doors', 'opened', 'minivan_partially_submerged']
lift : ['lifting', 'lifted', 'Lifting', 'lifts', '#/#-foot_Douglas_fir', 'cherrypicker', 'Bobcat_loader', 'telescopic_boom', 'bucket_loader', 'loader_tractor']
kill : ['killing', 'regular_seasonal_flus', 'Edmond_Zabin', 'cocked_pistol', 'panga_machete', '.##_Colt_revolver', 'tranquillizer_dart', 'mattock', 'jungle_foliage', 'de_la_Burde']
murder : ['slaying', 'murderer', 'murders', 'murdering', 'Dr_Chandra_Naraynsingh', 'homicide', 'attempted_murder', 'muder', 'death', 'headless_corpse']
drive : ['tow_dolly', 'drives', 'maroon

### `w2v_rank_manipulability`

    This function takes in a list of nouns and return a tupple list ranked by graspability. Those in front of the list are more similar to tree and thus more manipulable. The algorithm rank by their vector's doc product with the "forest" - "tree" vectors. 
    

In [4]:
test_grasp = ["ocean", "cloud", "metal", "house", "wheel", "mosquito", "factory", "apple", "wallet", "bridge", 
              "hat", "troll", "flower", "box", "key", "door", "bottle", "water", "bag"]
print(w2v_rank_manipulability(model, test_grasp))

[('flower', -2.0970333), ('bottle', -1.8987761), ('apple', -1.8763946), ('hat', -1.7678158), ('wallet', -1.7192638), ('bag', -1.5255593), ('box', -1.51406), ('wheel', -1.3315094), ('bridge', -1.3085077), ('door', -1.1711469), ('mosquito', -0.71538937), ('house', -0.48642784), ('troll', -0.43783754), ('cloud', -0.21320903), ('metal', 0.038182996), ('key', 0.15650916), ('water', 0.6583246), ('factory', 1.0298679), ('ocean', 1.3738527)]


### Manipulability Demo

    This Demo replicates Fulda's paper. It maps nouns to xy coordinates where x_axis is the vector of ["forest" - "tree"] and y_axis is the vector of ["mountain" - "pebbel"]. 

    Note here interestingly that key is not so graspable! 

In [4]:
# This function takes in a noun and return a pair of coordination. 
def get_grasp_coordination(model, noun):
    x_axis = model.word_vec("forest") - model.word_vec("tree")
    y_axis = model.word_vec("mountain") - model.word_vec("pebble")
    vec = model.word_vec(noun)
    noun_x = np.dot(vec, x_axis)
    noun_y = np.dot(vec, y_axis)
    return noun_x, noun_y

# get coordination for nouns
test_grasp = ["ocean", "cloud", "metal", "house", "wheel", "mosquito", "factory", "apple", "wallet", "bridge", 
              "hat", "troll", "flower", "box", "key", "door", "bottle", "water", "bag"]
xs = []
ys = []
for noun in test_grasp:
    x_co, y_co = get_grasp_coordination(model, noun)
    xs.append(x_co)
    ys.append(y_co)
    print(noun, ":", xs, ",", ys)

# Graph a scatter plot
source = ColumnDataSource(
        data=dict(
            x = xs,
            y = ys,
            noun = test_grasp,
        )
    )

hover = HoverTool(
        tooltips=[
            ("noun", "@noun"),
            ("(x,y)", "(@x, @y)"),
        ]
    )

p = figure(plot_width=700, plot_height=700, tools=[hover], title="Graspability")
p.circle('x', 'y', size = 10, source = source)

labels = LabelSet(x = 'x', y = 'y', text = 'noun', level = 'glyph',
              x_offset = 5, y_offset = 5, source = source, render_mode = 'canvas')
p.add_layout(labels)

p.xaxis[0].axis_label = '[Forrest] - [Tree]'
p.yaxis[0].axis_label = '[Mountain] - [Pebble]'
show(p)

ocean : [1.3738527] , [-0.84101164]
cloud : [1.3738527, -0.21320903] , [-0.84101164, 0.13455975]
metal : [1.3738527, -0.21320903, 0.038182996] , [-0.84101164, 0.13455975, -1.3513044]
house : [1.3738527, -0.21320903, 0.038182996, -0.48642784] , [-0.84101164, 0.13455975, -1.3513044, 0.66068959]
wheel : [1.3738527, -0.21320903, 0.038182996, -0.48642784, -1.3315094] , [-0.84101164, 0.13455975, -1.3513044, 0.66068959, 0.0065233707]
mosquito : [1.3738527, -0.21320903, 0.038182996, -0.48642784, -1.3315094, -0.71538937] , [-0.84101164, 0.13455975, -1.3513044, 0.66068959, 0.0065233707, -1.3532331]
factory : [1.3738527, -0.21320903, 0.038182996, -0.48642784, -1.3315094, -0.71538937, 1.0298679] , [-0.84101164, 0.13455975, -1.3513044, 0.66068959, 0.0065233707, -1.3532331, -0.04023996]
apple : [1.3738527, -0.21320903, 0.038182996, -0.48642784, -1.3315094, -0.71538937, 1.0298679, -1.8763946] , [-0.84101164, 0.13455975, -1.3513044, 0.66068959, 0.0065233707, -1.3532331, -0.04023996, -1.808363]
wallet 

In [8]:
# ignore main for now

def main():
    s1 = "This is an open field west of a white house, with a boarded front door. There is a small mailbox here."
    s2 = "This is a forest, with trees in all directions around you."
    s3 = "This is a dimly lit forest, with large trees all around.  One particularly large tree with some low branches stands here."
    sentences = [s1, s2, s3]
    
    tic = time.time()
    for sentence in sentences:
        print(possible_actions(model, sentence))
    toc = time.time()
    print("total time spend:", toc - tic, "s")

if __name__ == "__main__":
    main()

['wear mailbox', 'mail mailbox', 'take mailbox', 'write mailbox', 'get mailbox', 'send mailbox', 'receive mailbox', 'store mailbox', 'bill mailbox', 'collect mailbox', 'have mailbox', 'letter mailbox', 'people mailbox', 'wear door', 'sit door', 'take door', 'push door', 'go door', 'leave door', 'get door', 'give door', 'open door', 'separate door', 'enter door', 'be door', 'keep door', 'close door', 'control door', 'exit door', 'enclose door', 'seal door', 'wear house', 'stay house', 'sit house', 'take house', 'eat house', 'write house', 'go house', 'shelter house', 'cost house', 'burn house', 'weather house', 'house house', 'face house', 'keep house', 'belong house', 'be house', 'settle house', 'go west']
['wear tree', 'tree tree', 'take tree', 'eat tree', 'gather tree', 'go tree', 'hang tree', 'get tree', 'give tree', 'shade tree', 'grow tree', 'branch tree', 'climb tree', 'cast tree', 'drop tree', 'make tree', 'provide tree', 'rain tree', 'build tree', 'go you', 'get you', 'know you

### cn_get_verbs_for_noun

    This function return a list of top ten possible verbs with weight by query "capable of" & "used for" relations from ConceptNet. The weight here is the weight of the edge. 
    
### cn_get_adjectives_for_noun

    This function return a list of adj best describe the noun from ConceptNet with "has property" relation
    

In [5]:
test_nouns = ["book", "sword", "brick", "key", "prison"]

# cn_get_verbs_for_noun tests compare with w2v_get_verbs_for_noun tests
print("-" * 5, "obtain verbs tests", "-" * 5)
for noun in test_nouns:
    print(noun, ":")
    print("ConceptNet:", cn_get_verbs_for_noun(noun))
    print("word2vec result:", w2v_get_verbs_for_noun(model, noun))
    print("combine version", get_verbs_for_noun(model, noun))
    print()


    
# cn_get_adjectives_for_noun compare with w2v_get_adjectives_for_noun tests
print("-" * 5, "obtain adjetcives tests", "-" * 5)
for noun in test_nouns:
    print(noun, ":")
    print("ConceptNet:", cn_get_adjectives_for_noun(noun))
    print("word2vec result:", w2v_get_adjectives_for_noun(model, noun))
    print("combine version", get_adjectives_for_noun(model, noun))
    print()


----- obtain verbs tests -----
book :
ConceptNet: [('learn', 20.492422502470642), ('study', 6.0), ('develop', 5.0), ('record', 4.0), ('include', 3.0), ('teach', 3.0), ('be', 3.0), ('bind', 3.0), ('build', 3.0), ('create', 3.0)]
word2vec result: ['write', 'book', 'read', 'give', 'eat', 'take', 'go']
combine version ['write', 'book', 'read', 'give', 'eat', 'take', 'go', 'learn', 'study', 'develop', 'record', 'include', 'teach', 'be', 'bind', 'build', 'create']

sword :
ConceptNet: [('kill', 9.47213595499958), ('fence', 6.0), ('cut', 3.0), ('chop', 2.0), ('decorate', 2.0), ('intimidate', 2.0), ('look', 2.0), ('maim', 2.0), ('stab', 2.0), ('stick', 2.0)]
word2vec result: ['wear']
combine version ['wear', 'kill', 'fence', 'cut', 'chop', 'decorate', 'intimidate', 'look', 'maim', 'stab', 'stick']

brick :
ConceptNet: [('construct', 4.0), ('break', 3.0), ('double', 2.0)]
word2vec result: ['ignore', 'build', 'sit', 'wear', 'go']
combine version ['ignore', 'build', 'sit', 'wear', 'go', 'construc

### get_synonyms
    This function return a list of synonym of the noun from ConceptNet and wordnet (not ideal at the moment)

### cn_get_locations
    This function return a list of locations that the noun possibly located according to ConceptNet's relations ("at location", "locate near", "part of"


In [6]:
# get_synonyms tests
print("-" * 5, "obtain synonyms tests", "-" * 5)
for noun in test_nouns:
    print(noun, ":", get_synonyms(noun, 'n')[:10])

# get_locations_cn tests
print("-" * 5, "obtain locations tests", "-" * 5)
for noun in test_nouns:
    print(noun, ":", cn_get_locations(noun)[:10])

----- obtain synonyms tests -----




 BeautifulSoup(YOUR_MARKUP})

to this:

 BeautifulSoup(YOUR_MARKUP, "lxml")

  markup_type=markup_type))


book : ['essay', 'album', 'novel', 'publication', 'dictionary']
sword : ['blade', 'brand', 'steel', 'blade', 'dagger', 'saber', 'brand', 'foil']
brick : ['stone', 'slab', 'cube', 'cinder block', 'brickwork']
key : ['major', 'leading', 'indispensable', 'crucial', 'primary']
prison : ['prison_house', 'penitentiary', 'lockup', 'confinement', 'jail', 'dungeon']
----- obtain locations tests -----
book : ['a classroom', 'the shelf', 'a bookshelf', 'a university', 'a book', 'your desk', 'a store', 'a backpack', 'a bedroom', 'a closet']
sword : ['a sheath', 'a museum', 'a collection', 'a container called a scabbard', 'the hands of a knight', 'a musuem', 'a stone', 'armoury']
brick : ['a brick wall']
key : ['a pocket', 'keychain', 'the purse', 'a keyhole', 'at hotel', 'a bus depot', "your car's ignition", 'the door', 'the front door', 'an idea']
prison : ['a prison', 'Alcatraz', 'america', 'a city', 'the countryside', 'fenced area', 'Kansas', 'nearly any town', 'USA']
