In [None]:
import logging
import time

import gensim
import nltk.stem
import spacy
from bokeh.io import output_notebook
from bokeh.models import HoverTool, Range1d, LabelSet, Label
from bokeh.plotting import figure, output_file, show, ColumnDataSource

from ifai import *

# Send INFO-and-above log records to the notebook, prefixed with a timestamp
# and the severity level.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s : %(levelname)s : %(message)s',
)

# Load the word-vector model once; every cell below passes `model` along.
model = load_model('models/GoogleNews-vectors-negative300.bin')

# Display Bokeh figures inline in the notebook output.
output_notebook()

## Utility Functions and Sample Tests


#### get_verbs_for_noun 
    This function takes a noun and computes its possible verbs using the word2vec model. It uses a list of canonical examples to compute an average vector between nouns and verbs. That vector is then used to fetch candidate verbs from the model. The words returned by the model are lemmatized and compared against the 1000 most frequently used English verbs; the words that appear in both sets are kept. These verbs are then unioned with verbs commonly used in interactive fiction. There are currently three possible sets that can be returned.
    Please use the next cell to run the samples. Note that you can uncomment the test lines in the function to inspect the different verb sets.
    P.S. This algorithm is a replication of Fulda's work.

#### get_adjectives_for_noun 
    This function takes a noun and computes its possible adjectives using the word2vec model. The process of fetching candidate adjectives is the same as in the get_verbs_for_noun function. The words are then lemmatized and returned as a list.

In [None]:
test_nouns = ["book", "sword", "horse", "key", "prison"]

# Run each noun-based helper over the same sample nouns, printing a banner
# before each batch and a blank line after it.
for banner, lookup in (
    ("get_verbs_for_noun function tests", get_verbs_for_noun),
    ("get_adjectives_for_noun function tests", get_adjectives_for_noun),
):
    print("-" * 5, banner, "-" * 5)
    for noun in test_nouns:
        print(noun, ":", lookup(model, noun))
    print()

### possible_actions
    This function takes a sentence and returns a list of possible actions. 
    The algorithm uses spaCy to find the nouns in the sentence. It then calls the get_verbs_for_noun function to obtain a list of actions for each noun. The result is first stored in a dictionary whose keys are nouns and whose values are the possible actions. The function returns a list of possible actions built by combining the keys and values of the dictionary. 
    Please use the next cell to run the samples. 

In [None]:
# Sample sentences for possible_actions: one unrelated sentence followed by
# three room descriptions.
s0 = "Soon you’ll be able to send and receive money from friends and family right in Messages."
s1 = "This is an open field west of a white house, with a boarded front door. There is a small mailbox here."
s2 = "This is a forest, with trees in all directions around you."
s3 = "This is a dimly lit forest, with large trees all around.  One particularly large tree with some low branches stands here."

# For each sentence: a blank separator line, the sentence, then its actions.
for sentence in (s0, s1, s2, s3):
    print()
    print(sentence)
    actions = possible_actions(model, sentence)
    print(actions)

### get_tools_for_verb
    This function takes a verb and returns a list of tools that can afford the verb. 
    Please use the following cell to run the samples. 

In [None]:
# Sample verbs for get_tools_for_verb.
test_verbs = [
    "climb", "use", "open", "lift", "kill", "murder",
    "drive", "ride", "cure", "type", "sing",
]

# Print one "verb : tools" line per sample verb.
for verb in test_verbs:
    tools = get_tools_for_verb(model, verb)
    print(verb, ":", tools)

### rank_manipulability
    This function takes a list of nouns and returns a list of tuples ranked by graspability. Nouns at the front of the list are more similar to "tree" and thus more manipulable. The algorithm ranks the nouns by the dot product of their vectors with the ("forest" - "tree") vector. 
    

In [None]:
# Sample nouns for the manipulability ranking.
test_grasp = [
    "ocean", "cloud", "metal", "house", "wheel", "mosquito", "factory",
    "apple", "wallet", "bridge", "hat", "troll", "flower", "box", "key",
    "door", "bottle", "water", "bag",
]

# Rank the nouns and show the full ranking.
ranking = rank_manipulability(model, test_grasp)
print(ranking)

### Demo
    This demo replicates Fulda's paper. It maps nouns to xy coordinates, where the x-axis is the vector ["forest" - "tree"] and the y-axis is the vector ["mountain" - "pebble"]. 
    Note, interestingly, that "key" is not so graspable! 

In [None]:
# Map a noun to its pair of graspability coordinates.
def get_grasp_coordination(model, noun):
    """Project a noun's vector onto the two graspability axes.

    The x axis is vector("forest") - vector("tree") and the y axis is
    vector("mountain") - vector("pebble"). Each coordinate is the dot
    product of the noun's vector with the corresponding axis.

    Args:
        model: object exposing ``word_vec(word)``; presumably the loaded
            word2vec model -- confirm against ``load_model``.
        noun: the word to place on the plane.

    Returns:
        A ``(x, y)`` tuple of the two dot products.
    """
    # Build both axes first, as "large thing" minus "small thing".
    axes = [
        model.word_vec(large) - model.word_vec(small)
        for large, small in (("forest", "tree"), ("mountain", "pebble"))
    ]
    noun_vec = model.word_vec(noun)
    return tuple(np.dot(noun_vec, axis) for axis in axes)

# Compute the graspability coordinates of each sample noun.
test_grasp = ["ocean", "cloud", "metal", "house", "wheel", "mosquito", "factory", "apple", "wallet", "bridge", 
              "hat", "troll", "flower", "box", "key", "door", "bottle", "water", "bag"]
xs = []
ys = []
for noun in test_grasp:
    x_co, y_co = get_grasp_coordination(model, noun)
    xs.append(x_co)
    ys.append(y_co)
    # Print this noun's own coordinates; printing xs/ys here would dump the
    # entire accumulated lists on every iteration.
    print(noun, ":", x_co, ",", y_co)

# Graph a scatter plot: one point per noun, backed by a shared data source so
# the hover tool and the labels read the same columns.
source = ColumnDataSource(
        data=dict(
            x = xs,
            y = ys,
            noun = test_grasp,
        )
    )

# Tooltip showing the noun and its coordinates on mouse-over.
hover = HoverTool(
        tooltips=[
            ("noun", "@noun"),
            ("(x,y)", "(@x, @y)"),
        ]
    )

p = figure(plot_width=700, plot_height=700, tools=[hover], title="Graspability")
p.circle('x', 'y', size = 10, source = source)

# Text label next to every point, offset slightly so it does not cover the dot.
labels = LabelSet(x = 'x', y = 'y', text = 'noun', level = 'glyph',
              x_offset = 5, y_offset = 5, source = source, render_mode = 'canvas')
p.add_layout(labels)

# Axis titles name the vector difference each axis projects onto.
p.xaxis[0].axis_label = '[Forest] - [Tree]'
p.yaxis[0].axis_label = '[Mountain] - [Pebble]'
show(p)

In [None]:
# ignore main for now

def main():
    """Run possible_actions on three sample room descriptions and time it.

    Prints the result of ``possible_actions(model, sentence)`` for each
    sentence, followed by the total elapsed time in seconds. Relies on the
    notebook-level ``model`` and ``possible_actions`` names.
    """
    sentences = [
        "This is an open field west of a white house, with a boarded front door. There is a small mailbox here.",
        "This is a forest, with trees in all directions around you.",
        "This is a dimly lit forest, with large trees all around.  One particularly large tree with some low branches stands here.",
    ]

    # perf_counter is monotonic, so the measured interval cannot be skewed by
    # system clock adjustments the way time.time() can.
    tic = time.perf_counter()
    for sentence in sentences:
        print(possible_actions(model, sentence))
    toc = time.perf_counter()
    print("total time spend:", toc - tic, "s")

if __name__ == "__main__":
    main()