In [None]:
import logging
import gensim
import spacy
import time
import nltk.stem

# Download the WordNet corpus through NLTK; the WordNetLemmatizer instances
# created in the functions below rely on it.
nltk.download('wordnet')

# Logging: emit INFO-level messages prefixed with timestamp and level.
logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)

# Load the word vectors (binary word2vec format) from a local file.
# `model` is the module-level KeyedVectors object used by every function below.
model = gensim.models.KeyedVectors.load_word2vec_format('./model/GoogleNews-vectors-negative300.bin', binary=True)

## Helper Function(s)

In [None]:
def get_ave_sigma(conons):
    """Return the average vector difference over a list of word pairs.

    Each entry of ``conons`` is a string holding two whitespace-separated
    words. For every pair, the vector of the second word is subtracted from
    the vector of the first word (both looked up in the module-level
    ``model``), and the differences are averaged.

    Args:
        conons: non-empty list of "first second" word-pair strings.

    Returns:
        The mean of ``vec(first) - vec(second)`` over all pairs.

    Raises:
        ValueError: if ``conons`` is empty, or if an entry does not split
            into exactly two words.
    """
    # Fail with a clear message instead of an opaque ZeroDivisionError below.
    if not conons:
        raise ValueError("conons must contain at least one word pair")

    sigma = 0
    for pair in conons:
        first, second = pair.split()
        sigma += model.word_vec(first) - model.word_vec(second)
    return (1 / len(conons)) * sigma

## Utility Functions and Sample Tests


#### get_verbs_for_noun 
    This function takes in a noun and computes the possible verbs using the word2vec model. It uses a list of conons (verb–noun word pairs) to compute an average difference vector between verbs and nouns. That vector is then used to fetch candidate verbs from the model. The words returned by the model are lemmatized and compared against the top 1000 most frequently used English verbs; only the words that appear in both sets are kept. These verbs are then unioned with verbs commonly used in interactive fiction. There are currently three possible sets that can be returned.
    Please use the next cell to run the samples. Note that you can uncomment the test lines in the function to inspect the different verb sets.
    P.S.: this algorithm is a replication of Fulda's algorithm.

#### get_adjectives_for_noun 
    This function takes in a noun and computes the possible adjectives using the word2vec model. The process of fetching candidate adjectives is the same as in the get_verbs_for_noun function. The words are then returned as a list (note: the current implementation returns them as-is, without lemmatizing them).

In [None]:
def get_verbs_for_noun(noun):
    """Return a list of lemmatized verbs that the given noun can afford.

    The average difference vector of the word pairs listed in
    ``./word_lists/verb_noun_pair.txt`` is combined with ``noun`` in a
    ``model.most_similar`` query (top 10 results). The returned words are
    lower-cased and lemmatized, and only those that also appear in
    ``./word_lists/top_1000_verbs.txt`` are kept.

    Args:
        noun: the noun to query; it is passed to ``model.most_similar``.

    Returns:
        A duplicate-free list, in no particular order, of the verbs found in
        both the model output and the top-1000 verb list.
    """
    # prepare tools
    wnl = nltk.stem.WordNetLemmatizer()
    # Read inside a ``with`` block so the file handle is closed right away;
    # blank lines are skipped.
    with open('./word_lists/verb_noun_pair.txt') as pair_file:
        conons = [line.rstrip() for line in pair_file if line.rstrip()]
    sigma = get_ave_sigma(conons)

    # list of common used verbs
    navigation_verbs = ["north", "south", "east", "west", "northeast", "southeast", "southwest", "northwest", "up",
                        "down", "enter", "exit"]
    essential_manipulation_verbs = ["get", "drop", "push", "pull", "open", "close"]
    with open('./word_lists/top_1000_verbs.txt') as verb_file:
        verb_list = [line.rstrip() for line in verb_file if line.rstrip()]

    # extract words from word2vec model & lemmatize them
    # NOTE(review): lemmatize() is called without a part-of-speech argument;
    # confirm the default is what is wanted for verbs.
    model_verb = model.most_similar([sigma, noun], [], topn=10)
    word2vec_words = [wnl.lemmatize(entry[0].lower()) for entry in model_verb]

    # set operations
    affordant_verbs = list(set(verb_list) & set(word2vec_words))
    # Only referenced by the optional debug prints below; the function itself
    # returns ``affordant_verbs``.
    final_verbs = list(set(navigation_verbs) | set(essential_manipulation_verbs) | set(affordant_verbs))

    # -----------test lines (uncomment below four lines to view different set of verbs)-------------
#     print("-"*10, noun, "-"*10)
#     print("word2vec words: ", word2vec_words)
#     print("affordant verbs: ", affordant_verbs)
#     print("final verbs: ", final_verbs)

    return affordant_verbs

def get_adjectives_for_noun(noun):
    """Return a list of adjectives that describe the given noun.

    The average difference vector of the word pairs listed in
    ``./word_lists/noun_adj_pair.txt`` is combined with ``noun`` in a
    ``model.most_similar`` query, and the words of the top 10 results are
    returned unmodified.

    Args:
        noun: the noun to query; it is passed to ``model.most_similar``.

    Returns:
        A list with the word of each result returned by the model.
    """
    # Each non-blank line holds one word pair, e.g. "knife sharp", "ice cold",
    # "sky blue". Read inside ``with`` so the file handle is closed.
    with open('./word_lists/noun_adj_pair.txt') as pair_file:
        conons = [line.rstrip() for line in pair_file if line.rstrip()]
    sigma = get_ave_sigma(conons)
    model_adj = model.most_similar([sigma, noun], [], topn=10)
    # Keep only the word from each result entry.
    return [entry[0] for entry in model_adj]

In [None]:
# Sample nouns used to exercise both noun-based functions.
test_nouns = ["book", "sword", "horse", "key", "prison"]

# get_verbs_for_noun tests
print("-"*5, "get_verbs_for_noun function tests", "-"*5)
# Plain loops: printing is a side effect, so no throwaway list is built.
for noun in test_nouns:
    print(noun, ":", get_verbs_for_noun(noun))
print()

# get_adjectives_for_noun tests
print("-"*5, "get_adjectives_for_noun function tests", "-"*5)
for noun in test_nouns:
    print(noun, ":", get_adjectives_for_noun(noun))

### possible_actions
    The function takes in a sentence and returns a list of possible actions. 
    The algorithm uses spaCy to find the nouns in the sentence. It then calls the get_verbs_for_noun function to obtain a list of verbs for each noun. The results are first stored in a dictionary whose keys are the nouns and whose values are the possible verbs. The function then returns a list of possible actions, built by combining each verb with its noun (in the form "verb noun"). 
    Please use the next cell to run the samples. 

In [None]:
def possible_actions(sentence):
    """Return possible "verb noun" actions for the nouns in a sentence.

    spaCy extracts the noun chunks of ``sentence``; the root word of each
    chunk is lemmatized and passed to ``get_verbs_for_noun``. Every returned
    verb is joined with its noun as ``"<verb> <noun>"``.

    Args:
        sentence: the text to analyse.

    Returns:
        A list of ``"<verb> <noun>"`` strings; each distinct lemmatized noun
        is queried only once.
    """
    # prepare tools
    # NOTE(review): the spaCy pipeline is loaded on every call; consider
    # loading it once at module level if this function is called often.
    nlp = spacy.load('en')
    doc = nlp(sentence)
    wnl = nltk.stem.WordNetLemmatizer()

    # create dictionary in the form [noun: verbs]
    dictionary = {}
    for chunk in doc.noun_chunks:
        word = wnl.lemmatize(chunk.root.text)
        if word not in dictionary:
            dictionary[word] = get_verbs_for_noun(word)

    # combine every verb with its noun to create the action list
    action_pair = []
    for noun, verbs in dictionary.items():
        action_pair.extend(verb + " " + noun for verb in verbs)
    return action_pair

In [None]:
# Sample inputs for possible_actions: one everyday sentence followed by three
# interactive-fiction style scene descriptions.
s = "Soon you’ll be able to send and receive money from friends and family right in Messages."
s1 = "This is an open field west of a white house, with a boarded front door. There is a small mailbox here."
s2 = "This is a forest, with trees in all directions around you."
s3 = "This is a dimly lit forest, with large trees all around.  One particularly large tree with some low branches stands here."
sentences = [s, s1, s2, s3]

# For each sample: a blank separator line, the sentence, then its actions.
for sentence in sentences:
    print("\n" + sentence)
    print(possible_actions(sentence))

### get_tools_for_verb
    This function takes in a verb and returns a list of tools that can afford the verb. 
    Please use the next cell to run the samples. 

In [None]:
def get_tools_for_verb(verb):
    """Return a list of words (tools) that can afford the given verb.

    The average difference vector of the word pairs listed in
    ``./word_lists/verb_noun_pair.txt`` is used as the negative term, and
    ``verb`` as the positive term, of a ``model.most_similar`` query. The
    words of the top 10 results are returned unmodified.

    Args:
        verb: the verb to query; it is passed to ``model.most_similar``.

    Returns:
        A list with the word of each result returned by the model.
    """
    # Read inside a ``with`` block so the file handle is closed right away;
    # blank lines are skipped.
    with open('./word_lists/verb_noun_pair.txt') as pair_file:
        conons = [line.rstrip() for line in pair_file if line.rstrip()]
    sigma = get_ave_sigma(conons)

    model_tools = model.most_similar([verb], [sigma], topn=10)
    # Keep only the word from each result entry.
    return [entry[0] for entry in model_tools]

In [None]:
# Sample verbs used to exercise get_tools_for_verb.
test_verbs = ["climb", "use", "open", "lift", "kill", "murder", "drive", "ride", "cure", "type", "sing"]
# Plain loop: printing is a side effect, so no throwaway list is built.
for verb in test_verbs:
    print(verb, ":", get_tools_for_verb(verb))

In [None]:
# ignore main for now
# def main():
#     test_nouns = ["book", "sword", "horse", "key"]
#     test_verbs = ["climb", "use", "open", "lift", "kill", "murder", "drive", "ride", "cure", "type", "sing"]
#     s = "Soon you’ll be able to send and receive money from friends and family right in Messages."
#     s1 = "This is an open field west of a white house, with a boarded front door. There is a small mailbox here."
#     s2 = "This is a forest, with trees in all directions around you."
#     s3 = "This is a dimly lit forest, with large trees all around.  One particularly large tree with some low branches stands here."
#     sentences = [s, s1, s2, s3]
    
#     tic = time.time()
#     [print(noun, ":", get_verbs_for_noun(noun)) for noun in test_nouns]
#     print("-"*20)
#     [print(verb, ":", get_tools_for_verb(verb)) for verb in test_verbs]
#     for sentence in sentences:
#         print("-" * 3, sentence, "-" * 3)
#         pretty_print_dict(possible_actions(sentence))
    
#     toc = time.time()
#     print("total time spend:", toc - tic, "s")

# if __name__ == "__main__": main()