Initialize

In [1]:
import sys
import csv

# Unity Communication
sys.path.append('../simulation/')
from unity_simulator.comm_unity import UnityCommunication
# Open a connection to the Unity simulator using the client's default settings.
comm = UnityCommunication()
# Scene number selected for this notebook; it is reused by unity_reset() below.
scene_num = 1
# reset() is called with scene_num-1 — presumably the simulator expects a
# zero-based scene index; confirm against the UnityCommunication API.
comm.reset(scene_num-1)

# Action
# Load the action vocabulary from a CSV file. The first column is the action
# name (lower-cased here); the second column is parsed as an integer and used
# to bucket the action: 0 -> arg0_action, 1 -> arg1_action, anything else ->
# arg2_action (presumably the number of object arguments the action takes).
action_list = []
arg0_action = []
arg1_action = []
arg2_action = []

csv_file_path = "./aist_action.csv"
with open(csv_file_path, 'r', newline='') as csv_file:
    for row in csv.reader(csv_file):
        # csv.reader yields an empty list for a blank line; skip those rather
        # than failing with IndexError on row[0].
        if not row:
            continue

        action_name = row[0].lower()
        arg_count = int(row[1])

        action_list.append(action_name)
        if arg_count == 0:
            arg0_action.append(action_name)
        elif arg_count == 1:
            arg1_action.append(action_name)
        else:
            arg2_action.append(action_name)

print(f"action list: {action_list}")

# Object and Room
# Pull the environment graph from the simulator and split its nodes into rooms
# (category == "Rooms") and everything else, which is treated as an object.
s, graph = comm.environment_graph()
nodes, edges = graph['nodes'], graph['edges']

room_list = []
object_property = {}

for node in nodes:
    class_name = node.get("class_name")
    if node.get("category") == "Rooms":
        room_list.append(class_name)
        continue
    # Several nodes may share a class name; the properties of the last one win.
    object_property[class_name] = node.get("properties")

# The property map is keyed by every non-room class name, so its sorted keys
# are exactly the de-duplicated, alphabetically ordered object names.
object_list = sorted(object_property)

print(f"object list: {object_list}")
print(f"room list: {room_list}")

action list: ['find', 'walk', 'walk towards', 'walk forward', 'run', 'sit', 'stand up', 'grab', 'open', 'close', 'put back', 'put in', 'switch on', 'switch off', 'drink', 'turn to', 'look at', 'wipe', 'read', 'pour', 'type', 'squeeze', 'cut', 'eat', 'brush', 'fold', 'jump', 'kneel', 'lift', 'rinse', 'scrub', 'squat', 'stretch', 'sweep', 'throw', 'unfold', 'wrap', 'write', 'fall', 'fall sit', 'climb', 'stand', 'straddle', 'leg opp', 'shake', 'soak', 'talk', 'text', 'jump up', 'jump down', 'stir', 'vacuum', 'go down']
object list: ['apple', 'bananas', 'barsoap', 'bathroomcabinet', 'bathroomcounter', 'bathtub', 'bed', 'bellpepper', 'bench', 'book', 'bookshelf', 'box', 'breadslice', 'cabinet', 'candle', 'candybar', 'ceiling', 'ceilinglamp', 'cellphone', 'cereal', 'chair', 'chicken', 'chips', 'chocolatesyrup', 'clock', 'closet', 'closetdrawer', 'clothespants', 'clothespile', 'clothesshirt', 'coffeemaker', 'coffeepot', 'coffeetable', 'computer', 'condimentbottle', 'condimentshaker', 'cpuscre

LLM

In [2]:
import openai

import os

# Read the OpenAI API key from the OPENAI_API_KEY environment variable so the
# secret never has to be typed into (and saved with) the notebook. When the
# variable is unset the key falls back to an empty string.
API_KEY = os.environ.get("OPENAI_API_KEY", "")
openai.api_key = API_KEY

In [3]:
def rewrite_description(description):
    """Ask the chat model to restate an activity description as an action plan.

    The request consists of a system message with the rewriting instructions,
    an assistant message listing the module-level ``action_list`` and
    ``room_list + object_list`` vocabularies, and the user's description.

    Args:
        description: Free-text activity description supplied by the user.

    Returns:
        The first line of the model's reply (everything before the first
        newline), which the prompt asks to be a comma-separated action plan.
    """
    system_prompt = """
            Using only my definitions of "Available Actions" and "Available Objects", Rewrite the description of activity entered by the user as a comma-separated action plans.
             
             Warnings:
             1. Do not omit the object to the action, do not use anaphoric pronouns.
             2. Abstract descriptions will also be rewritten into detailed action plans.
             """

    vocabulary_prompt = f"""
            Definition of available actions and available objects:
            Available Action: {action_list}
            Available Objects: {room_list + object_list}
            
            """

    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "assistant", "content": vocabulary_prompt},
        {"role": "user", "content": f"{description}"},
    ]

    completion = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=conversation,
    )

    reply_text = completion["choices"][0]["message"]["content"]
    # Only the first line of the reply is used; anything after it is discarded.
    first_line, _, _ = reply_text.partition("\n")
    return first_line

Sentence Transformers

In [4]:
from sentence_transformers import SentenceTransformer, util
# Sentence-embedding model shared by the cells below: it encodes the candidate
# action phrases once and each query phrase at lookup time.
model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')

In [5]:
# Maps every natural-language action phrase the notebook can recognise
# (e.g. "sit sofa") to the script line sent to the simulator
# (e.g. "<char0> [sit] <sofa> (1)"). Insertion order matters: it fixes the row
# order of `embeddings`, which sentence_cosine_similarity indexes into.
all_combination_dic = {}
grab_action = ["grab", "brush", "lift", "sweep", "throw", "unfold", "wrap", "write", "shake", "soak", "talk", "text"]


def _objects_with(*flags):
    """Return the objects, in object_list order, whose properties contain any of `flags`."""
    return [name for name in object_list
            if any(flag in object_property.get(name) for flag in flags)]


def _add_single(action, targets):
    """Register "<action> <target>" phrases; the script verb is the action without spaces."""
    verb = str(action).replace(" ", "")
    for target in targets:
        all_combination_dic[f"{action} {target}"] = f"<char0> [{verb}] <{target}> (1)"


def _add_pairs(verb, firsts, seconds, phrase):
    """Register two-object phrases; `phrase` is a template over ({0}=first, {1}=second)."""
    for first in firsts:
        for second in seconds:
            all_combination_dic[phrase.format(first, second)] = (
                f"<char0> [{verb}] <{first}> (1) <{second}> (1)"
            )


# Single-object actions that are only offered for objects carrying one of the
# listed property flags.
_single_object_flags = {
    "sit": ("SITTABLE",),
    "fall sit": ("SITTABLE",),
    **{name: ("GRABBABLE",) for name in grab_action},
    "open": ("CAN_OPEN",),
    "close": ("CAN_OPEN",),
    "switch on": ("HAS_SWITCH",),
    "switch off": ("HAS_SWITCH",),
    "type": ("HAS_SWITCH",),
    "drink": ("RECIPIENT", "DRINKABLE"),
    "eat": ("EATABLE",),
    "read": ("READABLE",),
    "squeeze": ("CLOTHES",),
}

# Rooms can always be walked to.
for room in room_list:
    all_combination_dic[f"walk {room}"] = f"<char0> [walk] <{room}> (1)"

for action in action_list:
    # Zero-argument actions take precedence over every other rule.
    if action in arg0_action:
        all_combination_dic[f"{action}"] = f"<char0> [{action}]"

    elif action in _single_object_flags:
        _add_single(action, _objects_with(*_single_object_flags[action]))

    elif action == "put back":
        surfaces = _objects_with("SURFACES")
        for held in _objects_with("GRABBABLE"):
            # One-object form first, then every (held, surface) pairing.
            all_combination_dic[f"put back {held}"] = f"<char0> [putobjback] <{held}> (1)"
            for surface in surfaces:
                all_combination_dic[f"put {held} back {surface}"] = f"<char0> [putback] <{held}> (1) <{surface}> (1)"

    elif action == "put in":
        _add_pairs("putin", _objects_with("GRABBABLE"), _objects_with("CONTAINERS"), "put {0} in {1}")

    elif action == "wipe":
        # The grabbable object is the tool; any object can be the wiped target.
        _add_pairs("wipe", _objects_with("GRABBABLE"), object_list, "wipe {1} with {0}")

    elif action == "pour":
        _add_pairs("pour", _objects_with("POURABLE", "DRINKABLE"), _objects_with("RECIPIENT"), "pour {0} into {1}")

    elif action == "scrub":
        _add_pairs("scrub", object_list, object_list, "scrub {0} {1}")

    elif action == "cut":
        _add_pairs("cut", _objects_with("CUTTABLE"), _objects_with("EATABLE"), "cut {1} with {0}")

    elif action == "climb":
        _add_single(action, [name for name in object_list if name in ("coffeetable", "bed")])

    else:
        # Any remaining action is offered for every object.
        _add_single(action, object_list)

all_combination = list(all_combination_dic)

embeddings = model.encode(all_combination)

In [6]:
def sentence_cosine_similarity(sentence):
    """Return the known action phrase most similar to ``sentence``.

    The sentence is encoded with the module-level ``model`` and compared by
    cosine similarity against the module-level ``embeddings`` (one row per
    entry of ``all_combination``).

    Args:
        sentence: Query phrase to match.

    Returns:
        The entry of ``all_combination`` with the highest similarity score. On
        ties the earliest entry wins.

    Raises:
        ValueError: If there are no candidate phrases to compare against.
    """
    cos_sim = util.cos_sim(model.encode(sentence), embeddings)[0]

    # Convert to plain Python numbers once instead of indexing element by element.
    scores = cos_sim.tolist() if hasattr(cos_sim, "tolist") else list(cos_sim)
    if not scores:
        raise ValueError("no candidate action phrases to compare against")

    # Consider every candidate, including the last one (the previous
    # range(len(cos_sim)-1) loop silently skipped it). max() returns the first
    # maximal index, matching the stable descending sort used before.
    best_index = max(range(len(scores)), key=scores.__getitem__)

    return all_combination[best_index]

Verification

In [7]:
def unity_reset():
    """Reset the simulator to the notebook's scene and add the character.

    Resets ``comm`` with ``scene_num-1``, adds the 'Chars/male1' character and
    then requests the environment graph; the graph result is not used here.
    """
    scene_index = scene_num-1
    comm.reset(scene_index)
    comm.add_character('Chars/male1')
    # The returned status and graph are unpacked and then ignored.
    _status, _graph = comm.environment_graph()

In [8]:
def unity_simulate(action_script):
    """Check a script against a freshly reset scene.

    Calls ``unity_reset()`` and then ``comm.render_script`` with
    ``find_solution=True`` and ``skip_execution=True``.

    Args:
        action_script: List of script lines to submit.

    Returns:
        The ``(success, message)`` pair produced by ``comm.render_script``.
    """
    unity_reset()
    ok, detail = comm.render_script(
        script=action_script,
        skip_execution=True,
        find_solution=True,
    )
    return ok, detail

In [9]:
def run_verification(action_script):
    """Return a version of ``action_script`` that the simulator accepts.

    The whole script is tried first and returned unchanged if it succeeds.
    Otherwise the script is rebuilt one step at a time: a step is kept if the
    steps accepted so far plus that step simulate successfully. When a step
    fails with a "Not found object" or "Can not select object" message, a
    ``[walk]`` variant of the same step is inserted before it and the pair is
    kept if that succeeds. Steps that cannot be made to work are dropped.

    Args:
        action_script: List of script lines such as
            ``"<char0> [grab] <apple> (1)"``.

    Returns:
        The original list if it verifies as a whole, otherwise a new list
        containing only the accepted (and possibly walk-prefixed) steps.
    """
    success, message = unity_simulate(action_script)
    if success:
        return action_script

    action_history = []
    for step in action_script:
        try:
            success, message = unity_simulate(action_history + [step])

            if success:
                action_history.append(step)
                continue

            # A plain-string message carries no per-character detail; drop the step.
            if isinstance(message, str):
                continue

            error_text = message.get('0').get('message')
            if "Not found object" in error_text or "Can not select object" in error_text:
                # Swap the step's action token (second whitespace-separated
                # field) for [walk] and retry with the walk step inserted first.
                walk_step = step.replace(step.split()[1], "[walk]")
                repaired_steps = [walk_step, step]

                success, message = unity_simulate(action_history + repaired_steps)
                if success:
                    action_history = action_history + repaired_steps

        except Exception as error:
            # Best effort: report the failure and move on to the next step.
            # KeyboardInterrupt/SystemExit are deliberately not caught so the
            # loop can still be interrupted.
            print(f"Exception: {error!r}")

    return action_history

Action Script Generation

In [10]:
def action_plan_generation(description):
    """Turn a free-text activity description into a verified action script.

    The description is rewritten by ``rewrite_description`` into a
    comma-separated plan, each plan fragment is mapped to its closest known
    action phrase via ``sentence_cosine_similarity``, the phrase is looked up
    in ``all_combination_dic`` to get the script line, and the resulting
    script is passed through ``run_verification``.

    Args:
        description: Free-text activity description.

    Returns:
        Whatever ``run_verification`` returns for the generated script.
    """
    rewritten_description = rewrite_description(description)

    # Strip surrounding whitespace and drop empty fragments (e.g. from a
    # trailing comma) so they are not matched to an arbitrary action.
    fragments = (part.strip() for part in rewritten_description.split(","))
    action_plan = [fragment for fragment in fragments if fragment]

    action_script = [all_combination_dic.get(sentence_cosine_similarity(step)) for step in action_plan]

    return run_verification(action_script)

In [None]:
# Example run: generate and verify an action script for a sample description.
# The returned script is the cell's last expression, so it is shown as output.
action_description = 'go to livingroom and sit on the sofa'
action_plan_generation(action_description)