In [None]:
# Transforming the ALFRED dataset
import tqdm
import glob
import json
import sys
import torchtext
from torchtext.data import get_tokenizer

# Which machine the notebook runs on; 'PC' selects the Windows data root and
# backslash-separated glob patterns, anything else selects the laptop paths.
COMPUTER = 'MAC'

# Roots of the ALFRED json dump on each machine.
# The json data is used because image features are not used yet.
PC_ALFRED_DATA = 'D:\\Datasets\\alfred\\data\\ALFRED_json_2.1.0\\'
LAPTOP_ALFRED_DATA = '/Users/riordan/Desktop/datasets/alfred/data/ALFRED_json_2.1.0/'

# Glob patterns appended to "<data root><split folder>".
JSON_PATH = '/*/*/*.json'
TEST_JSON_PATH = '/*/*.json'
if COMPUTER == 'PC':
    # Windows paths use backslashes as separators.
    JSON_PATH = JSON_PATH.replace('/', '\\')
    TEST_JSON_PATH = TEST_JSON_PATH.replace('/', '\\')

# Output file names for the flattened instruction/action/argument tables.
ALFRED_TRAIN_INSTRUCTION_TSV_FILENAME = 'ALFRED_Train_InstActionArgs.tsv'
ALFRED_VALIDATION_INSTRUCTION_TSV_FILENAME = 'ALFRED_Validation_InstActionArgs.tsv'
ALFRED_TEST_INSTRUCTION_TSV_FILENAME = 'ALFRED_Test_InstActionArgs.tsv'

def load_next_alfred_data(alfred_json_folder):
    """Load every ALFRED trajectory json found under one split folder.

    Args:
        alfred_json_folder: Folder name appended to the ALFRED data root
            selected by ``COMPUTER`` (the notebook passes ``'train'``).

    Returns:
        A list with one ``(instruction_actions, scene)`` tuple per json file.
        ``instruction_actions`` holds one dict per turk annotation with the
        keys ``'task_num'`` (index of the json file in the glob result),
        ``'task_desc'`` and ``'instructions'``. Each instruction is a dict
        with the tokenized ``'instruction'``, the ``'action'`` name and
        ``'argument_1'`` / ``'argument_2'`` (``'<unk>'`` when the action has
        fewer arguments). ``scene`` is the json file's ``'scene'`` entry.

    Raises:
        IndexError: If an annotation has more high-level descriptions than
            the plan has high-level actions.
    """
    data_root = PC_ALFRED_DATA if COMPUTER == 'PC' else LAPTOP_ALFRED_DATA
    # glob.glob materialises the full file list up front, so tqdm knows the total.
    json_files = glob.glob(data_root + alfred_json_folder + JSON_PATH)
    tokenizer = get_tokenizer("basic_english")

    def argument_or_unk(args, position):
        # Actions carry zero, one or two arguments; pad the missing ones.
        return args[position] if position < len(args) else '<unk>'

    dataset = []
    for json_file_idx, json_file in enumerate(tqdm.tqdm(json_files)):
        # Close the handle deterministically and do not depend on the
        # platform's default text encoding.
        with open(json_file, encoding='utf-8') as handle:
            data = json.load(handle)
        actions = data['plan']['high_pddl']

        instruction_actions = []
        for annotation in data['turk_annotations']['anns']:
            instructions = []
            for i, description in enumerate(annotation['high_descs']):
                # The i-th description is paired with the i-th high-level action.
                discrete_action = actions[i]['discrete_action']
                instructions.append({
                    'instruction': tokenizer(description),
                    'action': discrete_action['action'],
                    'argument_1': argument_or_unk(discrete_action['args'], 0),
                    'argument_2': argument_or_unk(discrete_action['args'], 1),
                })
            instruction_actions.append({
                'task_num': json_file_idx,
                'task_desc': annotation['task_desc'],
                'instructions': instructions,
            })

        dataset.append((instruction_actions, data['scene']))
    return dataset

# Load the 'train' split once; later cells filter and iterate this list.
train_dataset = load_next_alfred_data('train')
# sys.getsizeof is shallow: this reports the size of the outer list object only,
# not of the trajectories it references.
# NOTE(review): '%3f' sets a minimum field width of 3, not 3 decimal places - confirm intent.
print('%3f kb' % (sys.getsizeof(train_dataset) / 1024.0))


In [None]:
# Filter dataset
import random

def filtered_dataset_copy(alfred_data, scene):
    """Return a new list with only the tasks recorded in floor plan ``scene``.

    Each element of ``alfred_data`` is indexed as ``task[1]['floor_plan']``;
    elements whose value equals ``scene`` are kept, in their original order.
    ``alfred_data`` itself is left untouched.
    """
    snapshot = alfred_data.copy()
    return [task for task in snapshot if task[1]['floor_plan'] == scene]

# Keep only the tasks recorded in FloorPlan25; the experiment cell below uses this subset.
dataset25 = filtered_dataset_copy(train_dataset, 'FloorPlan25')
# A little random sample for testing
print('dataset samples: ', len(dataset25))
# First element of the first task: its list of annotated trajectories.
# NOTE(review): raises IndexError if the filter matched nothing.
print(dataset25[0][0])
for i in range(3):
    print('*' * 20)
    # Sampling is not seeded here, so the printed tasks differ between runs.
    example = random.choice(dataset25)
    print(example[1]['floor_plan'])
    print(example[1]['object_poses'])
    # One entry per annotation of the sampled task.
    for j in example[0]:
        print('-' * 20)
        print(j['instructions'])

# Instruction Dot Test
Use the dot product between words and semantic graph. 

TODO:
* Use cosine similarity for scoring, instead of dot product. [DONE]
* Normalise affordance features after adding parent representations. [DONE, Note: I averaged affordance features by dividing by 2.0]

* Remove punctuation from the instructions.
* Instead of adding object features to affordance features, score them separately on nouns and verbs respectively, then sum them together for a final score. Disregard determiner words and punctuation.
* Add a distance scaling factor around the robot_node after the first instruction.


In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.nn import GCNConv
from torch.nn import Linear


# ==================================================
class DotNet(torch.nn.Module):
    """Baseline that scores graph nodes against the words of an instruction.

    Three strategies were planned for dealing with non-location nodes,
    selected by ``self.OPTION``:

    - 1 [Simplest] Ignore them: compute the cosine similarity between every
      word vector and every node feature vector, then sum the per-word scores
      into one score per node. (Naive, but a good place to start.)
    - 2 [Hardest] Score every node, then add the scores of non-location nodes
      to their parents. Not implemented.
    - 3 [Weird] Use a GCN to learn how to combine node features and score on
      those. Not implemented; because it learns, it would not be comparable
      to options 1 and 2.

    Args:
        one_hot: When True, ``data.x`` and ``target`` are treated as integer
            ids and converted to one-hot vectors before scoring.
    """

    def __init__(self, one_hot=False):
        super().__init__()
        self.one_hot = one_hot
        self.OPTION = 1
        self.cos = nn.CosineSimilarity(dim=1, eps=1e-6)
        if self.OPTION == 3:
            self.GCN = GCNConv(300, 64)

    def forward(self, data, target):
        """Score every node of ``data`` against every row of ``target``.

        Args:
            data: Object exposing the node feature matrix as ``data.x`` with
                one row per node.
            target: Tensor with one row per instruction word; each row must be
                expandable to the shape of ``data.x``. With ``one_hot=True``
                both ``data.x`` and ``target`` hold one integer id per row.

        Returns:
            For ``OPTION == 1`` a ``(num_nodes, 1)`` float tensor holding, per
            node, the sum of its cosine similarities to all target rows. For
            the unimplemented options 2 and 3 the node features are returned
            unchanged.
        """
        x = data.x

        if self.one_hot:
            # Convert indices to one-hot encoding. The width must be the
            # largest id + 1 (argmax would give the *position* of the largest
            # id instead), and the encodings replace the raw ids for scoring.
            node_ids = x.flatten().long()
            word_ids = target.flatten().long()
            num_classes = int(torch.cat([node_ids, word_ids]).max().item()) + 1
            x = F.one_hot(node_ids, num_classes).float()
            target = F.one_hot(word_ids, num_classes).float()

        if self.OPTION == 1:
            # Allocate the accumulator next to the node features so the
            # in-place add also works when they live on the GPU.
            scores = torch.zeros(x.shape[0], dtype=torch.float32, device=x.device)
            for word in target:
                # Broadcast the single word vector against every node row.
                scores += self.cos(x, word.expand_as(x))
            x = scores.view(-1, 1)
        elif self.OPTION == 2:
            pass
        elif self.OPTION == 3:
            pass

        return x


# Smoke test: run DotNet once on random data to check that the shapes line up.
m = DotNet()
class T:
    # Stand-in for a graph data object: 4 nodes with 10-dimensional features.
    x = torch.randn(4,10)
    edge_index = torch.randn(2, 4)

t = T()
# Ten random 10-dimensional "word" vectors scored against the 4 nodes.
m(t, torch.randn(10,10))


In [None]:
import ai2thor
import ai2thor.controller
import numpy as np
import torch
from torch.utils.tensorboard import SummaryWriter
import torchtext

import pandas as pd
import json, pickle, time
from tqdm.notebook import trange, tqdm

# Start the AI2-THOR controller that the remaining cells share through the global `controller`.
print(ai2thor.__version__)
# word_vec = torchtext.vocab.FastText()
# controller = ai2thor.controller.Controller(scene='FloorPlan29', grid_size=0.25, visibilityDistance=0.75, quality='Very Low', headless=False)   
controller = ai2thor.controller.Controller() #dict(scene='FloorPlan25', grid_size=0.25, visibilityDistance=0.75)) #, quality='Very Low', headless=True))
controller.start()
# Initialise with headless=True and a visibility distance of 3.0.
controller.step(dict(action='Initialize', headless=True, visibilityDistance=3.0))
print('Passing...')
# 'Pass' step; the returned event is kept in `event`.
event = controller.step(dict(action='Pass'))
print('Resetting...')
# Switch to the scene that the filtered dataset (FloorPlan25) refers to.
controller.reset('FloorPlan25')

In [None]:
%run scene_graph.ipynb
%run thorEnvironment.ipynb

import os
import random
import networkx as nx
import matplotlib.pyplot as plt
import copy

# Utility Functions

def draw_graph(graph):
    """Scatter-plot the x/z position of every object node in ``graph``.

    Args:
        graph: Graph whose ``nodes(data=True)`` yields ``(index, attributes)``
            pairs. Nodes with ``attributes['node_type'] == 'object'`` must
            provide ``attributes['data']['position']`` with ``'x'`` and ``'z'``.
    """
    plt.figure()
    positions = [
        attributes['data']['position']
        for _, attributes in graph.nodes(data=True)
        if attributes['node_type'] == 'object'
    ]
    plt.scatter([pos['x'] for pos in positions], [pos['z'] for pos in positions])
    plt.xlabel('X')
    # The vertical plot axis shows the objects' z coordinate, so label it as such.
    plt.ylabel('Z')
    plt.title('Environment Graph Map')

def add_object_features(graph):
    """
    Graph preprocessing step that blends each node's features into the
    features of its affordances, so that an affordance selection task is
    aware of objects.

    Works on a clone of ``graph._torch_graph.x``; the graph's own feature
    matrix is not modified. Returns a small object exposing the blended
    features as ``x`` and ``edge_index`` set to None.
    """
    features = graph._torch_graph.x.clone()

    class graph_t:
        x = features
        edge_index = None

    smoothed = graph_t()
    for node_id in graph.nodes:
        linked = graph.get_affordances(node_id)
        if len(linked) == 0:
            continue
        for affordance_id in linked:
            # Average of the affordance row and its owner's row.
            features[affordance_id] += features[node_id]
            features[affordance_id] /= 2.0
    return smoothed

def thor_restore(init_action, object_poses, object_toggles, dirty_and_empty):
    """
    Put the Thor simulator into an ALFRED defined state.

    Sends, in this order, to the global ``controller``: the object toggles
    (only when there are any), the two SetStateOfAllObjects requests (only
    when ``dirty_and_empty`` is truthy), the object poses and finally
    ``init_action``.
    """
    requests = []

    if len(object_toggles) > 0:
        requests.append({'action': 'SetObjectToggles', 'objectToggles': object_toggles})

    if dirty_and_empty:
        requests.append({'action': 'SetStateOfAllObjects',
                         'StateChange': "CanBeDirty",
                         'forceAction': True})
        requests.append({'action': 'SetStateOfAllObjects',
                         'StateChange': "CanBeFilled",
                         'forceAction': False})

    requests.append({'action': 'SetObjectPoses', 'objectPoses': object_poses})
    requests.append(init_action)

    for request in requests:
        controller.step(request)

def valid_target(p_env, target):
    """
    Sanity Check. Skip impossible graphs, requiring multiple steps, or faulty exploration.
    It appears that the SceneGraph.find() method cannot replace this functionality.

    Args:
        p_env: Environment providing ``controller`` (stepped once with a
            'Pass' action to read the simulator's object metadata) and
            ``graph`` (whose object nodes carry an ``'obj'`` name).
        target: Lower-case object type to look for.

    Returns:
        True when the simulator contains an object whose lower-cased
        ``objectType`` equals ``target``, whether or not the graph has it.
        False when the simulator does not contain it.
    """
    sim_objects = p_env.controller.step(dict(action='Pass')).metadata['objects']
    in_sim = any(target == obj['objectType'].lower() for obj in sim_objects)

    # Inspect the graph of the environment that was passed in, not whichever
    # environment happens to be bound to the notebook-global `env`.
    nodes = p_env.graph.nodes
    in_graph = any(
        nodes[n]['node_type'] == 'object' and nodes[n]['obj'] == target
        for n in nodes
    )

    if in_sim:
        if not in_graph:
            print('[CHECK][NOTE] - Target found in sim but not added to graphmap due to not being found in exploration.')
        return True

    if in_graph:
        print('[CHECK][MAJOR ERROR] - Target [%s] found in graphmap but not simulation. Perhaps a trajectory/sim mismatch?' % target)
    else:
        print('[CHECK][MAJOR ERROR] - Target not found in simulation or graphmap')
    return False

def valid_action(p_env, action):
    """
    Report whether ``action`` is one of the currently supported
    actions/affordances, i.e. whether normalize_action_name accepts it.
    Unsupported actions invalidate trajectories, which reduces the available
    data; supporting all of them is future work. ``p_env`` is not used.
    """
    try:
        normalize_action_name(action)
    except NotImplementedError:
        return False
    return True

def valid_trajectory(p_env, trajectory):
    """
    Sanity Check. Walks the instructions in order and stops at the first one
    whose target does not exist or whose action is unsupported, printing why.
    Returns True only when every instruction passes both checks.
    WARNING: An object meant to be discovered inside another object will not
    be detected. TODO: Check the thor Environment for things contained inside things.
    """
    for step in trajectory['instructions']:
        wanted_object = step['argument_1']
        wanted_action = step['action']
        if not valid_target(p_env, wanted_object):
            print('\t[CHECK] Invalid Instruction \"%s\" Target Not Found: [%s]' % (' '.join(step['instruction']), wanted_object))
            return False
        if not valid_action(p_env, wanted_action):
            print('\t[CHECK] Invalid action/affordance \" %s \" Is not currently supported!' % wanted_action)
            return False

    return True

def print_logit_scores(graph, logits):
    """
    Debug helper: print one ``(name, score)`` tuple per graph node, ordered
    by ascending score. The score of node ``k`` is read from ``logits[0][k]``;
    the name is the node's affordance or object name, or '' for other node types.
    """
    labelled_scores = []
    for node_id, attributes in graph.nodes.items():
        kind = attributes['node_type']
        if kind == 'affordance':
            label = attributes['affordance']
        elif kind == 'object':
            label = attributes['obj']
        else:
            label = ''
        labelled_scores.append((label, logits[0][node_id]))

    labelled_scores.sort(key=lambda pair: pair[1])

    for pair in labelled_scores:
        print(pair)

def normalize_action_name(name):
    """
    Translate an ALFRED high-level action name into the short verb used for
    comparison with graph affordances.
    Raises NotImplementedError for any action without a mapping.
    """
    supported = (
        ('GotoLocation', 'go'),
        ('PickupObject', 'pick'),
        ('PutObject', 'put'),
        ('OpenObject', 'open'),
        ('CloseObject', 'close'),
        ('SliceObject', 'slice'),
        ('CleanObject', 'clean'),
        # 'HeatObject' -> 'heat' and 'CoolObject' -> 'cool' are not enabled.
        ('CookObject', 'cook'),
    )
    for alfred_name, verb in supported:
        if name == alfred_name:
            return verb
    raise NotImplementedError("Action %s not implimented yet." % name)

# Seed for reproducibility.
# Only NumPy's generator is seeded here; `random` and torch are not.
np.random.seed(42)
torch.cuda.empty_cache()
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('Using: ',device)

# Experiment parameters
one_hot = False # One hot vector encoding
debug_instructions = False  # print per-instruction progress in the evaluation loop
debug_scores = False
debug_log = True  # write one TSV row per scored instruction
subset = dataset25[:]  # shallow copy of the FloorPlan25 tasks
model = DotNet(one_hot=one_hot).to(device)

# The test is for multiple instructions intended to be executed over multiple timesteps of length 1
max_timesteps = 1 # TODO: Consider increasing to increase performance, niko used 10 i think.
# Accumulators filled by the evaluation loop.
reward = []
used_timesteps = []
success = []
task_number = 0

# Experiment Log
# The file is opened here and closed after the evaluation loop finishes.
if debug_log:
    debug_log_file = open('baseline_debug_log.tsv', 'w')
    debug_log_file.write("scene \t instruction \t expected_object \t expected_affordance \t actual_object \t actual_affordance \t success\n")


# Run every instruction of every trajectory of every task in `subset`.
#
# For each task the simulator is restored to the task's ALFRED state and the
# explored scene graph is loaded from (or saved to) a pickle cache. Each
# instruction is embedded word by word, scored against the graph by `model`,
# and the best scoring affordance node is compared with the expected action
# and object. One entry is appended to `success` per valid trajectory and one
# to `reward` / `used_timesteps` per task.
import hashlib  # cell-local import: stable digest for the cache file name

for task in subset:
    trajectories, scene = task

    scene_name = "FloorPlan%d" % scene['scene_num']
    object_poses = scene['object_poses']
    object_toggles = scene['object_toggles']
    dirty_and_empty = scene['dirty_and_empty']
    init_action = scene['init_action']

    # ALFRED has a custom initialised scene for each task
    # This is expensive, every task requires new initalisations and therefore new explorations
    # The cache key must be identical across interpreter sessions. The built-in
    # hash() of a str is salted per process, so a content digest is used instead.
    # NOTE: cache files written under the old hash()-based names are not reused.
    pose_digest = hashlib.sha1(str(object_poses).encode('utf-8')).hexdigest()
    environment_file = "saved_environments/{}_{}.pickle".format(scene_name, pose_digest)

    episode = {'log_probs':[], 'rewards':[], 'timesteps':0, 'entropy':[]}
    done = False
    # NOTE(review): `t` is never advanced below, so `used_timesteps` and
    # episode['timesteps'] always record 0 - confirm whether it should count timesteps.
    t = 0
    task_number += 1
    print(" --- %d / %d --- " % (task_number, len(subset)))

    for traj_idx, traj in enumerate(trajectories):
        # Each trajectory in a task uses the same environment but must be reset
        # each trajectory.
        controller.reset(scene_name)
        env = ThorEnvironment(controller=controller)

        if os.path.isfile(environment_file):
            # load file, if exists
            thor_restore(init_action, object_poses, object_toggles, dirty_and_empty)
            print('Loading environment...')
            # NOTE: this unpickles a locally written cache file; never point
            # it at untrusted data.
            env.graph = env.graph.from_pickle(environment_file)
            if not hasattr(env.graph, 'robot_node'):
                env.graph.setup()
            print('Done')
        else:
            # explore and save
            thor_restore(init_action, object_poses, object_toggles, dirty_and_empty)
            print('Exploring environment...')
            env.explore_environment()
            env.graph.setup()
            print('Saving environment...')
            env.graph.to_pickle(environment_file)
            print('Done')

        ###########################################################
        # The following continue statement is here to make sure all
        # we are doing is exploring environments for caching.
        # Exploring and then using those graphs could (should) cause
        # this script to crash right now. So instead ill just explore
        # then once that is done I will fix the crash code where we
        # use the graph. Since the graphs will be cached testing
        # will be quicker!
        ###########################################################
        #continue
        #draw_graph(env.graph)
        #assert False

        complete_trajectory = True

        # Skipped trajectories do not contribute an entry to `success`.
        if not valid_trajectory(env, traj):
            print('[CHECK] Invalid trajectory. Skipping...')
            continue
        else:
            print('[CHECK] Valid Trajectory!')

        if debug_instructions:
            print("Task Trajectory: %d / %d (%d instructions)" % (traj_idx+1, len(trajectories), len(traj['instructions'])))

        for inst_idx, instruction in enumerate(traj['instructions']):
            if debug_instructions:
                print('-----> Instruction %d / %d' % (inst_idx + 1, len(traj['instructions'])))

            target_object = instruction['argument_1']
            target_affordance = instruction['action']

            # Hard-wired switch: True scores the real instruction words, the
            # else branch is a "ground truth" sanity check kept for debugging.
            if True:
                if debug_instructions:
                    print("Target affordance: %s -> %s\nTarget Instruction: %s" % (instruction['action'], target_object, ' '.join(instruction['instruction'])))
                target_embedding = [env.graph.word2vec(word.lower()) for word in instruction['instruction']]
            else:
                # Sanity check for "ground truth" instructions
                sanity_check_instruction = ['pick', target_object] # WARNING: If the task affordance is chance so shall the 'pick' verb.
                if debug_instructions:
                    print("Target object: %s\nTarget Instruction: %s" % (target_object, ' '.join(sanity_check_instruction)))
                target_embedding = [env.graph.word2vec(word.lower()) for word in sanity_check_instruction]

            #done = False
            for timestep in range(max_timesteps):

                env.graph.to_torch_graph()

                # Experimental, adds the object features to the affordances
                object_smooth_graph = add_object_features(env.graph)

                # run the policy network
                target = torch.tensor(target_embedding, dtype=torch.float).to(device)
                #logits = model(env.graph._torch_graph.to(device), target).t()
                logits = model(object_smooth_graph, target).t()

                # we only care about the affordance nodes, so set the logits of all other nodes to something very small
                mask = torch.tensor(env.graph._torch_affordance_mask==0, dtype=torch.bool).view(1,-1).to(device)
                logits[mask] = -1000

                # pick the best scoring node (greedy argmax, no sampling)
                action = torch.argmax(logits)
                affordance = env.graph.from_torch_id(action.item())

                correct_affordance = False
                actual_affordance = env.graph.nodes[affordance]['affordance']
                if actual_affordance == normalize_action_name(target_affordance): # WARNING: When the task verb changes so shall 'pick'
                    correct_affordance = True
                else:
                    # Exit early if affordance fails.
                    # This leaves only the timestep loop; the remaining
                    # instructions of the trajectory are still evaluated.
                    complete_trajectory = False
                    break

                # Compute reward
                r = 0.0
                succ, pred = env.graph.get_related_objects(affordance)

                correct_object = False
                actual_object = None

                if pred is not None:
                    for n in pred:
                        actual_object = env.graph.nodes[n]['obj']
                        if actual_object == target_object:
                            if correct_affordance:
                                r = 1.0
                                #done = True
                                correct_object = True
                                # Rio Note: I've decided that Im only going to perform the action if
                                # the affordance on the correct node is found. If not the action can mess up the
                                # graph to the point where other actions cant be performed.
                                # Consider breaking a task trajectory when a single instruction is missed.
                                # Perform action. Note: Out of order instructions will often fail on ai2thor simulator.
                                # Therefore, TODO: Break a trajectory loop if even a single instruction is not executed
                                # in the permited timesteps
                                env.step(action.item()) # An Action is an affordance in the graph
                                # TODO: If action cannot be executed, break loop and print error.
                        else:
                            complete_trajectory = False
                            break

                    print("Target: %s -> %s, Actual: %s -> %s [%s]" % (normalize_action_name(target_affordance), target_object, actual_affordance, actual_object, correct_affordance and correct_object))

                if debug_log:
                    # Log the object that was actually inspected. Reading the
                    # loop variable `n` here would fail, or report a node from
                    # an earlier instruction, whenever `pred` is None or empty.
                    debug_log_file.write("%s\t%s\t%s\t%s\t%s\t%s\t%s\n" % (scene_name, ' '.join(instruction['instruction']), target_object, target_affordance, actual_object, actual_affordance, correct_object and correct_affordance))

                episode['rewards'].append(r)
                episode['timesteps'] = t

        if complete_trajectory:
            success.append(1.0)
        else:
            success.append(0.0)

    # === end of an episode ===
    used_timesteps.append(t)

    reward.append(np.sum(episode['rewards']))

if debug_log:
    debug_log_file.close()

Visualise Baseline Performance
-----------------------------

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Summarise the baseline run: reward per task, success per trajectory and the
# overall success rate. `success` holds 1.0 / 0.0 per evaluated trajectory,
# so the number of successes is its sum, not its length.
print("Successes: %d / %d trajectories" % (sum(success), len(success)))

save_figures = False

# Reward curve: one point per entry in `reward`.
plt.close()
plt.figure()
sns.set(font_scale=1.2)
sns.set_style("whitegrid")
ax = sns.lineplot(data=reward,palette='Set1')
ax.set(xlabel='Timesteps', ylabel='Reward (Higher is better)')
plt.title('Environment 25 Pickup Instructions')
plt.tight_layout()
if save_figures:
    plt.savefig('dotnet-environment25-pickup-reward.pdf', dpi=300, bbox_inches='tight')

# Success curve: one point per entry in `success`.
plt.figure()
sns.set(font_scale=1.2)
sns.set_style("whitegrid")
ax = sns.lineplot(data=success,palette='Set1')
ax.set(xlabel='Instructions', ylabel='Success')
plt.title('Environment 25 - Pickup Instructions')
plt.tight_layout()
if save_figures:
    plt.savefig('dotnet-environment25-pickup-success.png', dpi=300, bbox_inches='tight')

# Success / failure split as a pie chart.
fig, ax = plt.subplots()
ax.pie([sum(success), len(success) - sum(success)], explode=(0.0,0.1), labels=('Success', 'Failure'), autopct='%1.1f%%',
        shadow=False, startangle=45)
ax.axis('equal')
plt.title('Environment 25 - Pickup Task Success Rate (%d Instructions)' % len(success))
if save_figures:
    plt.savefig('dotnet-environment25-full_trajectories-pickup-success-rate.pdf', dpi=300, bbox_inches='tight')

In [None]:
# Scratch cell: display the installed ai2thor version.
ai2thor.__version__

In [None]:
# Scratch cell: view(-1, 1) turns a length-100 vector into a 100 x 1 column.
torch.zeros(100).view(-1, 1).shape

In [None]:
import pandas as pd
import cv2
import pathlib

#=========================================================
#import torchtext

# Load FastText word vectors
#fast_text = torchtext.vocab.FastText(cache='../.vector_cache')

# Load ALFRED training data using torchtext
import torchtext.data as data

# torchtext field definitions, one per TSV column that is loaded.
#  Basic english normalisation, lowers and separates grammar
INSTRUCTION = data.Field(tokenize='basic_english', lower=True,
                         init_token='<sos>',
                         eos_token='<eos>',)
# Target fields: the action name and its two arguments.
ACTION = data.Field(is_target=True)
ACTION_ARGUMENT_1 = data.Field(tokenize='basic_english',is_target=True)
ACTION_ARGUMENT_2 = data.Field(tokenize='basic_english',is_target=True)

# Location of the instruction/action TSV files.
# These assignments rebind the bare file-name constants defined in the first cell.
ALFRED_DATA_ROOT = '../honours_research_repo/'
ALFRED_TRAIN_INSTRUCTION_TSV_FILENAME = ALFRED_DATA_ROOT + 'ALFRED_Train_InstActionArgs.tsv'
ALFRED_VALIDATION_INSTRUCTION_TSV_FILENAME = ALFRED_DATA_ROOT + 'ALFRED_Validation_InstActionArgs.tsv'
ALFRED_TEST_INSTRUCTION_TSV_FILENAME = ALFRED_DATA_ROOT + 'ALFRED_Test_InstActionArgs.tsv'

def filter_action(example):
    """Keep only examples whose first action token is 'PickupObject'."""
    return example.action[0] == 'PickupObject'

# Load the three TSV splits. Columns mapped to None are skipped; rows are kept
# only when filter_action accepts them (first action token is 'PickupObject').
train, val, test = data.TabularDataset.splits(
    path='', 
    train=ALFRED_TRAIN_INSTRUCTION_TSV_FILENAME,
    validation=ALFRED_VALIDATION_INSTRUCTION_TSV_FILENAME,
    test=ALFRED_TEST_INSTRUCTION_TSV_FILENAME, format='tsv',
    fields=[('trajectory_number', None),
            ('task_description', None),
            ('instruction_number', None),
            ('instruction', INSTRUCTION),
            ('action', ACTION),
            ('action_argument_1', ACTION_ARGUMENT_1),
            ('action_argument_2', ACTION_ARGUMENT_2)
           ], 
    filter_pred=filter_action)

# Vocabularies are built from the training split only.
# NOTE(review): `__word_vec__` is not defined in this notebook's visible cells;
# presumably it is provided by one of the %run notebooks - confirm.
INSTRUCTION.build_vocab(train,vectors=__word_vec__) #'fasttext.en.300d')
ACTION.build_vocab(train)
ACTION_ARGUMENT_1.build_vocab(train,vectors=__word_vec__)
ACTION_ARGUMENT_2.build_vocab(train,vectors=__word_vec__)
#=========================================================