In [8]:
from tasks.R2R.env import R2RBatch
from utils import Tokenizer, read_vocab
from vocab import TRAINVAL_VOCAB, TRAIN_VOCAB
vocab = read_vocab(TRAIN_VOCAB)
tok = Tokenizer(vocab)
env = R2RBatch(['none'], batch_size=64, splits=['train','val_seen','val_unseen'],tokenizer=tok)

Loading navigation graphs for 72 scans
R2RBatch loaded with 17409 instructions, using splits: train,val_seen,val_unseen


In [9]:
import torch
import torch.nn as nn
import json
import torch.nn.functional as F
import torch.optim as optim
import numpy as np

class Net(nn.Module):
    def __init__(self, input_dim):
        super(Net, self).__init__()
        self.net = nn.Sequential(
            nn.BatchNorm1d(input_dim),
            nn.Linear(input_dim, input_dim),
            nn.BatchNorm1d(input_dim),
            nn.Tanh(),
            nn.Linear(input_dim, 1)
        )

    def forward(self, x):
        x = self.net(x).squeeze(-1)
        return x

In [10]:
def average(_l):
    return float(sum(_l)) / len(_l)

def count_prefix_len(l1,l2):
    res = 0
    while(res < len(l1) and res < len(l2) and l1[res] == l2[res]):
        res += 1
    return res

def get_path_len(scanId, path):
    path_len = 0
    prev = path[0]
    for curr in path[1:]:
        path_len += env.distances[scanId][prev][curr]

def load_data(filenames):
    all_data = []
    for fn in filenames:
        with open(fn,'r') as f:
            train_file = json.loads(f.read())
        train_instrs = list(train_file.keys())
        train_data = {}
        
        for instr_id in train_instrs:
            path_id = int(instr_id.split('_')[0])
            scanId = env.gt[path_id]['scan']
            new_data = {
                'instr_id': instr_id,
                'candidates': [],
                'candidates_path': [],
                'reranker_inputs': [],
                'distance': [],
                'gt': env.gt[path_id],
                'gold_idx': -1,
                'goal_viewpointId': env.gt[path_id]['path'][-1],
                'gold_len': get_path_len(scanId, env.gt[path_id]['path']),
            }
            self_len = 0
            for i, candidate in enumerate(train_file[instr_id]):
                _, world_states, actions, sum_logits, mean_logits, sum_logp, mean_logp, pm, speaker, scorer = candidate
                new_data['candidates'].append(candidate)
                new_data['candidates_path'].append([ws[1] for ws in world_states])
                new_data['reranker_inputs'].append([len(world_states), sum_logits, mean_logits, sum_logp, mean_logp, pm, speaker] * 4)
                new_data['distance'].append(env.distances[scanId][world_states[-1][1]][new_data['goal_viewpointId']])
                my_path = [ws[1] for ws in world_states]
                if my_path == env.gt[path_id]['path']:
                    new_data['gold_idx'] = i
                
            new_data['self_len'] = self_len
            train_data[instr_id] = new_data
            
        print(fn)
        print('gold',average([d['gold_idx'] != -1 for d in train_data.values()]))
        print('oracle',average([any([dis < 3.0 for dis in d['distance']]) for d in train_data.values()]))
        all_data.append(train_data)
        
    return all_data

#[train_data, val_seen, val_unseen] = load_data(['cache_train40True.json','cache_val_seen40True.json','cache_val_unseen40True.json'])
[train_data, val_seen, val_unseen] = load_data(['cache_train40False.json','cache_val_seen40False.json','cache_val_unseen40False.json'])
#[train_data, val_unseen] = load_data(['cache_train40False.json','cache_val_unseen20False.json'])
#[val_unseen] = load_data(['cache_val_unseen40False.json'])
#data_dim = len(train_data.values()['reranker_inputs'][0])


cache_train40False.json
gold 0.8408718569698697
oracle 0.9913099223591424
cache_val_seen40False.json
gold 0.6581782566111655
oracle 0.9285014691478942
cache_val_unseen40False.json
gold 0.49638143891017456
oracle 0.902085994040017


In [12]:
net = Net(28).cuda()
#net.load_state_dict(torch.load('candidates_ranker_{}'.format(.6321)))

In [13]:
batch_labels = []
valid_points = 0

for training_point in train_data.values():
    labels = training_point['distance']
    gold_idx = np.argmin(labels)
    ac_len = len(labels)
    choice = 1
    x_1 = []
    x_2 = []
    if choice == 1:
        for i in range(ac_len):
            for j in range(ac_len):
                if labels[i] <= 3.0 and labels[j] > 3.0:
                    x_1.append(i)
                    x_2.append(j)
                    valid_points += 1
    else:
        for i in range(ac_len):
            if labels[i] > 3.0:
                x_1.append(gold_idx)
                x_2.append(i)
                valid_points += 1
    batch_labels.append((x_1, x_2))

print(valid_points)

4374829


In [16]:
x_1 = []
x_2 = []
optimizer = optim.SGD(net.parameters(), lr=0.00005, momentum=0.6)
best_performance = 0.0
for epoch in range(30):  # loop over the dataset multiple times
    epoch_loss = 0
    for i, (instr_id, training_point) in enumerate(train_data.items()):
        inputs = training_point['reranker_inputs']
        labels = training_point['distance']
        ac_len = len(labels)
        
        inputs = torch.stack([torch.Tensor(r) for r in inputs]).cuda()
        labels = torch.Tensor(labels)
        scores = net(inputs)
        
        if i%10 == 0 and len(x_1):
            x1 = torch.cat(x_1, 0)
            x2 = torch.cat(x_2, 0)
            loss = F.relu(1.0 - (x1 - x2)).mean()
            #s = x1-x2
            #loss = (-s + torch.log(1 + torch.exp(s))).mean()
            loss.backward()
            epoch_loss += loss.item()
            optimizer.step()
            optimizer.zero_grad()
            x_1 = []
            x_2 = []
        
        if len(batch_labels[i][0]) > 0:
            x_1.append(scores[batch_labels[i][0]])
            x_2.append(scores[batch_labels[i][1]])

        
    print('epoch', epoch, 'loss', epoch_loss)
    
    for env_name, data_dict in zip(['train','val_unseen'],[train_data, val_unseen]):
        successes = []
        for instr_id, point in data_dict.items():
            inputs = point['reranker_inputs']
            labels = point['distance']
            inputs = torch.stack([torch.Tensor(r) for r in inputs]).cuda()
            labels = torch.Tensor(labels)
            scores = net(inputs)
            pred = scores.max(0)[1].item()
            successes.append(int(labels[pred] <= 3.0))
        print(env_name, average(successes))
        if env_name is 'val_unseen' and average(successes) > best_performance:
            best_performance = average(successes)
            torch.save(net.state_dict(), 'candidates_ranker_{}'.format(best_performance))

print('Finished Training')

epoch 0 loss 611.0856170654297
train 0.9017024004558729
val_unseen 0.6275010642826735
epoch 1 loss 611.3334204405546
train 0.9017024004558729
val_unseen 0.6270753512132823
epoch 2 loss 611.2385745197535
train 0.9018448607450673
val_unseen 0.6270753512132823
epoch 3 loss 611.1470371484756
train 0.9017736306004701
val_unseen 0.6270753512132823
epoch 4 loss 611.0579750239849
train 0.9016311703112757
val_unseen 0.626649638143891
epoch 5 loss 610.9714311957359
train 0.9017736306004701
val_unseen 0.626649638143891
epoch 6 loss 610.8873623609543
train 0.9020585511788589
val_unseen 0.626649638143891
epoch 7 loss 610.8054997324944
train 0.9021297813234561
val_unseen 0.6270753512132823
epoch 8 loss 610.7258523404598
train 0.9022010114680533
val_unseen 0.6275010642826735
epoch 9 loss 610.6485915482044
train 0.9024147019018448
val_unseen 0.6275010642826735
epoch 10 loss 610.5733136832714
train 0.9025571621910392
val_unseen 0.6275010642826735
epoch 11 loss 610.4998899549246
train 0.9027708526248308

In [53]:

    
for env_name, data_dict in zip(['train','val_seen','val_unseen'],[train_data,val_seen,val_unseen]):
    successes = []
    inspect = [1,2,3,4,5,6]
    other_success = [[] for _ in range(len(inspect))]
    spl = []
    for instr_id, point in data_dict.items():
        inputs = point['reranker_inputs']
        labels = point['distance']
        inputs = torch.stack([torch.Tensor(r) for r in inputs]).cuda()
        labels = torch.Tensor(labels)
        scores = net(inputs)
        pred = scores.max(0)[1].item()
        successes.append(int(labels[pred] < 3.0))
        
        if (int(labels[pred] < 3.0)):
            for i in range(len(point['distance'])):
                pass
                #print( point['reranker_inputs'][i])
                #print( scores[i].item(), point['distance'][i], point['reranker_inputs'][i][5])
            #print("\n")
        
        for idx,i in enumerate(inspect):
            pred = np.argmax([_input[i] for _input in point['reranker_inputs']])
            other_success[idx].append(int(labels[pred] < 3.0))
        
        #if labels[pred] < 3.0:
        #    sp_len = get_path_len(env.gt[instr_id.split('_')[0]]['path'])
        #    my_len = get_path_len()
        #    spl.append(sp_len / my_len)
        #else:
        #    spl.append(0)
    print(env_name, average(successes))
    for idx in range(len(inspect)):
        print(average(other_success[idx]))


train 0.9015599401666785
0.7878766293895577
0.8578246313840017
0.9125293824346463
0.9160196595199088
0.6671415342973146
0.6999074008120236
val_seen 0.7159647404505387
0.6248775710088149
0.6699314397649363
0.6856023506366308
0.693437806072478
0.5367286973555337
0.5377081292850147
val_unseen 0.6402724563644104
0.5649212430821626
0.5440613026819924
0.5615155385270327
0.5874840357598978
0.5014899957428693
0.4367816091954023


In [45]:

perf_name = '{:.4f}'.format(average(successes))
torch.save(net.state_dict(), 'candidates_ranker_{}'.format(perf_name))