In [3]:

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
%matplotlib notebook
from IPython.display import HTML
import time
import datetime
import json

#Import usual things required for graph nets
import numpy as np
import pandas as pd
import networkx as nx
import sonnet as snt
import tensorflow as tf
import os
import sys

import matplotlib.pyplot as plt
from matplotlib import animation
from matplotlib.patches import Polygon

import seaborn as sns
from sklearn.model_selection import train_test_split

#Import graph nets
from graph_nets import blocks
from graph_nets import utils_tf
from graph_nets import utils_np

# Report how many GPU devices TensorFlow detects on this machine.
print("Num GPUs Available: ", len(tf.config.experimental.list_physical_devices('GPU')))

INFO:tensorflow:Enabling eager execution
INFO:tensorflow:Enabling v2 tensorshape
INFO:tensorflow:Enabling resource variables
INFO:tensorflow:Enabling tensor equality
INFO:tensorflow:Enabling control flow v2
Num GPUs Available:  1


In [5]:
# Path to the scripts shared throughout phases.
# The shared 'Common' folder sits next to the directory found by dropping the
# last two components of the parent of the working directory. os.path.dirname
# is used instead of splitting on a literal '\\' so the lookup does not depend
# on the platform's path separator.
parent_path = os.path.abspath('..')
path_common = os.path.dirname(os.path.dirname(parent_path))

# Check if the shared scripts folder exists
path_common_scripts = os.path.join(path_common, 'Common', 'Scripts')
if not os.path.exists(path_common_scripts):
    print('The path: ' + path_common_scripts + ' is not found!')

# Check if the shared 'Common' folder exists
path_common = os.path.join(path_common, 'Common')
if not os.path.exists(path_common):
    print('The path: ' + path_common + ' is not found!')

# Add shared scripts to our current paths for importing.
# Guarded so that re-running this cell does not keep prepending duplicates.
if path_common_scripts not in sys.path:
    sys.path.insert(0, path_common_scripts)

# Retrieve test specification document.
# Fail with an explicit message instead of printing a warning and then
# crashing inside open() one line later.
test_spec_json = os.path.join(path_common, 'hyper_parameters.json')
if not os.path.exists(test_spec_json):
    raise FileNotFoundError('The file: ' + test_spec_json + ' is not found!')
with open(test_spec_json) as f:
    test_spec = json.load(f)

params = test_spec['parameters']
model_params = params['model_parameters']
experiment_params = params['experiment_parameters']

# Save experiment begin date.
# "%m%d%y" is what "%x" with the slashes removed yields under the default
# locale, but it does not change (or introduce other separators) when a
# different locale is active.
current_time = datetime.datetime.now()
print("Current_date: "+str(current_time))
current_time = current_time.strftime("%m%d%y")

# Experiment settings read from the specification file
NUM_TRAINING_ITERATIONS = experiment_params['number_of_training_iterations']
BATCH_SIZE_TR = experiment_params['training_batch_size']
BATCH_SIZE_TE = experiment_params['testing_batch_size']
BATCH_SIZE_GE = experiment_params['generalisation_batch_size']
NUM_PROCESSING_STEPS_TR = experiment_params['number_of_processing_steps_tr']
NUM_PROCESSING_STEPS_TE = experiment_params['number_of_processing_steps_te']
NUM_PROCESSING_STEPS_GE = experiment_params['number_of_processing_steps_ge']

# Model settings read from the specification file
FUL_LATENT_SIZE = model_params['ful_block_latent_size']
FUL_LAYER_NUMBER = model_params['ful_block_number_of_layers']
IND_LATENT_SIZE = model_params['ind_block_latent_size']
IND_LAYER_NUMBER = model_params['ind_block_number_of_layers']

# Folder description to distinguish between experiment runs
desc = "ntr="+str(NUM_TRAINING_ITERATIONS)
desc = desc +"_btr="+str(BATCH_SIZE_TR)
desc = desc +"_high"
desc = desc +"_date="+str(current_time)

# Set seed
SEED = 4893
# NOTE(review): unlike the other fields, "seed=" has no leading underscore, so
# the folder name reads "...date=<date>seed=<seed>". Left unchanged because the
# string names the on-disk run folder; confirm before normalising it.
desc = desc +"seed="+str(SEED)

# Seed NumPy's global generator, a dedicated RandomState and TensorFlow.
np.random.seed(SEED)
rand = np.random.RandomState(SEED)
tf.random.set_seed(SEED)

Current_date: 2021-04-08 16:09:27.080443


In [6]:
# Everything this run writes goes below the current working directory, in a
# sub-folder named after the run description `desc`.
base = os.getcwd()
# exist_ok=True makes each creation a no-op when the folder is already there,
# so re-running the cell is safe and there is no check-then-create gap.
os.makedirs(base, exist_ok=True)

# Path to saved models and trajectories
path_saves = os.path.join(base, desc, 'Saves')
os.makedirs(path_saves, exist_ok=True)

# Path to plots
path_plots = os.path.join(base, desc, 'Plots')
os.makedirs(path_plots, exist_ok=True)

# Path to animations
path_animations = os.path.join(base, desc, 'Animations')
os.makedirs(path_animations, exist_ok=True)

# Save the used parameters as a json file into this folder
with open(os.path.join(path_saves, 'specification.json'), 'w') as fp:
    json.dump(test_spec, fp)

# Path to Datasets: the 'Data' folder five directory levels above the working
# directory. Built from os.pardir rather than a hard-coded '..\\..' string so
# the separator matches the platform.
path_dataset = os.path.abspath(os.path.join(*([os.pardir] * 5)))
path_dataset = os.path.join(path_dataset, 'Data')

if not os.path.exists(path_dataset):
    print("Data folder does not exist.")

In [4]:
# Import model
import GNNModels_tf2 as models
# Import training plot
from TrainingPlot import training_plot, training_plot_log_scale
# Import functions for creating graph
from Graph_creator_functions import rigid_graph_from_pos_all, rigid_graph_from_pos_closest
# Import data loader module
from PushDatasetLoad import load_dataset, collect_trajectory_indeces, create_train_valid_test, collect_states, remove_effector, outlier_remover
# Import Dataset Feeder function
from PushDatasetFeed import DataFeeder
# Import simulation functions
from PushDatasetSimulator import PushDatasetSimulator
# Import function for experiments
from LossFunctions import velocity_loss_single_step, velocity_loss_single_step_high, make_all_runnable_in_session
# Import visualisation functions
from VisualisationFunctions import visualise_trajectory, visualise_groundtruth

## Data loading:
In this section we load the data and split the trajectories into training, validation and testing sets.

In [5]:
# Set paths: the dataset comes as two CSV files located in path_dataset.
file_1 = os.path.join(path_dataset, "Rect1_Data100ms_1_of_2_flagged.csv")
file_2 = os.path.join(path_dataset, "Rect1_Data100ms_2_of_2_flagged.csv")
# Load dataset
# STEP_SIZE is assigned by both calls, so the value returned for file_2 is the
# one kept. NOTE(review): presumably both files share the same step size — confirm.
df_set_1, STEP_SIZE = load_dataset(file_1)
df_set_2, STEP_SIZE = load_dataset(file_2)
# Collect trajectory indices, restricted to the entries listed in VEL_ACCS
VEL_ACCS = ['100.0-0.0']
trajectory_indeces,df_set_1 = collect_trajectory_indeces(df_set_1, ts = STEP_SIZE, vel_accs = VEL_ACCS)
# The indices returned for the second frame are discarded: the split below is
# derived from the first frame only and then applied to both frames.
_, df_set_2 = collect_trajectory_indeces(df_set_2, ts = STEP_SIZE, vel_accs = VEL_ACCS)
# Split indices so that no trajectory appears in more than one of the train, validation and test datasets
# NOTE(review): the meaning of 0.25 is defined by create_train_valid_test — presumably a hold-out fraction; confirm.
tr_inds, va_inds, te_inds = create_train_valid_test(trajectory_indeces, 0.25)
# Collect the states of the selected trajectories from each frame
df_tr_1 = collect_states(tr_inds, df_set_1)
df_tr_2 = collect_states(tr_inds, df_set_2)
df_va_1 = collect_states(va_inds, df_set_1)
df_va_2 = collect_states(va_inds, df_set_2)
df_te_1 = collect_states(te_inds, df_set_1)
df_te_2 = collect_states(te_inds, df_set_2)

# Remove Outliers (applied to each pair of frames of the same split)
df_tr_1, df_tr_2 = outlier_remover(df_tr_1, df_tr_2)
df_va_1, df_va_2 = outlier_remover(df_va_1, df_va_2)
df_te_1, df_te_2 = outlier_remover(df_te_1, df_te_2)


# Bundle all six frames under short keys ("<split>_<file number>"); this dict
# is what gets passed as `dfs` to the data feeder later on.
df_dict = {"tr_1":df_tr_1, 
           "tr_2":df_tr_2, 
           "va_1":df_va_1,
           "va_2":df_va_2,
           "te_1":df_te_1,
           "te_2":df_te_2,}

['100.0-0.0']
['100.0-0.0']
['100.0-0.0']


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self[k1] = value[k2]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(loc, value, pi)


# Train

In [6]:
# Create the model.
# Layer counts and latent sizes come from the specification file; the output
# sizes (2 per node, 2 per edge, 1 global) are fixed here.
model = models.EncodeProcessDecode(ind_layer_numbers = IND_LAYER_NUMBER,
                                   ind_latent_unit_size = IND_LATENT_SIZE,
                                   ful_layer_numbers = FUL_LAYER_NUMBER,
                                   ful_latent_unit_size = FUL_LATENT_SIZE,
                                   node_output_size=2,
                                   edge_output_size=2,
                                   global_output_size=1)



# Learning rate is read from the specification file and passed to Sonnet's Adam optimizer.
LEARNING_RATE = model_params['learning_rate']
# Alternative Keras optimizer, currently disabled:
#optimizer = tf.keras.optimizers.Adam()
optimizer = snt.optimizers.Adam(LEARNING_RATE)

In [7]:
def update_step(inputs_tr, targets_tr):
    """Run one optimisation step of the global ``model`` on one training batch.

    Args:
        inputs_tr: input graphs passed to the model.
        targets_tr: target graphs the model output is scored against.

    Returns:
        A tuple ``(outputs_tr, loss_tr, gradients)``: the complete model output,
        the loss computed on its first element, and the gradients of that loss
        with respect to ``model.trainable_variables``.
    """
    with tf.GradientTape() as tape:
        # NOTE(review): the second argument (1) is presumably the number of
        # processing steps — confirm against GNNModels_tf2.
        outputs_tr = model(inputs_tr, 1)
        # Only the first element of the model output enters the loss.
        loss_tr = velocity_loss_single_step(targets_tr, outputs_tr[0])

    gradients = tape.gradient(loss_tr, model.trainable_variables)
    optimizer.apply(gradients, model.trainable_variables)
    return outputs_tr, loss_tr, gradients

def test_step(inputs, targets):
    """Compute the loss of the global ``model`` on one batch without updating it.

    Args:
        inputs: input graphs passed to the model.
        targets: target graphs the model output is scored against.

    Returns:
        The value returned by ``velocity_loss_single_step`` for the targets and
        the first element of the model output.
    """
    # NOTE(review): the second argument (1) is presumably the number of
    # processing steps — confirm against GNNModels_tf2.
    outputs = model(inputs, 1)
    return velocity_loss_single_step(targets, outputs[0])

In [8]:
# Feeder used only to draw one example batch for building the input signature.
feeder = DataFeeder(SEED)
#Get some example data that resembles the tensors that will be fed
# into update_step():
# NOTE(review): va_batch_size receives BATCH_SIZE_GE (the "generalisation"
# batch size) — confirm this is the intended size for the validation batch.
d = feeder.get_data(dfs = df_dict,
                    tr_batch_size = BATCH_SIZE_TR,
                    va_batch_size = BATCH_SIZE_GE,
                    te_batch_size = BATCH_SIZE_TE,
                    graph_creator = rigid_graph_from_pos_closest)

# Training inputs and targets of the example batch.
X_tr = d["X_tr"]
Y_tr = d["Y_tr"]

# Get the input signature for that function by obtaining the specs
# NOTE(review): the specs are derived from this single example batch; later
# batches must be compatible with them — confirm node/edge counts cannot vary
# in a way the specs reject.
input_signature = [
  utils_tf.specs_from_graphs_tuple(X_tr),
  utils_tf.specs_from_graphs_tuple(Y_tr)
]

# Compile the update function using the input signature for speedy code.
# Only update_step is wrapped in tf.function; test_step is not compiled here.
compiled_update_step = tf.function(update_step, input_signature=input_signature)

In [None]:
# Setup logging: one entry is appended to each list at every logged iteration.
last_iteration = 0
logged_iterations = []
losses_step_tr = []
losses_step_va = []
losses_step_te = []
gradients = []

# Validation/test evaluation and logging happen once every this many iterations.
log_every_iteration = 5

# Column legend. The names match the per-iteration log line printed below
# (training, validation and test loss, in that order).
print("# (iteration number), Max (max iteration), T (elapsed seconds), "
      "Ltr, "
      "Lva, "
      "Lte")

# Fresh feeder built from the same seed as the one used for the example batch.
feeder = DataFeeder(SEED)

start_time = time.time()
for iteration in range(last_iteration, NUM_TRAINING_ITERATIONS):
    last_iteration = iteration
    do_logging = (last_iteration % log_every_iteration == 0)

    # Validation and test batches are only requested on logging iterations
    # (te_and_va flag).
    data = feeder.get_data(dfs = df_dict,
                           tr_batch_size = BATCH_SIZE_TR,
                           va_batch_size = BATCH_SIZE_GE,
                           te_batch_size = BATCH_SIZE_TE,
                           graph_creator = rigid_graph_from_pos_closest,
                           te_and_va = do_logging)

    X_tr = data["X_tr"]
    Y_tr = data["Y_tr"]

    # Do one tick of training
    outputs_tr, loss_tr, gradient = compiled_update_step(X_tr, Y_tr)

    # Don't log at every iteration
    if do_logging:
        X_va = data["X_va"]
        Y_va = data["Y_va"]
        X_te = data["X_te"]
        Y_te = data["Y_te"]
        # Validation and Test
        loss_va = test_step(X_va, Y_va)
        loss_te = test_step(X_te, Y_te)

        # Convert losses to normal values (first element of each loss tensor)
        loss_tr = loss_tr[0].numpy()
        loss_va = loss_va[0].numpy()
        loss_te = loss_te[0].numpy()

        # Seconds since the loop started
        elapsed = time.time() - start_time

        # Log data into lists
        losses_step_tr.append(loss_tr)
        losses_step_va.append(loss_va)
        losses_step_te.append(loss_te)
        gradients.append(gradient)
        logged_iterations.append(iteration)

        print("# {:05d}, Max {:05d}, T {:.1f}, Ltr {:.6f}, Lva {:.6f}, Lte {:.6f}".format(
            iteration, NUM_TRAINING_ITERATIONS, elapsed, loss_tr, loss_va, loss_te))
    
    

# (iteration number) out of max iteration, T (elapsed seconds), Ltr, Lte, Lge
Instructions for updating:
The `validate_indices` argument has no effect. Indices are always validated on CPU and never validated on GPU.


  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)


# 00000, Max 00500, T 11.6, Ltr 1.552497, Lva 82.904869, Lte 82.955223
# 00005, Max 00500, T 12.5, Ltr 1.742050, Lva 0.491208, Lte 0.482782
# 00010, Max 00500, T 13.5, Ltr 0.187102, Lva 0.027847, Lte 0.027931
# 00015, Max 00500, T 14.4, Ltr 0.158059, Lva 0.092871, Lte 0.088417
# 00020, Max 00500, T 15.4, Ltr 0.054512, Lva 0.065246, Lte 0.068971
# 00025, Max 00500, T 16.3, Ltr 0.004702, Lva 0.017504, Lte 0.015344
# 00030, Max 00500, T 17.3, Ltr 0.015499, Lva 0.002630, Lte 0.004649
# 00035, Max 00500, T 18.2, Ltr 0.018478, Lva 0.012847, Lte 0.013435
# 00040, Max 00500, T 19.2, Ltr 0.009410, Lva 0.012300, Lte 0.011331
# 00045, Max 00500, T 20.1, Ltr 0.003015, Lva 0.004268, Lte 0.005869
# 00050, Max 00500, T 21.1, Ltr 0.003981, Lva 0.002041, Lte 0.003022
# 00055, Max 00500, T 22.1, Ltr 0.003633, Lva 0.002823, Lte 0.003192
# 00060, Max 00500, T 23.0, Ltr 0.003645, Lva 0.003243, Lte 0.004078
# 00065, Max 00500, T 24.0, Ltr 0.003648, Lva 0.003992, Lte 0.005335
# 00070, Max 00500, T 25.0, Ltr 

## Saving losses

In [None]:
def save_data(name, data):
    """Save ``data`` as a NumPy ``.npy`` file in ``path_saves`` without overwriting.

    Args:
        name: file name inside ``path_saves``; the ``.npy`` suffix may be omitted.
        data: array-like content, converted with ``np.array`` before saving.

    Returns:
        None. If the target file already exists a message is printed and
        nothing is written.
    """
    data = np.array(data)
    # np.save appends ".npy" when the name lacks it, so the existence check has
    # to look at the file name that will actually be written; checking only the
    # bare name never matches and lets earlier results be overwritten.
    file_name = name if name.endswith(".npy") else name + ".npy"
    target = os.path.join(path_saves, file_name)
    # The bare name is still checked so a file saved without extension keeps
    # blocking the save, as it did before.
    if os.path.exists(target) or os.path.exists(os.path.join(path_saves, name)):
        print("The file: "+ name + " already exists. Delete it before saving a new trajectory!")
        return

    os.makedirs(path_saves, exist_ok=True)
    np.save(target, data)

# Persist every logged series with save_data, one file per series, in the
# same order as before.
for _series_name, _series_values in (
        ("iterations", logged_iterations),
        ("losses_tr", losses_step_tr),
        ("losses_te", losses_step_te),
        ("losses_va", losses_step_va),
        ("gradients", gradients),
):
    save_data(_series_name, _series_values)


In [None]:
# Close any figures left open by earlier cells before drawing a new one.
plt.close('all')
# Plot training, validation and test losses against the logged iterations.
# NOTE(review): path_plots is passed in — presumably the figure is also saved there; confirm in TrainingPlot.
fig, ax = training_plot(logged_iterations, losses_step_tr, losses_step_va, losses_step_te, path_plots)
plt.show()

In [None]:
# Close any figures left open by earlier cells before drawing a new one.
plt.close('all')
# Same inputs as the previous plot, drawn with training_plot_log_scale.
# NOTE(review): path_plots is passed in — presumably the figure is also saved there; confirm in TrainingPlot.
fig, ax = training_plot_log_scale(logged_iterations, losses_step_tr, losses_step_va, losses_step_te, path_plots)
plt.show()

## Evaluation

In [None]:
# Parameters for the visualisation 
# One trajectory per batch, rolled out for five steps.
ROLLOUT_BATCH_SIZE = 1
ROLLOUT_TIMESTEPS = 5

# The simulator is configured with the rollout length, the dataset step size
# and the rollout batch size.
sim = PushDatasetSimulator(rollout_steps = ROLLOUT_TIMESTEPS, 
                           step_size = STEP_SIZE,
                           batch_size = ROLLOUT_BATCH_SIZE)

# Get trajectory from the dataset (taken from the two test-split frames)
# NOTE(review): i_np is not used in this cell — presumably the indices of the sampled trajectories; confirm.
X_g, Y_g, i_np = sim.get_trajectories(df_te_1, df_te_2, rigid_graph_from_pos_closest)

# Convert trajectories into graphs
traj_X, traj_Y = sim.convert_trajectories(X_g,Y_g)
# Predict trajectory using the model
pred_trajectory, real_trajectory = sim.predict_trajectory_velocity(model, traj_X, traj_Y)

In [None]:
# Close any figures left open by earlier cells before building the animation.
plt.close('all')
# Build an animation comparing the predicted and the real trajectory.
fig, ax, anim = visualise_trajectory(pred_trajectory, real_trajectory, ROLLOUT_TIMESTEPS)
# Save the animation as a GIF in the run's Animations folder.
# NOTE(review): the 'imagemagick' writer requires ImageMagick to be installed on this machine — confirm it is available.
anim.save(os.path.join(path_animations,'visualisation.gif'), writer='imagemagick', fps=60)
# Close the figure so only the HTML rendering below is shown as the cell output.
plt.close('all')
HTML(anim.to_jshtml())