<a href="https://colab.research.google.com/github/Nik-Kras/ToMnet-N/blob/main/TrainingToMnet-N.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 1. Initialise classes for Networks, Loading and Processing

## Layers

In [11]:
### LAYERS
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
class NeuralNetLayers:

The parent class for both the character net and the prediction net.
@author: Chuang, Yun-Shiuan
"""

import tensorflow as tf
from tensorflow import keras

from keras.models import Model
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import TimeDistributed
from keras.layers import BatchNormalization
from keras.layers import LSTM
from keras import activations

class CustomCnn(keras.layers.Layer):
    """Single 3x3 Conv2D layer, optionally applied through TimeDistributed.

    The wrapped convolution uses stride 1, "same" padding, L2 regularisation
    on both kernel and bias, and a He-normal kernel initialiser.

    Args:
        input_tensor: Optional shape forwarded to Conv2D as ``input_shape``.
            When ``None`` no ``input_shape`` is passed.
        activation: Activation handed to Conv2D.
        filters: Number of convolution filters.
        UseTimeWrapper: When True the convolution is wrapped in
            ``TimeDistributed`` and ``call`` goes through that wrapper.
        regularisation_value: L2 factor used for kernel and bias.
        **kwargs: Forwarded to ``keras.layers.Layer``.
    """

    def __init__(self, input_tensor=None, activation="linear", filters=32,
                 UseTimeWrapper=False, regularisation_value = 0.001, **kwargs):
        super().__init__(**kwargs)

        # Keep the constructor arguments so get_config() can report them.
        self.input_tensor = input_tensor
        self.activation = activation
        self.filters = filters
        self.UseTimeWrapper = UseTimeWrapper
        self.regularisation_value = regularisation_value

        conv_options = dict(
            filters=filters,
            kernel_size=(3, 3),
            strides=(1, 1),
            activation=activation,
            padding="same",
            kernel_regularizer=keras.regularizers.l2(self.regularisation_value),
            bias_regularizer=keras.regularizers.l2(self.regularisation_value),
            kernel_initializer=tf.keras.initializers.HeNormal(),
        )
        # The input shape is only forwarded when the caller supplied one.
        if input_tensor is not None:
            conv_options["input_shape"] = input_tensor
        self.conv = tf.keras.layers.Conv2D(**conv_options)

        # The time-distributed handler exists only when it was requested.
        if UseTimeWrapper:
            self.conv_handler = tf.keras.layers.TimeDistributed(self.conv)

    def call(self, inputs):
        """Apply the convolution (through TimeDistributed when enabled)."""
        layer = self.conv_handler if self.UseTimeWrapper else self.conv
        return layer(inputs)

    def get_config(self):
        """Return the base layer config extended with the constructor arguments."""
        return {
            **super().get_config(),
            "regularisation_value": self.regularisation_value,
            "UseTimeWrapper": self.UseTimeWrapper,
            "input_tensor": self.input_tensor,
            "activation": self.activation,
            "filters": self.filters,
        }

def CustomCnnCharNet(input_tensor=None, activation="linear", filters=32, **kwargs):
    """Build a CustomCnn with the TimeDistributed wrapper switched on."""
    return CustomCnn(
        input_tensor=input_tensor,
        activation=activation,
        filters=filters,
        UseTimeWrapper=True,
        **kwargs,
    )

def CustomCnnPredNet(input_tensor=None, activation="linear", filters=32, **kwargs):
    """Build a CustomCnn with the TimeDistributed wrapper switched off."""
    return CustomCnn(
        input_tensor=input_tensor,
        activation=activation,
        filters=filters,
        UseTimeWrapper=False,
        **kwargs,
    )

class ResBlock(keras.layers.Layer):
    """Residual block: conv -> BN -> ReLU -> conv -> BN, add the input, ReLU.

    The skip connection adds ``inputs`` to the output of the second batch
    norm, so ``inputs`` must be shape-compatible with the convolution output.

    Args:
        UseTimeWrapper: When True the two convolutions are created with
            ``CustomCnnCharNet`` (time-distributed); otherwise with
            ``CustomCnnPredNet``.
        filters: Number of filters of both convolutions.
        **kwargs: Forwarded to ``keras.layers.Layer``.
    """

    def __init__(self, UseTimeWrapper=False, filters=32, **kwargs):
        super().__init__(**kwargs)
        self.UseTimeWrapper = UseTimeWrapper
        # Stored so get_config() can report it; without it a block rebuilt
        # from its config would silently fall back to the default 32 filters.
        self.filters = filters

        self.bn1 = tf.keras.layers.BatchNormalization()
        self.bn2 = tf.keras.layers.BatchNormalization()
        self.relu_conv = tf.keras.layers.Activation('relu')

        make_conv = CustomCnnCharNet if self.UseTimeWrapper else CustomCnnPredNet
        self.conv1 = make_conv(activation="linear", filters=filters)
        self.conv2 = make_conv(activation="linear", filters=filters)

    def call(self, inputs):
        """Run the residual block on ``inputs`` and return the activated sum."""
        x = self.conv1(inputs)
        x = self.bn1(x)
        x = self.relu_conv(x)
        x = self.conv2(x)
        x = self.bn2(x)
        # Skip connection followed by the final non-linearity.
        return tf.nn.relu(x + inputs)

    def get_config(self):
        """Return the base layer config extended with the constructor arguments."""
        config = super().get_config().copy()
        config.update({
            "UseTimeWrapper": self.UseTimeWrapper,
            "filters": self.filters,
        })
        return config

def ResBlockCharNet(filters=32):
    """Build a ResBlock whose convolutions are time-distributed."""
    block = ResBlock(filters=filters, UseTimeWrapper=True)
    return block

def ResBlockPredNet(filters=32):
    """Build a ResBlock whose convolutions are not time-distributed."""
    block = ResBlock(filters=filters, UseTimeWrapper=False)
    return block

class CustomLSTM(keras.layers.Layer):
    """Thin wrapper around a Keras LSTM with tanh / sigmoid activations.

    Args:
        num_hidden: Number of LSTM units.
        **kwargs: Forwarded to ``keras.layers.Layer`` (e.g. ``name``).
    """

    def __init__(self, num_hidden = 128,  **kwargs):
        super().__init__(**kwargs)
        self.num_hidden = num_hidden
        self.lstm = LSTM(
            units=num_hidden,
            activation=activations.tanh,
            recurrent_activation=activations.sigmoid,
        )
        # NOTE: this batch-norm layer is created but call() never applies it.
        self.bn = BatchNormalization()

    def call(self, inputs):
        """Return the LSTM output for ``inputs``."""
        return self.lstm(inputs)

    def get_config(self):
        """Return the base layer config extended with ``num_hidden``."""
        return {**super().get_config(), "num_hidden": self.num_hidden}

## CharNet

In [12]:
### CharNet
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
class CharNet(nnl.NeuralNetLayers):

For the single trajectory τi in the past episode, the
ToMnet forms the character embedding echar,i as follows. We
 (1) pre-process the data from each time-step by spatialising the actions,
 a(obs), concatenating these with the respective states, x(obs),
 (2) passing through a 5-layer resnet, with 32 channels, ReLU nonlinearities,
 and batch-norm, followed by average pooling.
 (3) We pass the results through an LSTM with 64 channels,
 with a linear output to either a 2-dim or 8-dim echar,i (no substantial difference in results).
@author: Chuang, Yun-Shiuan; Edwinn
"""

import tensorflow as tf
from tensorflow import keras
from keras.layers import Dense

# --------------------------------------------------------------
# CharNet is implemented as a layer: it is not trained on its own,
# it is simply a part of the whole network, so it can be treated as a layer
# --------------------------------------------------------------
class CharNet(keras.layers.Layer):
    """Character net: turns a past trajectory into a character embedding.

    Pipeline: time-distributed conv -> ``n`` time-distributed residual blocks
    -> mean over axes 2 and 3 -> LSTM -> dense projection to ``N_echar`` units.

    Args:
        input_tensor: Shape forwarded to the first convolution as its
            ``input_shape`` hint.
        n: Number of residual blocks.
        N_echar: Size of the produced embedding.
        filters: Number of filters of every convolution.
        **kwargs: Forwarded to ``keras.layers.Layer``.
    """

    def __init__(self, input_tensor, n, N_echar, filters=32, **kwargs):
        super().__init__(**kwargs)

        # Constructor arguments are kept so get_config() can report them,
        # matching the other custom layers in this file.
        self.input_tensor = input_tensor
        self.n = n
        self.N_echar = N_echar
        self.filters = filters

        self.conv = CustomCnnCharNet(input_tensor=input_tensor, filters=filters)
        self.res_blocks = [ResBlockCharNet(filters=filters) for _ in range(n)]
        # NOTE: CustomLSTM is built with its default hidden size.
        self.lstm = CustomLSTM()
        self.e_char = Dense(N_echar)

    def call(self, inputs):
        """Compute the character embedding.

        Args:
            inputs: Trajectory tensor; the layers below treat it as
                (batch, time, height, width, channels).

        Returns:
            Tensor of shape (batch, N_echar).
        """
        # (batch, time, H, W, C) -> (batch, time, H, W, filters)
        # TimeDistributed feeds every time frame to the same Conv2D.
        x = self.conv(inputs)

        # Residual tower; shape is unchanged by every block.
        for block in self.res_blocks:
            x = block(x)

        # Average pooling over the two spatial axes:
        # (batch, time, H, W, filters) -> (batch, time, filters)
        x = tf.reduce_mean(input_tensor=x, axis=[2, 3])

        # LSTM collapses the time axis: (batch, time, filters) -> (batch, hidden)
        x = self.lstm(x)

        # Linear projection to the embedding: (batch, hidden) -> (batch, N_echar)
        return self.e_char(x)

    def get_config(self):
        """Return the base layer config extended with the constructor arguments."""
        config = super().get_config().copy()
        config.update({
            "input_tensor": self.input_tensor,
            "n": self.n,
            "N_echar": self.N_echar,
            "filters": self.filters,
        })
        return config


## PredNet


In [13]:
### PredNet
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
class PredNet(nnl.NeuralNetLayers):

In this and subsequent experiments,
we make three predictions:
  (1)next-step action,
  (2)which objects are consumed by the end of the episode, and
  (3) successor representations.
  We use a shared torso for these predictions, from which separate heads branch off.

  For the prediction torso, we
    (1) spatialise echar,i,
    (2) and concatenate with the query state;
    (3) this is passed into a 5-layer resnet, with 32 channels, ReLU nonlinearities, and batch-norm.

  Consumption prediction head.
  From the torso output:
    （1) a 1-layer convnet with 32 channels and ReLUs, followed by average pooling, and
     (2) a fully-connected layer to 4-dims,
     (3) followed by a sigmoid. This gives the respective Bernoulli probabilities
     that each of the four objects will be consumed by the end of the episode.
     [Unlike the paper, I replaced this sigmoid unit by a softmax unit.]
@author: Chuang, Yun-Shiuan
"""

import tensorflow as tf
from tensorflow import keras
from keras.layers import Dense

# --------------------------------------------------------------
# PredNet is implemented as a layer: it is not trained on its own,
# it is simply a part of the whole network, so it can be treated as a layer
# --------------------------------------------------------------
class PredNet(keras.layers.Layer):
    """Prediction net: maps the mixed query-state tensor to goal probabilities.

    Pipeline: conv -> ``n`` residual blocks -> ReLU conv -> mean over axes 1
    and 2 -> Dense(128, relu) -> Dense(128, relu) -> Dense(64, relu)
    -> Dense(4, softmax).

    Args:
        n: Number of residual blocks.
        filters: Number of filters of every convolution.
        **kwargs: Forwarded to ``keras.layers.Layer``.
    """

    def __init__(self, n, filters=32, **kwargs):
        super().__init__(**kwargs)
        # Constructor arguments are kept so get_config() can report them,
        # matching the other custom layers in this file.
        self.n = n
        self.filters = filters

        self.e_char_shape = 8
        # NOTE(review): this is only the input_shape hint given to the first
        # convolution; the shape comments in this file describe a 7-channel
        # input at call time -- confirm whether the hint should be updated.
        self.current_state_shape = (12, 12, 6)

        self.conv_1 = CustomCnnPredNet(input_tensor=self.current_state_shape, filters=filters)
        self.res_blocks = [ResBlockPredNet(filters=filters) for _ in range(n)]
        self.conv_2 = CustomCnnPredNet(activation='relu', filters=filters)
        self.fc1 = Dense(units=128, activation=activations.relu)
        self.fc2 = Dense(units=128, activation=activations.relu)
        self.fc3 = Dense(units=64, activation=activations.relu)
        self.goal_predict = Dense(units=4, activation=activations.softmax)

    def call(self, inputs):
        """Predict the goal distribution.

        Args:
            inputs: Tensor treated as (batch, height, width, channels).

        Returns:
            Tensor of shape (batch, 4) holding softmax probabilities.
        """
        # (batch, H, W, C) -> (batch, H, W, filters); the channel count must
        # equal `filters` so the residual additions below are valid.
        x = self.conv_1(inputs)

        # Residual tower; shape is unchanged by every block.
        for block in self.res_blocks:
            x = block(x)

        # Extra ReLU convolution after the residual tower.
        x = self.conv_2(x)

        # Average pooling over the two spatial axes:
        # (batch, H, W, filters) -> (batch, filters)
        x = tf.reduce_mean(input_tensor=x, axis=[1, 2])

        # Fully connected head ending in a 4-way softmax.
        x = self.fc1(x)
        x = self.fc2(x)
        x = self.fc3(x)
        return self.goal_predict(x)

    def get_config(self):
        """Return the base layer config extended with the constructor arguments."""
        config = super().get_config().copy()
        config.update({
            "n": self.n,
            "filters": self.filters,
        })
        return config

## ToMnet-N

In [14]:
### ToMnet
import tensorflow as tf
from tensorflow import keras

import numpy as np
from keras.models import Model
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import TimeDistributed
from keras.layers import BatchNormalization
from keras.layers import LSTM
from keras import activations

class ToMnet(Model):
    """ToMnet-N: character net + prediction net combined into one Keras model.

    ``call`` expects ``data`` to be indexable with ``data[0]`` being the past
    trajectory and ``data[1]`` the current (query) state.

    Args:
        ts: Maximum trajectory size (number of time steps).
        w: Maze width.
        h: Maze height.
        d: Depth (channels) of one trajectory frame; the current state is
            declared with ``d - 4`` channels.
        Ne_char: Length of the character embedding.
        N_res_blocks: Number of residual blocks in both sub-networks.
        filters: Number of convolution filters in both sub-networks.
    """

    LENGTH_E_CHAR = 8
    NUM_RESIDUAL_BLOCKS = 8

    # Set to True (on the class or an instance) to print the intermediate
    # shapes/values from call(); kept off by default because printing on
    # every forward pass is pure debugging output.
    VERBOSE = False

    def __init__(self, ts, w, h, d, Ne_char=8, N_res_blocks=8, filters=32):
        super().__init__(name="ToMnet-N")

        self.MAX_TRAJECTORY_SIZE = ts  # 20-50
        self.MAZE_WIDTH = w  # 12
        self.MAZE_HEIGHT = h  # 12
        self.MAZE_DEPTH_TRAJECTORY = d  # 10
        self.LENGTH_E_CHAR = Ne_char
        self.NUM_RESIDUAL_BLOCKS = N_res_blocks

        self.INPUT_SHAPE = (self.MAX_TRAJECTORY_SIZE+1, self.MAZE_WIDTH, self.MAZE_HEIGHT, self.MAZE_DEPTH_TRAJECTORY)
        self.TRAJECTORY_SHAPE = (self.MAX_TRAJECTORY_SIZE, self.MAZE_WIDTH, self.MAZE_HEIGHT, self.MAZE_DEPTH_TRAJECTORY) # 20x12x12x10
        self.CURRENT_STATE_SHAPE = (self.MAZE_WIDTH, self.MAZE_HEIGHT, self.MAZE_DEPTH_TRAJECTORY-4)                      # 12x12x6

        # Create the two sub-networks
        self.char_net = CharNet(input_tensor=self.TRAJECTORY_SHAPE,
                                n=self.NUM_RESIDUAL_BLOCKS,
                                N_echar=self.LENGTH_E_CHAR,
                                filters=filters)

        self.pred_net = PredNet(n=self.NUM_RESIDUAL_BLOCKS, filters=filters)

    def call(self, data):
        """Run the full network.

        Args:
            data: Pair ``[trajectory, current_state]``.

        Returns:
            The prediction net output for the current state mixed with the
            spatialised character embedding.
        """
        # NOTE: eager execution is no longer toggled here. It is a
        # process-wide setting that belongs at program start-up, not in the
        # forward pass.
        input_trajectory = data[0]
        input_current_state = data[1]

        e_char = self.char_net(input_trajectory)

        if self.VERBOSE:
            print("In ToMnet-N: ")
            print("input_trajectory: ", input_trajectory.shape)
            print("input_current_state: ", input_current_state.shape)
            print("e_char SHAPE: ", e_char.shape)
            print("e_char TYPE", type(e_char))
            print("e_char", e_char)

        # --------------------------------------------------------------
        # Spatialise e_char into one extra channel of the current state:
        # (batch, Ne_char) -> (batch, W) -> (batch, W, H) -> (batch, W, H, 1)
        # The embedding is repeated end-to-end until it covers the maze
        # width and then truncated, e.g. Ne_char=8, W=12 gives
        # [e0..e7, e0..e3]. The resulting row is copied along the height.
        # --------------------------------------------------------------
        copies = -(-self.MAZE_WIDTH // self.LENGTH_E_CHAR)  # ceil division
        e_char_new = tf.concat(values=[e_char] * copies, axis=1)
        e_char_new = e_char_new[..., 0:self.MAZE_WIDTH]
        if self.VERBOSE:
            print("Before Concatenation: ", e_char.shape)
            print("After Concatenation: ", e_char_new.shape)

        e_char_new = tf.expand_dims(e_char_new, axis=-1)
        e_char_new = tf.repeat(e_char_new, repeats=self.MAZE_HEIGHT, axis=-1)
        e_char_new = tf.expand_dims(e_char_new, axis=-1)
        if self.VERBOSE:
            print("e_char_new: ", e_char_new.shape)
            print("input_current_state: ", input_current_state.shape)
        input_current_state = tf.cast(input_current_state, tf.float32)

        # Append the spatialised embedding as the last channel.
        mix_data = tf.keras.layers.Concatenate(axis=-1)([input_current_state, e_char_new])

        if self.VERBOSE:
            print("mix_data (pred input): ", mix_data.shape)

        return self.pred_net(mix_data)

    ### This is a trick to view shapes in summary() via
    ### model.model().summary()
    def model(self):
        """Wrap ``call`` in a functional model so ``summary()`` shows shapes.

        Two symbolic inputs are created because ``call`` expects the pair
        ``[trajectory, current_state]`` rather than a single tensor.
        """
        trajectory = tf.keras.Input(shape=self.TRAJECTORY_SHAPE)
        current_state = tf.keras.Input(shape=self.CURRENT_STATE_SHAPE)
        inputs = [trajectory, current_state]
        return tf.keras.Model(inputs=inputs, outputs=self.call(inputs))

## Data Loader

In [15]:
### Loader
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
class DataHandler(mp.ModelParameter):

The class for parsing txt data.

Note:
  Inherit mp.ModelParameter to share model constants.

@author: Chuang, Yun-Shiuan; Edwinn
"""

"""
The data stored like: 1x12x12x10. 1 - Time Step, 12x12 - Map Resolution, 10 - Depth (1 walls, 1 player, 4 goals, 4 actions)
"""

import os
import sys
import random
import numpy as np
from random import shuffle
import re

class DataHandler:

    def __init__(self, ts, w, h, d):
        self.MAX_TRAJECTORY_SIZE = ts # 20-50
        self.MAZE_WIDTH = w # 12
        self.MAZE_HEIGHT = h # 12
        self.MAZE_DEPTH_TRAJECTORY = d # 11

        # Constants to keep track on standsrd
        # At which games are saved
        self.MAZE_LINE_START = 2
        self.MAZE_LINE_END = self.MAZE_WIDTH + 2
        self.CONSUMED_GOAL = self.MAZE_LINE_END + 1
        self.TRAJ_LENGTH = self.CONSUMED_GOAL + 1
        self.TRAJ_START = self.TRAJ_LENGTH + 1

    # It loads full trajectory, sequence of actions and consumed goal per game
    def load_all_games(self, directory, use_percentage=1):
        """Load saved games, expand them into samples and split the samples.

        Args:
            directory: Folder containing the saved games (``*.txt`` files).
            use_percentage: Fraction of the found games to load; values
                outside [0, 1] are clamped to the number of available games.

        Returns:
            The 12-tuple produced by ``self.split_and_shaffle``:
            (train, test, valid) for trajectories, current states, goals and
            actions, in that order.
        """
        # Get names of games. Only real ".txt" files are accepted, and the
        # names are sorted because os.listdir() returns them in arbitrary
        # order, which would make the loaded fraction non-reproducible.
        files = sorted(name for name in os.listdir(directory) if name.endswith(".txt"))
        Nfiles = len(files)
        # Never ask for more games than exist (use_percentage > 1) or fewer
        # than zero; later loops rely on Nfraction == number of loaded games.
        Nfraction = max(0, min(Nfiles, int(np.ceil(use_percentage * Nfiles))))
        files = files[:Nfraction]
        print("----")
        print("Saved Games found: ", Nfiles)
        print("Saved Games loaded: ", Nfraction)
        print("Percentage of loaded games: ", use_percentage*100, "%")
        print("Games names: ", files)

        # Full trajectory, actions and consumed goal of every loaded game
        trajectories = []
        actions = []
        labels = []

        # ------------------------------------------------------------------
        # Parse file one by one
        # ------------------------------------------------------------------
        j = 0  # for tracking progress (%)
        for i, file in enumerate(files):

            # Read one game
            traj, act, goal = self.read_one_game(filename=os.path.join(directory, file))

            # Append a game to data
            trajectories.append(traj)
            actions.append(act)
            labels.append(goal)

            # Keep track on progress
            if i >= int(np.ceil(j * Nfraction / 100))-1:
                print('Parsed ' + str(j) + '%')
                j += 10
        print("----")

        # ------------------------------------------------------------------
        # Prepare data from games. -> Make many trajectories for each game
        # ------------------------------------------------------------------
        print("Augment data. One game creates many training samples!")

        data_trajectories = []
        data_current_state = []
        data_actions = []
        data_labels = []
        j = 0  # for tracking progress (%)

        # Process Game-per-Game
        for i, (traj, act, goal) in enumerate(zip(trajectories, actions, labels)):

            # Consider only games with at least 6 recorded steps
            if traj.shape[0] < 6:
                continue

            # Prepare data from one game
            game_traj, game_current, game_act, game_labels = self.generate_data_from_game(
                trajectories=traj,
                actions=act,
                labels=goal)

            # Append to a single structure
            data_trajectories.append(game_traj)
            data_current_state.append(game_current)
            data_actions.append(game_act)
            data_labels.append(game_labels)

            # Keep track on progress
            if i >= int(np.ceil(j * Nfraction / 100))-1:
                print('Augmented data ' + str(j) + '%')
                j += 10

        print("----")

        # ------------------------------------------------------------------
        # Split the data  to Train / Test / Valid
        # ------------------------------------------------------------------
        print("Create training/testing/validation sets")

        train_traj, test_traj, valid_traj, \
        train_current, test_current, valid_current, \
        train_goal, test_goal, valid_goal, \
        train_act, test_act, valid_act = self.split_and_shaffle(data_trajectories=data_trajectories,
                                                                  data_current_state=data_current_state,
                                                                  data_actions=data_actions,
                                                                  data_labels=data_labels)

        print("----")

        return train_traj, test_traj, valid_traj, \
               train_current, test_current, valid_current, \
               train_goal, test_goal, valid_goal, \
               train_act,  test_act,  valid_act

    def load_all_games_v2(self, directory, use_percentage = 1.0):
        """Load saved games and expand each one into per-game sample lists.

        Unlike ``load_all_games`` nothing is split or shuffled here; the
        per-game results of ``generate_data_from_game`` are returned as-is.

        Args:
            directory: Folder containing the saved games (``*.txt`` files).
            use_percentage: Fraction of the found games to load; values
                outside [0, 1] are clamped to the number of available games.

        Returns:
            dict with the keys ``"traj_history"``, ``"current_state_history"``
            and ``"actions_history"``; each value is a list with one entry
            per kept game.
        """
        # Get names of games. Only real ".txt" files are accepted, and the
        # names are sorted because os.listdir() returns them in arbitrary
        # order, which would make the loaded fraction non-reproducible.
        files = sorted(name for name in os.listdir(directory) if name.endswith(".txt"))
        Nfiles = len(files)
        # Never ask for more games than exist (use_percentage > 1) or fewer
        # than zero; later loops rely on Nfraction == number of loaded games.
        Nfraction = max(0, min(Nfiles, int(np.ceil(use_percentage * Nfiles))))
        files = files[:Nfraction]
        print("----")
        print("Saved Games found: ", Nfiles)
        print("Saved Games loaded: ", Nfraction)
        print("Percentage of loaded games: ", use_percentage * 100, "%")
        print("Games names: ", files)

        # Full trajectory, actions and consumed goal of every loaded game
        trajectories = []
        actions = []
        labels = []

        # ------------------------------------------------------------------
        # 1. Load each game one by one
        # ------------------------------------------------------------------
        j = 0  # for tracking progress (%)
        for i, file in enumerate(files):

            # Read one game
            traj, act, goal = self.read_one_game(filename=os.path.join(directory, file))

            # Append a game to data
            trajectories.append(traj)
            actions.append(act)
            labels.append(goal)

            # Keep track on progress
            if i >= int(np.ceil(j * Nfraction / 100)) - 1:
                print('Parsed ' + str(j) + '%')
                j += 10
        print("----")

        # ------------------------------------------------------------------
        # 2. Make many Trajectory-Current state pairs from all  games
        # ------------------------------------------------------------------
        print("Augment data. One game creates many training samples!")

        data_trajectories = []
        data_current_state = []
        data_actions = []
        j = 0  # for tracking progress (%)

        # Process Game-per-Game
        for i, (traj, act, goal) in enumerate(zip(trajectories, actions, labels)):

            # Consider only games with at least 6 recorded steps
            if traj.shape[0] < 6:
                continue

            # Prepare data from one game
            # The dimensions differ, so only list is applicable (no numpy arrays)
            # The per-game labels are not part of the returned structure.
            game_traj, game_current, game_act, _ = self.generate_data_from_game(
                trajectories=traj,
                actions=act,
                labels=goal)

            # Append to a single structure
            data_trajectories.append(game_traj)
            data_current_state.append(game_current)
            data_actions.append(game_act)

            # Keep track on progress
            if i >= int(np.ceil(j * Nfraction / 100)) - 1:
                print('Augmented data ' + str(j) + '%')
                j += 10

        print("----")

        # ------------------------------------------------------------------
        # 3. Putting all data together
        # ------------------------------------------------------------------
        all_games = {
            "traj_history": data_trajectories,
            "current_state_history": data_current_state,
            "actions_history": data_actions
        }

        return all_games


    def load_one_game(self, directory):
        """Load one randomly chosen saved game that has at least 10 steps.

        Args:
            directory: Folder containing the saved games (``*.txt`` files).

        Returns:
            dict with the original ``"traj"``, ``"act"`` and ``"goal"`` of the
            game plus a ``"ToM"`` sub-dict holding the ``"traj_history"``,
            ``"current_state_history"`` and ``"actions_history"`` produced by
            ``generate_data_from_game``.

        Raises:
            IndexError: If the directory contains no ``.txt`` file (the same
                exception type the previous ``random.choice`` call raised).
            ValueError: If no saved game has at least 10 steps.
        """
        MIN_STEPS = 10

        # Only real ".txt" files are candidates.
        files = [name for name in os.listdir(directory) if name.endswith(".txt")]
        if not files:
            raise IndexError("No saved games (*.txt) found in: " + str(directory))

        # Visit every candidate once, in random order, so the search always
        # terminates even when no game is long enough.
        for one_game in random.sample(files, len(files)):
            filename = os.path.join(directory, one_game)
            traj, act, goal = self.read_one_game(filename)
            if traj.shape[0] >= MIN_STEPS:
                break
        else:
            raise ValueError("No saved game with at least " + str(MIN_STEPS)
                             + " steps found in: " + str(directory))

        traj_history, current_state_history, actions_history, _ = self.generate_data_from_game(traj, act, goal)

        single_game = {
            "traj": traj,   # Original trajectory
            "act": act,
            "goal": goal,
            "ToM":
                {
                    "traj_history": traj_history,   # Sequence of trajectories for ToMnet predictions. NO ZERO PADDING HERE!
                    "current_state_history": current_state_history,
                    "actions_history": actions_history
                }
        }

        return single_game

    # Returns trajectory, actions and consumed goal
    # For a single game
    def read_one_game(self, filename):
        '''
            Return
                traj - (ActionsInGame x MapWidth x MapHeight x MapDepth) (15x12x12x10)
                actions - (ActionsInGame) (array of numbers representing actions)
                goal - (ActionsInGame)  (array of the same goal *For Experiment 1*)
        '''

        traj = np.empty((1, self.MAZE_WIDTH, self.MAZE_HEIGHT, self.MAZE_DEPTH_TRAJECTORY))
        act  = np.empty(1, dtype=np.int8)
        goal = np.empty(1, dtype=np.int8)

        # output.shape(100, 12, 12, 10) where 100 is Max Trajectory Size, 12x12 is WidthxHeight and 10 is Depth (1walls + 1player + 4goals + 4actions)
        output = np.zeros((self.MAZE_WIDTH, self.MAZE_HEIGHT, self.MAZE_DEPTH_TRAJECTORY, self.MAX_TRAJECTORY_SIZE))
        label = ''
        steps = []
        with open(filename) as fp:
            lines = list(fp)
            maze = lines[self.MAZE_LINE_START:self.MAZE_LINE_END]

            # Parse maze to 2d array, remove boundary walls.
            for i in range(self.MAZE_WIDTH):
                maze[i] = list(maze[i])
                maze[i] = maze[i][1:len(maze[i]) - 2]   # Transform: #row#\n -> row

            # Original maze (without walls)
            np_maze = np.array(maze)

            # Plane for obstacles
            np_obstacles = np.where(np_maze == '#', 1, 0).astype(np.int8)

            # Plane for agent's initial position
            np_agent = np.where(np_maze == 'O', 1, 0).astype(np.int8)

            # Plane for goals
            targets = ['A', 'B', 'C', 'D']  # for the simplified 4-targets mazes
            np_targets = np.repeat(np_maze[:, :, np.newaxis], len(targets), axis=2)
            for target, i in zip(targets, range(len(targets))):
                np_targets[:, :, i] = np.where(np_maze == target, 1, 0)
            np_targets = np_targets.astype(int)

            # Save Consumed Goal
            goal_line = lines[self.CONSUMED_GOAL]
            _, goal_sym = goal_line.split(" : ")
            goal_sym = goal_sym[0]
            goal_num = self.goal_sym_to_num(goal_sym)

            # Get Trajectory Length
            Ntraj_line = lines[self.TRAJ_LENGTH]
            _, Ntraj = Ntraj_line.split(": ")
            Ntraj = int(Ntraj)

            # Save Actions & Save Trajectory
            trajectory = lines[self.TRAJ_START : self.TRAJ_START + Ntraj]
            agent_locations = []
            for i, tau in enumerate(trajectory):
                # Decompose
                tau = tau[:len(tau) - 1]  # Transform: 'output\n' -> 'output'
                tmp = tau.split(" : ")
                pos = tmp[0]
                pos = pos[1:-1]
                row, col = pos.split(", ")

                # Save
                # NOTE: first element in act & goal are trash values and MUST be replaced
                if i == 0:
                    agent_locations.append([int(row), int(col)])
                    act[0] = int(tmp[1])
                    goal[0] = goal_num  # self.sym_to_goal(tmp[2], consumed=)
                else:
                    agent_locations.append([int(row), int(col)])
                    act = np.append(act, int(tmp[1]))
                    goal = np.append(goal, goal_num) # self.sym_to_goal(tmp[2], consumed=)

                    # Update Agent Location Tensor
                    np_agent = np.zeros(shape=(self.MAZE_WIDTH, self.MAZE_HEIGHT), dtype = np.int8)
                    np_agent[int(row), int(col)] = 1

                # Make Trajectory Tensor
                np_actions = np.zeros((self.MAZE_WIDTH, self.MAZE_HEIGHT, 4), dtype=np.int8)
                a = act[i]
                np_actions[int(row), int(col), a] = 1

                np_tensor = np.dstack((np_obstacles, np_agent, np_targets, np_actions)) # (1walls + 1player + 4goals + 4actions)
                steps.append(np_tensor)
                traj = np.array(steps)

        fp.close()
        return traj, act, goal

    def goal_sym_to_num(self, goal_sym):
        """Translate a goal symbol into its numeric label.

        "A", "B", "C" and "D" map to 1, 2, 3 and 4 respectively.

        Raises:
            ValueError: if ``goal_sym`` is not one of the four goal symbols.
        """
        for number, symbol in enumerate(("A", "B", "C", "D"), start=1):
            if goal_sym == symbol:
                return number
        raise ValueError("ERROR: wrong goal sym was given!")

    # Deconstructs each game into a series of samples.
    # A single trajectory becomes a sequence of growing trajectory prefixes
    # that all share the same consumed goal.
    def generate_data_from_game(self, trajectories, actions, labels):
        """Unroll one game into per-step training samples.

        For every step index ``i`` from 5 up to (but excluding) the number of
        frames in ``trajectories`` one sample is produced:
          * the trajectory prefix ``trajectories[0:i]``,
          * the current state: frame ``i`` restricted to its first six
            channels (1 walls + 1 player + 4 goals),
          * the next action ``actions[i]``,
          * the consumed-goal label ``labels[i]``.

        Returns:
            Four equally long lists: prefixes, current states, actions and
            labels. They are empty when the game has 5 frames or fewer.
        """
        first_step = 5  # a sample needs at least 5 frames of history
        step_indices = range(first_step, trajectories.shape[0])

        prefixes = [trajectories[0:step, ...] for step in step_indices]
        current_states = [trajectories[step, ..., 0:6] for step in step_indices]
        next_actions = [actions[step, ...] for step in step_indices]
        goal_labels = [labels[step, ...] for step in step_indices]

        return prefixes, current_states, next_actions, goal_labels

    def split_and_shaffle(self, data_trajectories, data_current_state, data_actions, data_labels):
        """Shuffle the games, split them into train/test/validation and flatten.

        Each argument holds one entry per game, and every entry is a list of
        that game's per-step samples. Games are shuffled as whole units; then
        ceil(65%) of them go to training, the next ceil(20%) to testing and
        whatever remains to validation. Inside each split the per-game lists
        are concatenated into one flat list of samples.

        Returns:
            A 12-tuple: (train, test, valid) trajectories, then (train, test,
            valid) current states, then goal labels, then actions.
        """
        # Function-scope import keeps the method independent of the file's
        # import block.
        from itertools import chain

        N_Total = len(data_trajectories)
        N_train = int(np.ceil(N_Total * 0.65))
        N_test = int(np.ceil(N_Total * 0.20))

        total_indexes = list(range(N_Total))
        shuffle(total_indexes)

        train_indexes = total_indexes[:N_train]
        test_indexes = total_indexes[N_train:N_train + N_test]
        valid_indexes = total_indexes[N_train + N_test:]

        # Report the sizes that are actually used. Rounding each share up
        # independently can sum to more than N_Total, so the test/validation
        # counts are taken from the real slices instead of from the ceil()s.
        print("Total number of games after filtration: ", N_Total)
        print("Games for training: ", len(train_indexes))
        print("Games for testing: ", len(test_indexes))
        print("Games for validation: ", len(valid_indexes))

        def flatten(per_game, indexes):
            # Linear-time flattening; sum(list_of_lists, []) re-copies the
            # accumulated list for every game.
            return list(chain.from_iterable(per_game[i] for i in indexes))

        splits = (train_indexes, test_indexes, valid_indexes)
        train_traj, test_traj, valid_traj = (flatten(data_trajectories, idx) for idx in splits)
        train_current, test_current, valid_current = (flatten(data_current_state, idx) for idx in splits)
        train_goal, test_goal, valid_goal = (flatten(data_labels, idx) for idx in splits)
        train_act, test_act, valid_act = (flatten(data_actions, idx) for idx in splits)

        print("Time Steps for training: ", len(train_act))
        print("Time Steps for testing: ", len(test_act))
        print("Time Steps for validation: ", len(valid_act))

        return train_traj, test_traj, valid_traj, \
               train_current, test_current, valid_current, \
               train_goal, test_goal, valid_goal, \
               train_act, test_act, valid_act

## Data Processing

In [16]:
### Data Processing
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
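class DataProcessor:

The class for pre-processing parsed game data (zero padding,
trajectory/current-state concatenation and validation plots).

Note:
  Maze dimensions are passed to the constructor; mp.ModelParameter is not inherited here.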

@author: Chuang, Yun-Shiuan; Edwinn
"""

"""
The data is stored as 1x12x12x10: 1 - time step, 12x12 - map resolution, 10 - depth (1 walls, 1 player, 4 goals, 4 actions)
"""

import matplotlib.pyplot as plt
import matplotlib.transforms as mtransforms
import numpy as np
from random import shuffle
import re

class DataProcessor:

    def __init__(self, ts, w, h, d):
        self.MAX_TRAJECTORY_SIZE = ts # 20-50
        self.MAZE_WIDTH = w # 12
        self.MAZE_HEIGHT = h # 12
        self.MAZE_DEPTH = d # 10 (1player + 1wall + 4goals + 4 actions = 10)

        # Constants to keep track on standsrd
        # At which games are saved
        self.MAZE_LINE_START = 2
        self.MAZE_LINE_END = self.MAZE_WIDTH + 2
        self.CONSUMED_GOAL = self.MAZE_LINE_END + 1
        self.TRAJ_LENGTH = self.CONSUMED_GOAL + 1
        self.TRAJ_START = self.TRAJ_LENGTH + 1

    def zero_pad_single_game(self, max_elements, single_game):

        # A single game has several trajectories
        all_trajectories = single_game["ToM"]["traj_history"]
        N = len(all_trajectories)
        TrajZeroPad = []

        for i in range(N):
            zero_pad_trajectory = np.zeros(shape=(max_elements,
                                        self.MAZE_WIDTH,
                                        self.MAZE_HEIGHT,
                                        self.MAZE_DEPTH))
            current_trajectory = all_trajectories[i]
            Nt = len(current_trajectory)  # Number of real steps in the current trajectory
            if Nt > max_elements:
                zero_pad_trajectory = current_trajectory[-max_elements:]
            else:
                zero_pad_trajectory[:Nt, ...] = current_trajectory
            TrajZeroPad.append(zero_pad_trajectory)

        single_game["ToM"]["traj_history_zp"] = TrajZeroPad

        return single_game


    # Zero-pads every trajectory to max_elements steps: real steps come first, zeros fill the tail.
    def zero_padding(self, max_elements, DictData):

        DataZeroPad = DictData.copy()

        for key, value in DictData.items():

            if key[-len("traj"):] == "traj":
                print("Apply Zero-Padding to " + key + "... ")
                all_trajectories = DictData[key]
                N = len(all_trajectories)
                TrajZeroPad = []

                # Fill the last elements with real trajectory (implement pre-zero padding)
                for i in range(N):
                    zero_pad_trajectory = np.zeros(shape=(max_elements,
                                                          self.MAZE_WIDTH,
                                                          self.MAZE_HEIGHT,
                                                          self.MAZE_DEPTH))
                    current_trajectory = all_trajectories[i]
                    Nt =  len(current_trajectory) # Number of real steps in the trajectory
                    if Nt > max_elements:
                        zero_pad_trajectory = current_trajectory[-max_elements:]
                    else:
                        zero_pad_trajectory[:Nt, ...] = current_trajectory

                    # if key == "valid_traj":
                    #     actions = DictData["valid_act"]
                    #     ac = actions[i] # Getting an action TOMnet must predict
                    #     print("Next Action should be: ", ac)
                    #     self.one_trajectory_validation(zero_pad_trajectory)
                    #     cur_states = DictData["valid_current"]
                    #     cur_state = cur_states[i]
                    #     self.current_validation(cur_state)
                    TrajZeroPad.append(zero_pad_trajectory)

                DataZeroPad[key] = TrajZeroPad

        print("Zero Padding was applied!")

        return DataZeroPad

    # Zero-pads every trajectory to max_elements steps: real steps come first, zeros fill the tail.
    def zero_padding_v2(self, max_elements, all_games):

        # all_games = {
        #     "traj_history": traj_history,
        #     "traj_history_zp": traj_history_zp                    # Trajectory with Zero Padding
        #     "current_state_history": current_state_history,
        #     "actions_history": actions_history
        # }

        uniform_shape = (1, max_elements, self.MAZE_WIDTH, self.MAZE_HEIGHT, self.MAZE_DEPTH)

        zero_padded_trajectories = []   # ndarray, not list
        unfolded_current_states = []
        unfolded_action_history = []
        all_trajectories = all_games["traj_history"]
        all_current_states = all_games["current_state_history"]
        all_actions = all_games["actions_history"]
        N_all_games = len(all_trajectories)

        # Go one by one game
        # Where each game consist of many trajectories
        tracker_var = 0
        for i in range(N_all_games):

            traj = all_trajectories[i]
            cur = all_current_states[i]
            act = all_actions[i]
            N_traj = len(traj) # traj.shape[0]      # Number of trajectories in current game

            for j in range(N_traj):

                ### Init single piece of data from a game
                current_trajectory = traj[j]
                current_state = cur[j]
                current_action = act[j]

                ### Trajectory
                zero_pad_trajectory = np.zeros(shape=uniform_shape)
                Nt = current_trajectory.shape[0]  # Number of real steps in the trajectory

                # Save game in a bigger array so the rest is fiiled with zeros
                if Nt > max_elements:
                    zero_pad_trajectory[0, ...] = current_trajectory[-max_elements:]
                else:
                    zero_pad_trajectory[0, 0:Nt, ...] = current_trajectory

                zero_padded_trajectories.append(zero_pad_trajectory[0,...])

                ### Current state
                unfolded_current_states.append(current_state)

                ### Action
                unfolded_action_history.append(current_action)


            # Keep track on progress
            if i >= int(N_all_games * tracker_var / 100) - 2:
                print('Zero-Padded data ' + str(tracker_var) + '%')
                tracker_var += 5

        zero_padded_trajectories = np.array(zero_padded_trajectories)
        unfolded_current_states = np.array(unfolded_current_states)
        unfolded_action_history = np.array(unfolded_action_history)
        all_games["traj_history_zp"] = zero_padded_trajectories
        all_games["current_state_history"] = unfolded_current_states
        all_games["actions_history"] = unfolded_action_history

        print(all_games["traj_history_zp"].shape)

        print("Zero Padding was applied!")

        return all_games

    # Traj: 20x12x12x10
    # Cur: 12x12x6
    # ToMnet input: 21x12x12x10
    # Cur: 12x12x6 -> 1x12x12x10
    # Concatenate 20x12x12x10 + 1x12x12x10 -> 21x12x12x10
    def unite_traj_current(self, DictData):

        UniData = {
            "train_input": [np.zeros(shape=(self.MAX_TRAJECTORY_SIZE+1,
                                     self.MAZE_WIDTH,
                                     self.MAZE_HEIGHT,
                                     self.MAZE_DEPTH))] * len(DictData["train_traj"]),
            "test_input": [np.zeros(shape=(self.MAX_TRAJECTORY_SIZE + 1,
                                     self.MAZE_WIDTH,
                                     self.MAZE_HEIGHT,
                                     self.MAZE_DEPTH))] * len(DictData["test_traj"]),
            "valid_input": [np.zeros(shape=(self.MAX_TRAJECTORY_SIZE + 1,
                                    self.MAZE_WIDTH,
                                    self.MAZE_HEIGHT,
                                    self.MAZE_DEPTH))] * len(DictData["valid_traj"]),
            "train_goal": DictData["train_goal"],
            "test_goal": DictData["test_goal"],
            "valid_goal": DictData["valid_goal"],
            "train_act": DictData["train_act"],
            "test_act": DictData["test_act"],
            "valid_act": DictData["valid_act"]
        }

        print("-----")
        for key, value in UniData.items():

            if key[-len("input"):] == "input":

                print("Apply concatenation to " + key + "... ")
                # Add Trajectory in the beginning
                purpose = key[:-(len("input")+1)] # train / test / valid
                for i in range(len(DictData[purpose + "_traj"])):
                    UniData[key][i][0:self.MAX_TRAJECTORY_SIZE] = DictData[purpose + "_traj"][i]

                # Add Current in the end
                for i in range(len(DictData[purpose + "_traj"])):
                    # 12x12x6 -> 12x12x10
                    data_expanded = np.repeat(DictData[purpose + "_current"][i], repeats=2, axis=-1)
                    data_expanded = data_expanded[..., 0:10]
                    UniData[key][i][self.MAX_TRAJECTORY_SIZE] = data_expanded

        print("Concatenation is finished")
        return UniData


    def unite_single_traj_current(self, single_game):

        print("Apply concatenation to a single trajectory... ")
        trajectories_list = single_game["ToM"]["traj_history"]
        current_state_list = single_game["ToM"]["current_state_history"]
        assert len(trajectories_list) == len(current_state_list)

        N = len(trajectories_list)
        united_data = []
        for i in range(N):

            cur_traj = trajectories_list[i]     # 15x12x12x10
            cur_state = current_state_list[i]   # 12x12x6

            cur_state_expanded = np.zeros(shape=(self.MAZE_WIDTH, self.MAZE_HEIGHT, self.MAZE_DEPTH))
            cur_state_expanded[..., 0:6] = cur_state # 12x12x6 -> 12x12x10

            Ntraj = cur_traj.shape[0]
            concat_shape = (Ntraj + 1, self.MAZE_WIDTH, self.MAZE_HEIGHT, self.MAZE_DEPTH)
            concatenated_data = np.zeros(shape=concat_shape)
            concatenated_data[0:Ntraj] = cur_traj
            concatenated_data[Ntraj] = cur_state_expanded

            united_data.append(concatenated_data)

        single_game["ToM"]["united_input"] = united_data

        return single_game


    def validate_data(self, DictData):

        """
        Data = {"train_traj":train_traj,
                "test_traj":test_traj,
                "valid_traj":valid_traj,
                "train_current":train_current,
                "test_current":train_current,
                "valid_current":valid_current,
                "train_goal": train_goal,
                "test_goal": test_goal,
                "valid_goal": valid_goal,
                "train_act": train_act,
                "test_act": test_act,
                "valid_act": valid_act}
        """

        print("----")
        print("Data validation... ")

        for key, value in DictData.items():

            if key[-len("traj"):] == "traj":
                if key == "train_traj":
                    self.trajectory_validation(value)
            elif key[-len("current"):] == "current":
                self.current_validation(value)
            elif key[-len("goal"):] == "goal":
                self.goal_validation(value)
            elif key[-len("act"):] == "act":
                self.act_validation(value)
            else:
                raise ValueError("Wrong key inside Data dictionary!")

        print("----")

    def one_trajectory_validation(self, traj):
        """Show every frame of one trajectory as a 3x4 grid of channel images.

        Each frame is expected to provide channels 0-9 on its last axis
        (walls, player, four goals, four actions). The walls and player
        channels are drawn twice so that the grid is filled. One figure is
        shown per frame.
        """
        # Panel title -> channel index, in drawing order (row by row).
        panels = (
            ("walls", 0), ("player", 1), ("walls2", 0), ("player2", 1),
            ("goal1", 2), ("goal2", 3), ("goal3", 4), ("goal4", 5),
            ("act1(UP)", 6), ("act2(RIGHT)", 7), ("act3(DOWN)", 8), ("act4(LEFT)", 9),
        )
        ROW, COL = 3, 4

        for step in range(len(traj)):
            frame = traj[step]
            # Slice all ten channels before any figure is created.
            planes = [frame[..., channel] for channel in range(10)]

            fig, axs = plt.subplots(ROW, COL, figsize=(7, 6))
            for position, (title, channel) in enumerate(panels):
                row, col = divmod(position, COL)
                axis = axs[row, col]
                axis.imshow(planes[channel])
                axis.set_title(title + "::" + str(step))
                axis.axis("off")
            plt.show()

    def trajectory_validation(self, traj):
        """Plot all frames of the first trajectory in ``traj``.

        Only the trajectory at index 0 is drawn; the others are skipped.
        For every frame one figure with a 5x2 mosaic is shown: walls,
        player, the four goal channels and the four action channels
        (channels 0-9 of the frame's last axis).
        """
        print("Trajectory validation... ")

        layout = [
            ["walls",  "player"],
            ["goal 1", "goal 2"],
            ["goal 3", "goal 4"],
            ["act 1",  "act 2"],
            ["act 3", "act 4"]
        ]
        # (mosaic name, title prefix, channel index) in drawing order
        panels = (
            ("walls", "Walls-", 0),
            ("player", "Player-", 1),
            ("goal 1", "Goal 1-", 2),
            ("goal 2", "Goal 2-", 3),
            ("goal 3", "Goal 3-", 4),
            ("goal 4", "Goal 4-", 5),
            ("act 1", "Action 1-", 6),
            ("act 2", "Action 2-", 7),
            ("act 3", "Action 3-", 8),
            ("act 4", "Action 4-", 9),
        )

        for index, tau in enumerate(traj):
            if index != 0:
                continue

            for i in range(tau.shape[0]):
                # Take the i-th frame and slice its ten channels up front
                frame = tau[i]
                planes = [frame[..., channel] for channel in range(10)]

                fig, ax = plt.subplot_mosaic(layout, constrained_layout=True)
                for name, prefix, channel in panels:
                    ax[name].set_title(prefix + str(i))
                    ax[name].imshow(planes[channel])

                plt.show()



    def current_validation(self, cur):
        """Show the channels of one current-state tensor in a 2x4 grid.

        Channels 0-5 of the last axis are drawn (walls, player, four goals);
        walls and player appear twice so that the grid is filled.
        """
        print("Current state validation... ")

        # Panel titles with their channel index, in drawing order.
        panel_channels = (
            ("walls", 0), ("player", 1), ("walls2", 0), ("player2", 1),
            ("goal1", 2), ("goal2", 3), ("goal3", 4), ("goal4", 5),
        )
        # Slice every panel before the figure is created.
        to_draw = [(title, cur[..., channel]) for title, channel in panel_channels]

        ROW, COL = 2, 4
        fig, axs = plt.subplots(ROW, COL, figsize=(7, 6))
        for position, (title, plane) in enumerate(to_draw):
            row, col = divmod(position, COL)
            axis = axs[row, col]
            axis.imshow(plane)
            axis.set_title(title + ":: Current State")
            axis.axis("off")
        plt.show()

    def goal_validation(self, traj):
        """Placeholder goal check: announces itself, ``traj`` is not inspected."""
        message = "Goal validation... "
        print(message)

    def act_validation(self, traj):
        """Placeholder action check: announces itself, ``traj`` is not inspected."""
        message = "Action validation... "
        print(message)

# Main

## Main functions

In [17]:
# -*- coding: utf-8 -*-
"""
class Model(mp.ModelParameter):

The class for training the ToMNET model.

Note:
  Inherit mp.ModelParameter to share model constants.

@author: Chuang, Yun-Shiuan; Edwinn
"""
import os
import sys
import time
import datetime
import pandas as pd
import tensorflow as tf
import argparse
import numpy as np
import matplotlib.pyplot as plt

# --------------------------------------------------------
# CONSTANTS and Parameters
# --------------------------------------------------------
# Network size; passed to ToMnet as Ne_char and N_res_blocks in train_model().
N_ECHAR = 8
N_RESBLOCKS = 64
# Optimiser step size (Adam) and mini-batch size used by train_model().
LEARNING_RATE = 0.0001 / 5
BATCH_SIZE = 32
# Input tensor dimensions; passed as w, h and d to ToMnet, DataHandler and
# DataProcessor.
ROW = 12
COL = 12
DEPTH = 10
# Number of time steps each trajectory is zero-padded / truncated to.
MAX_TRAJ = 15
# Epochs of the main fit() call in train_model(); also the x-axis length in
# plot_history().
EPOCHS = 15 # 150 (no need to have more than 150)

# Share of the stored games to load (a fraction, not a percentage value).
# NOTE(review): not referenced in the visible part of the file - presumably
# passed to load_training_games(); confirm at the call site.
LOAD_PERCENTAGE = 0.001 # 0.1% = 5 games. 0.02% = 1 game

# Locations relative to the working directory. MODEL_PATH is where
# train_model() saves and load_model() reads the model; the two game paths
# are not used in the visible part of the file.
MODEL_PATH = os.path.join('drive', 'MyDrive', 'Dissertation', 'Games', 'Model')
TESTING_GAME_PATH = os.path.join('drive', 'MyDrive', 'Dissertation', 'Games', 'Overfit')
TRAINING_GAMES_PATH = os.path.join('drive', 'MyDrive', 'Dissertation', 'Games', 'Experiment 2')

def dict_to_tensors(Dict):
    """Convert a united data dictionary into input and one-hot target tensors.

    Reads the "<split>_input", "<split>_goal" and "<split>_act" entries for
    the splits "train", "test" and "valid". Inputs are converted with
    ``tf.convert_to_tensor``; goals and actions are shifted from 1-4 to 0-3
    and one-hot encoded over 4 classes.

    Returns:
        A 9-tuple: (train, test, valid) inputs, then goal targets, then
        action targets.
    """

    def make_y_outputs(folded_list):
        # 1-4 --> 0-3, then one-hot over 4 classes
        stacked = np.concatenate([folded_list], axis=0)
        zero_based = [label - 1 for label in stacked]
        return tf.one_hot(zero_based, 4)

    splits = ("train", "test", "valid")

    inputs = [tf.convert_to_tensor(Dict[split + "_input"]) for split in splits]
    goal_targets = [make_y_outputs(Dict[split + "_goal"]) for split in splits]
    act_targets = [make_y_outputs(Dict[split + "_act"]) for split in splits]

    return (*inputs, *goal_targets, *act_targets)

def save_game_to_draw(full_trajectory, predicted_actions):
    """Placeholder: prints a marker only; both arguments are ignored."""
    marker = "Puk-puk"
    print(marker)

def load_training_games(directory, load_percentage=0.2):
    """Load training games and turn them into model-ready tensors.

    Steps:
      1. ``DataHandler.load_all_games_v2`` loads the games from
         ``directory`` (``load_percentage`` is forwarded as
         ``use_percentage``). The result is a dictionary with the keys
         "traj_history", "current_state_history" and "actions_history".
      2. ``DataProcessor.zero_padding_v2`` pads the trajectories to MAX_TRAJ
         steps and adds "traj_history_zp".
      3. Trajectories and current states become float32 tensors; actions are
         one-hot encoded over 4 classes. The actions are used as stored: in
         Experiment 2 they are already saved as 0-3 (Experiment 1 data would
         need a -1 shift from 1-4).

    Returns:
        (X_train_traj, X_train_current, Y_act_Train)
    """
    dims = {"ts": MAX_TRAJ, "w": ROW, "h": COL, "d": DEPTH}

    # 1. Load data
    loader = DataHandler(**dims)
    games = loader.load_all_games_v2(directory=directory, use_percentage=load_percentage)

    # 2. Pre-process data - zero padding
    preprocessor = DataProcessor(**dims)
    games = preprocessor.zero_padding_v2(max_elements=MAX_TRAJ, all_games=games)

    # 3. Make tensors
    action_indices = games["actions_history"]
    n_actions = 4
    trajectories = tf.convert_to_tensor(games["traj_history_zp"], dtype=tf.float32)
    current_states = tf.convert_to_tensor(games["current_state_history"], dtype=tf.float32)
    one_hot_actions = tf.one_hot(action_indices, n_actions)

    return trajectories, current_states, one_hot_actions

def load_one_game(directory):
    """Load a single game and turn it into model-ready tensors.

    ``DataHandler.load_one_game`` returns a dictionary of the form
        {"traj": ..., "act": ..., "goal": ...,
         "ToM": {"traj_history": ...,
                 "current_state_history": ...,
                 "actions_history": ...}}
    ``DataProcessor.zero_pad_single_game`` then adds
    ``["ToM"]["traj_history_zp"]`` (trajectories padded to MAX_TRAJ steps).

    Trajectories and current states become float32 tensors. The actions are
    shifted from 1-4 to 0-3 and one-hot encoded over 4 classes.

    Returns:
        (X_train_traj, X_train_current, Y_act_Train)
    """
    dims = {"ts": MAX_TRAJ, "w": ROW, "h": COL, "d": DEPTH}

    # 1. Load data
    loader = DataHandler(**dims)
    game = loader.load_one_game(directory=directory)

    # 2. Pre-process data - zero padding
    preprocessor = DataProcessor(**dims)
    game = preprocessor.zero_pad_single_game(max_elements=MAX_TRAJ, single_game=game)

    # 3. Make tensors
    zero_based_actions = [action - 1 for action in game["ToM"]["actions_history"]]  # 1-4 --> 0-3
    n_actions = 4
    trajectories = tf.convert_to_tensor(game["ToM"]["traj_history_zp"], dtype=tf.float32)
    current_states = tf.convert_to_tensor(game["ToM"]["current_state_history"], dtype=tf.float32)
    one_hot_actions = tf.one_hot(zero_based_actions, n_actions)

    return trajectories, current_states, one_hot_actions

def train_model(X_train_traj, X_train_current, Y_act_Train):
    """Build, train and save a ToMnet model.

    The model is compiled with categorical cross-entropy, Adam
    (LEARNING_RATE, clipnorm=1.0) and the accuracy metric. It is first
    fitted for a single epoch, after which its summary is printed; then it
    is fitted for EPOCHS epochs. The history of that second fit is plotted
    and written to CSV, and the model is saved to MODEL_PATH.
    """
    # --------------------------------------------------------
    # 3. Create and set the model
    # --------------------------------------------------------
    print("----")
    print("Create a model")

    t = ToMnet(ts=MAX_TRAJ,
               w=ROW,
               h=COL,
               d=DEPTH,
               Ne_char=N_ECHAR,
               N_res_blocks=N_RESBLOCKS,
               filters=64)
    optimizer = tf.keras.optimizers.Adam(LEARNING_RATE, clipnorm=1.0)
    t.compile(loss='categorical_crossentropy',
              optimizer=optimizer,
              metrics=['accuracy'])

    model_inputs = [X_train_traj, X_train_current]
    fit_options = {"batch_size": BATCH_SIZE, "verbose": 2}

    # One epoch before the summary is requested
    t.fit(x=model_inputs, y=Y_act_Train, epochs=1, **fit_options)

    t.summary()

    # --------------------------------------------------------
    # 4. Train the model
    # --------------------------------------------------------
    print("X_traj shape: ", X_train_traj.shape)
    print("Train a Model")
    history = t.fit(x=model_inputs, y=Y_act_Train, epochs=EPOCHS, **fit_options)
    plot_history(history)
    save_history(history)

    t.save(MODEL_PATH)

def plot_history(history):
    """Plot the training loss and accuracy per epoch.

    Two figures are shown: the first with an automatic y-range, the second
    with the y-axis limited to [0, 1].

    Args:
        history: object whose ``history`` attribute maps 'loss' and
            'accuracy' to per-epoch value sequences (as returned by
            ``fit``).
    """

    def draw_curves():
        # The x-axis follows the number of recorded values, so the plot also
        # works when the run produced a different number of epochs than the
        # global EPOCHS setting.
        for metric, label in (('loss', 'Loss'), ('accuracy', 'Accuracy')):
            values = history.history[metric]
            plt.plot(
                np.arange(1, len(values) + 1),
                values,
                label=label, lw=3
            )

    # Figure 1: automatic y-range
    draw_curves()
    plt.title('Evaluation metrics', size=20)
    plt.xlabel('Epoch', size=14)
    plt.legend()
    plt.show()

    # Figure 2: y-axis limited to [0, 1]
    draw_curves()
    plt.ylim(0, 1)
    plt.title('Evaluation metrics', size=20)
    plt.xlabel('Epoch', size=14)
    plt.legend()
    plt.show()
    

def save_history(history, name="TrainHistory.csv"):
    """Write the per-epoch loss and accuracy of a training run to a CSV file.

    Args:
        history: object whose ``history`` attribute maps 'loss' and
            'accuracy' to per-epoch value sequences.
        name: path of the CSV file to write.
    """
    # Build the frame directly: DataFrame.append was removed in pandas 2.0,
    # and appending to an empty frame produced this same table anyway.
    train_history = pd.DataFrame({
        "loss": history.history['loss'],
        "accuracy": history.history['accuracy'],
    })
    train_history.to_csv(name)


def load_model():
    """Load the saved model from MODEL_PATH.

    The custom layer classes CustomCnn, ResBlock and CustomLSTM are passed
    as ``custom_objects`` under their own names.
    """
    return tf.keras.models.load_model(
        MODEL_PATH,
        custom_objects=dict(
            CustomCnn=CustomCnn,
            ResBlock=ResBlock,
            CustomLSTM=CustomLSTM,
        ),
    )

# (row, column) displacement applied by each action index.
_ACTION_DELTAS = {0: (-1, 0), 1: (0, 1), 2: (1, 0), 3: (0, -1)}


def predict_game(model, input_data, predict_steps=5,
                 path_to_save="../Results/Predictions/Prediction 1/"):
    """Predict the last ``predict_steps`` moves of a game and save the results.

    Only a trajectory is passed in. It is split into an observed trajectory
    (all frames except the last ``predict_steps``) and a current state (the
    frame at index ``-predict_steps``, channels 0-5). The model is then
    queried ``predict_steps`` times; after every prediction the predicted
    frame is appended to the trajectory and the current state is advanced.

    Channel layout used by the code: 0 - walls, 1 - player, 2..5 - four
    goals, 6..9 - four actions. Action indices move the player by
    ``_ACTION_DELTAS`` (0: row-1, 1: col+1, 2: row+1, 3: col-1).

    Files written into ``path_to_save``: ``simple_map.csv``,
    ``full_traj.csv``, ``init_traj.csv`` and ``predicted_traj.csv``.

    Args:
        model: object whose ``predict([trajectory, current_state])`` returns
            the action distribution (assumed shape (1, n_actions) - TODO
            confirm against the model definition).
        input_data: trajectory of shape (frames, W, H, D) or
            (1, frames, W, H, D); only the first game of a batch is used.
            NOTE(review): zero-padded input trajectories are not handled by
            the split into trajectory / current state - confirm callers pass
            un-padded games.
        predict_steps: number of trailing moves to predict (at least 1).
        path_to_save: directory for the CSV files; created when missing.

    Returns:
        List with the predicted action index of every step.

    Raises:
        ValueError: if ``predict_steps`` is below 1, the input has fewer
            than 6 channels, the game is too short, or the input holds more
            frames than ``MAX_TRAJ``.
    """
    import os  # local import: keeps the function self-contained

    if predict_steps < 1:
        raise ValueError("predict_steps must be at least 1, got " + str(predict_steps))

    # Add the batch axis when a single un-batched trajectory is passed.
    if len(input_data.shape) == 4:
        input_data = tf.expand_dims(input_data, axis=0)

    # DataFrame.to_csv does not create missing directories.
    if path_to_save:
        os.makedirs(path_to_save, exist_ok=True)

    width = input_data.shape[2]
    height = input_data.shape[3]

    # --------------------------------------------------------
    # 1. Build Initial Map (simple map) for rendering
    # --------------------------------------------------------
    first_frame = np.asarray(input_data[0, 0])
    if first_frame.shape[-1] < 6:
        raise ValueError("Expected at least 6 channels (walls, player, 4 goals), got "
                         + str(first_frame.shape[-1]))

    # 0-path, 1-wall, 2..5-goals, 10-player
    simple_map = np.zeros((width, height), dtype=np.int16)
    simple_map[first_frame[..., 0] == 1] = 1
    player_layer = first_frame[..., 1]
    simple_map[player_layer == 1] = 10
    # Written from the last goal to the first so that, on overlap, the goal
    # with the lowest index wins.
    for goal_index in (3, 2, 1, 0):
        simple_map[first_frame[..., 2 + goal_index] == 1] = 2 + goal_index
    pd.DataFrame(simple_map).to_csv(os.path.join(path_to_save, "simple_map.csv"))

    # --------------------------------------------------------
    # 2. Save Full Trajectory
    # --------------------------------------------------------
    full_traj_actions = []

    # The last frame is the current state, no action is stored there.
    n_moves = input_data.shape[1] - 1
    for frame_index in range(n_moves):
        action_layers = np.array(input_data[0, frame_index, ..., 6:10]).astype(np.int8)
        performed = [action for action in range(4)
                     if (action_layers[..., action] == 1).any()]
        if not performed:
            # A frame without any action is zero padding: the game ends here.
            n_moves = frame_index
            break
        full_traj_actions.append(performed[0])

    print("Full trajectory in actions: ", full_traj_actions)

    # Replay the actions from the initial player position.
    full_traj_coordinates = [list(np.where(player_layer == 1))]
    for applied_action in full_traj_actions:
        row, col = full_traj_coordinates[-1]
        d_row, d_col = _ACTION_DELTAS[applied_action]
        full_traj_coordinates.append([row + d_row, col + d_col])

    print("Full trajectory in coordinates: ", full_traj_coordinates)
    pd.DataFrame(full_traj_coordinates).to_csv(os.path.join(path_to_save, "full_traj.csv"))

    # --------------------------------------------------------
    # 3. Save Initial Trajectory
    # --------------------------------------------------------
    n_positions = len(full_traj_coordinates)
    if n_positions - predict_steps < 5:
        raise ValueError("The game is too short! It has only " + str(n_positions)
                         + " moves, while you ask to predict "
                         + str(predict_steps) +
                         " actions. Give at least a game with trajectory length bigger than predicted actions by 5.")
    initial_traj_coordinates = full_traj_coordinates[0:-predict_steps]
    pd.DataFrame(initial_traj_coordinates).to_csv(os.path.join(path_to_save, "init_traj.csv"))

    # --------------------------------------------------------
    # 4. Save Predicted Trajectory
    # --------------------------------------------------------

    # Traj -> observed frames + zero padding, plus the current state
    input_traj = tf.cast(input_data[0, 0:-predict_steps, ...], tf.float32)
    input_current = tf.cast(input_data[0, -predict_steps, ..., 0:6], tf.float32)

    n_observed = input_traj.shape[0]
    needed_zeros = MAX_TRAJ - n_observed  # Add zeros up to MAX_TRAJ
    if needed_zeros > 0:
        zero_pad = tf.zeros(shape=(needed_zeros, ROW, COL, DEPTH), dtype=tf.float32)
        input_traj = tf.concat(values=[input_traj, zero_pad], axis=0)
    if n_observed + predict_steps > input_traj.shape[0]:
        raise ValueError("The input has " + str(n_observed + predict_steps)
                         + " frames, but the trajectory holds at most "
                         + str(MAX_TRAJ) + " frames (MAX_TRAJ).")
    input_traj = tf.expand_dims(input_traj, axis=0)
    input_current = tf.expand_dims(input_current, axis=0)

    # Walls and goals are taken from the first trajectory frame. That frame is
    # never overwritten below (predicted frames start at index n_observed >= 5).
    static_frame = np.array(input_traj.numpy()[0, 0])
    np_wall_map = static_frame[..., 0:1]
    np_goal_map = static_frame[..., 2:6]

    # Player position in the current state
    np_player_position = input_current.numpy()[0, ..., 1]
    position = list(np.where(np_player_position == np_player_position.max()))

    # Make action predictions
    predicted_actions = []
    coordinates = [position]
    for step in range(predict_steps):
        predict_distribution = model.predict([input_traj, input_current])
        predicted_action = np.argmax(predict_distribution[0])   # Output: 0 - 3
        predicted_actions.append(predicted_action)
        print("Predicted Action: ", predicted_action)

        # --------------------------------------------------------
        # 4.1 Update layers
        # --------------------------------------------------------
        old_row, old_col = coordinates[-1]
        d_row, d_col = _ACTION_DELTAS.get(int(predicted_action), (0, 0))
        new_row = old_row + d_row
        new_col = old_col + d_col

        # Check for safety (map boundaries)
        if new_row > ROW - 1: new_row = ROW - 1
        if new_row < 0: new_row = 0
        if new_col > COL - 1: new_col = COL - 1
        if new_col < 0: new_col = 0

        coordinates.append([new_row, new_col])

        new_player_map = np.zeros(shape=(ROW, COL, 1))
        new_player_map[new_row, new_col, 0] = 1

        # The action belongs to the CURRENT frame, not the new one, so it is
        # stored at the player's old position.
        action_map = np.zeros(shape=(ROW, COL, 4))
        action_map[old_row, old_col, predicted_action] = 1

        # --------------------------------------------------------
        # 4.2 Update Trajectory
        # --------------------------------------------------------
        # Indexes:  traj1, traj2, traj3, traj4, traj5, zp,    zp, zp, zp, zp
        # Become:   traj1, traj2, traj3, traj4, traj5, traj6, zp, zp, zp, zp
        # The new frame goes into the first free slot after the observed
        # frames, whatever the amount of zero padding.
        np_current_state = input_current.numpy()[0, ...]  # walls, player and goals
        np_input_traj = input_traj.numpy()
        np_input_traj[0, n_observed + step, ...] = np.concatenate(
            (np_current_state, action_map), axis=-1)
        input_traj = tf.convert_to_tensor(np_input_traj, dtype=tf.float32)

        # --------------------------------------------------------
        # 4.3 Update Current State
        # --------------------------------------------------------
        new_current_state = np.concatenate((np_wall_map, new_player_map, np_goal_map), axis=-1)
        new_current_state = tf.convert_to_tensor(new_current_state, dtype=tf.float32)
        input_current = tf.expand_dims(new_current_state, axis=0)

    pd.DataFrame(coordinates).to_csv(os.path.join(path_to_save, "predicted_traj.csv"))

    return predicted_actions

## Additional function for Google Colab

In [21]:
# from google.colab import drive
# drive.mount('/content/drive')

Mounted at /content/drive


In [22]:
!ls

drive  sample_data


## Main Script

In [20]:
if __name__ == "__main__":

    # Workaround for a runtime error (presumably the duplicate OpenMP
    # library abort - confirm); must be set before training starts.
    os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'

    # --- Load the training games -----------------------------------------
    X_train_traj, X_train_current, Y_act_Train = load_training_games(
        directory=TRAINING_GAMES_PATH,
        load_percentage=LOAD_PERCENTAGE,
    )
    print(Y_act_Train)

    # --- Train the model --------------------------------------------------
    train_model(X_train_traj, X_train_current, Y_act_Train)

    # --- Optional: use an already trained model (kept disabled) ----------
    # model =  load_model()
    #
    # ### Keep training the model
    # # history = model.fit(x=X_Train, y=Y_act_Train,
    # #                 epochs=EPOCHS, batch_size=1, verbose=2)
    # # plot_history(history)
    #
    # ### Test it on one prediction
    # # A single game is loaded for testing
    # X_train_traj, X_train_current, Y_act_Train = load_one_game(directory=TESTING_GAME_PATH)
    #
    # # Pick the longest trajectory, which has 14 moves; the 15th frame is the current state
    # input_data_traj = X_train_traj[-6, ...]
    # input_data_current = X_train_current[-6, ...]
    # input_data_traj = tf.expand_dims(input_data_traj, axis=0)  # Add axis for "batch_size"
    # input_data_current = tf.expand_dims(input_data_current, axis=0)  # Add axis for "batch_size"
    # actual_action = Y_act_Train[-6]
    # yhat = model.predict([input_data_traj, input_data_current])
    #
    # print("Testing prediction:")
    # print("Actual action: ", actual_action)
    # print("Predicted action: ", yhat)
    #
    # ### Predict trajectory
    # print("Predict Trajectory:")
    # predict_game(model, input_data_traj, predict_steps=5)

    # --- Closing banner ---------------------------------------------------
    print(
        "------------------------------------",
        "Congratultions! You have reached the end of the script.",
        "------------------------------------",
        sep="\n",
    )

FileNotFoundError: ignored

In [None]:
!ls