## Load Data

In [None]:
import os

# Root folder for locally cached artifacts (relative to the notebook's working directory).
CACHE_DIR = "./cache"
# All datasets live below the cache root.
DATASET_DIR = os.path.join(CACHE_DIR, "dataset")
# Folder holding the pickled observations/scores/actions loaded in the next cell.
IMIT_LEARNING_DATASET_DIR = os.path.join(DATASET_DIR, "imitation_learning")

In [None]:
import dill as pickle
game_id = 46749  # id of the recorded game whose pickles are loaded below


def _load_game_pickle(kind):
    """Unpickle ``game_<game_id>_<kind>_all.pkl`` from the imitation-learning folder.

    NOTE(security): unpickling can execute arbitrary code embedded in the file,
    so only load dataset files you produced yourself or otherwise trust.
    """
    path = os.path.join(IMIT_LEARNING_DATASET_DIR,
                        'game_{0}_{1}_all.pkl'.format(game_id, kind))
    with open(path, 'rb') as f:
        return pickle.load(f)


observations_all = _load_game_pickle('obs')
scores_all = _load_game_pickle('scores')
actions_all = _load_game_pickle('actions')

In [None]:
# Only the last expression of a cell is displayed, so bundle the three shapes
# into one tuple instead of evaluating (and discarding) the first two.
observations_all.shape, scores_all.shape, actions_all.shape

In [None]:
# import enum
# class CarDirection(enum.Enum):
#     north = 0
#     east = 1
#     south = 2
#     west = 3

In [None]:
from matplotlib import pyplot as plt
# Distribution of the recorded actions (the labels used for supervised learning below).
fig, ax = plt.subplots()
ax.hist(actions_all.astype('int'))
ax.set(title='Recorded action distribution', xlabel='action id', ylabel='count')
plt.show()

In [None]:
def center_pad_observations(obs, receptor_size=100):
    """Zero-pad a batch of observations so both spatial axes equal ``receptor_size``.

    Args:
        obs: 4-D array indexed as (batch, axis 1, axis 2, channels); only
            axes 1 and 2 are padded.
        receptor_size: target length of axes 1 and 2 (default 100).

    Returns:
        A new array whose axes 1 and 2 both have length ``receptor_size``, with
        the original content centred. When the amount of padding is odd, the
        extra row/column goes to the trailing side.

    Raises:
        ValueError: if ``obs`` is already larger than ``receptor_size`` along
            axis 1 or axis 2.
    """
    def _split(total):
        # Distribute `total` padding cells as evenly as possible (before, after).
        before = total // 2
        return before, total - before

    # Each spatial axis gets its own amount so that non-square inputs also end
    # up exactly receptor_size x receptor_size.
    pad_rows = receptor_size - obs.shape[1]
    pad_cols = receptor_size - obs.shape[2]
    if pad_rows < 0 or pad_cols < 0:
        raise ValueError(
            "observation of shape {0} does not fit into receptor_size={1}".format(
                obs.shape, receptor_size))

    npads = ((0, 0), _split(pad_rows), _split(pad_cols), (0, 0))
    return np.pad(obs, pad_width=npads, mode='constant', constant_values=0)

## Supervised Learning

In [None]:
from keras.layers import Input, BatchNormalization, Add
from keras.layers import Activation, Convolution2D, Dense, Flatten
from keras.models import Model

def get_res_block(input, filters=256, kernel_size=3):
    """Apply one residual block to a tensor and return the result.

    The block is conv -> batch-norm -> ELU -> conv -> batch-norm, whose output
    is added to the block input and passed through a final ELU.

    Args:
        input: tensor entering the block. It is summed with the convolution
            branch, so its channel count has to match ``filters``.
        filters: number of filters of both convolutions (default 256, the
            previously hard-coded value).
        kernel_size: kernel size of both convolutions (default 3).

    Returns:
        The output tensor of the block.
    """
    shortcut = input

    # Convolution branch; 'same' padding keeps the spatial size so the branch
    # can be added back onto the shortcut.
    branch = Convolution2D(filters, kernel_size, padding='same')(shortcut)
    branch = BatchNormalization()(branch)
    branch = Activation('elu')(branch)
    branch = Convolution2D(filters, kernel_size, padding='same')(branch)
    branch = BatchNormalization()(branch)

    merged = Add()([shortcut, branch])
    return Activation('elu')(merged)

def create_model(actions, input_shape=(100, 100, 8,), n_res_blocks=3):
    """Build the (uncompiled) residual policy network.

    Architecture: a 256-filter 3x3 stem convolution, ``n_res_blocks`` residual
    blocks from ``get_res_block``, a 2-filter 1x1 convolution, then a softmax
    layer over the actions.

    Args:
        actions: number of output classes of the softmax layer.
        input_shape: shape of one input sample (default ``(100, 100, 8)``).
        n_res_blocks: how many residual blocks to stack (default 3, the
            previously hard-coded value).

    Returns:
        A ``Model`` mapping the input to the action probabilities (output layer
        named ``'actions'``).
    """
    inp = Input(input_shape)

    # Stem
    x = Convolution2D(256, 3, padding='same')(inp)
    x = BatchNormalization()(x)
    x = Activation('elu')(x)

    # Residual tower
    for _ in range(n_res_blocks):
        x = get_res_block(x)

    # Output block: reduce to 2 feature maps before flattening.
    x = Convolution2D(2, 1, padding='same')(x)
    x = BatchNormalization()(x)
    x = Activation('elu')(x)
    x = Flatten()(x)

    probs = Dense(actions, activation='softmax', name='actions')(x)
    # A second head is currently disabled:
    # reward = Dense(1, activation='tanh', name='reward')(x)

    return Model(inputs=inp, outputs=probs)

# Build the residual network with 5 output classes; the teacher policy `megaalg`
# later in this notebook returns action ids 0-4.
# NOTE(review): the training cell derives `action_dim` from the data instead of
# using this literal - confirm that both agree.
model_resblock = create_model(5)

In [None]:
from client import CarDirection, Client
from env import JunctionEnvironment

from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten, Softmax
from keras.layers import Convolution2D, MaxPooling2D
from keras.optimizers import Adam
from keras.callbacks import TensorBoard, ModelCheckpoint
import numpy as np

# Shape of a single observation; used as input_shape by create_model_1.
img_dim = observations_all[0].shape
# Number of distinct action labels present in the dataset; output size of create_model_1.
action_dim = len(np.unique(actions_all))
# Maximum number of environment steps per episode in the DAgger loop.
steps = 1000
# Mini-batch size passed to model.fit and to the TensorBoard callback.
batch_size = 32
# Number of epochs of the supervised training run.
nb_epoch = 100

#model from https://github.com/fchollet/keras/blob/master/examples/cifar10_cnn.py

def create_model_1():
    """Build the (uncompiled) plain CNN alternative to the residual network.

    Three conv blocks with 32, 64 and 128 filters (each: 8x8 'same' conv, ReLU,
    8x8 conv, ReLU, 2x2 max-pooling, dropout 0.25) are followed by a 256-unit
    dense layer with dropout 0.5 and a softmax over ``action_dim`` classes.

    Reads the notebook globals ``img_dim`` (input shape) and ``action_dim``.

    Uses the Keras 2 call style (kernel size as a tuple, ``padding=``) like the
    rest of this notebook. The former Keras 1 style ``Convolution2D(n, 8, 8,
    border_mode=...)`` only works through the legacy shim and is otherwise read
    as kernel_size=8, strides=8.

    Returns:
        The ``Sequential`` model.
    """
    kernel = (8, 8)
    model = Sequential()

    for block_idx, filters in enumerate((32, 64, 128)):
        # Only the very first layer of a Sequential model declares the input shape.
        first_layer_kwargs = {'input_shape': img_dim} if block_idx == 0 else {}
        model.add(Convolution2D(filters, kernel, padding='same', **first_layer_kwargs))
        model.add(Activation('relu'))
        model.add(Convolution2D(filters, kernel))
        model.add(Activation('relu'))
        model.add(MaxPooling2D(pool_size=(2, 2)))
        model.add(Dropout(0.25))

    model.add(Flatten())
    model.add(Dense(256))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))
    model.add(Dense(action_dim, activation='softmax'))
    return model

# model = create_model_1()
# Train the residual network built earlier; the commented line above switches
# to the plain CNN instead.
model = model_resblock

# Integer class labels -> sparse categorical cross-entropy; the loss is also
# reported as a metric next to accuracy.
model.compile(
    optimizer=Adam(lr=1e-4),
    loss='sparse_categorical_crossentropy',
    metrics=['sparse_categorical_crossentropy', 'accuracy'],
)

model.summary()


###{{{ TensorBoard Logging and CheckPoint Callbacks begin
# Random run id so that every execution logs into its own TensorBoard folder.
supervised_experiment = np.random.randint(0, 100000)
tb_cb = TensorBoard(log_dir='./logs/supervised_{0}'.format(supervised_experiment), histogram_freq=0, batch_size=batch_size, 
                    write_graph=True, write_grads=False, write_images=False, 
                    embeddings_freq=0, embeddings_layer_names=None, 
                    embeddings_metadata=None, embeddings_data=None, update_freq='epoch')

# {epoch} and {val_loss} are filled in by ModelCheckpoint when it saves.
checkpoint_path="checkpoints/supervised/weights-improvement-{epoch:02d}-{val_loss:.2f}.ckpt"
checkpoint_dir = os.path.dirname(checkpoint_path)

# exist_ok avoids the check-then-create race and makes the cell re-runnable.
os.makedirs(checkpoint_dir, exist_ok=True)

# Keep only weights that improve the validation loss.
cp_cb = ModelCheckpoint(filepath=checkpoint_path, monitor='val_loss', verbose=0, save_best_only=True, save_weights_only=True, mode='auto', period=1)

# Set to a file name inside checkpoint_dir to resume from saved weights.
load_checkpoint = '' # example: 'weights-improvement-01-1.50.ckpt'

if load_checkpoint != '':
    load_path = os.path.join(checkpoint_dir, load_checkpoint)
    # Announce the path first so it is visible even if loading fails.
    print("Loading %s " % load_path)
    model.load_weights(load_path)
###}}} TensorBoard Logging and CheckPoint Callbacks Ends

# Supervised (behaviour-cloning) training on the recorded game.
# `epochs` is the Keras 2 keyword; `nb_epoch` is the legacy Keras 1 name.
# validation_split holds out the last 20% of the samples (taken before shuffling).
model.fit(observations_all, actions_all,
          batch_size=batch_size,
          epochs=nb_epoch,
          shuffle=True,
          validation_split=0.2,
          callbacks=[tb_cb, cp_cb])

# output_file = open('results.txt', 'w')

## DAgger (Dataset Aggregation)

In [None]:
from alg_astar import search

class CurrentTarget:
    """Mutable holder for the cell the teacher policy is currently heading to.

    An instance is passed as the ``self`` argument of ``megaalg``, which reads
    and updates ``current_target`` between calls.
    """
    def __init__(self) -> None:
        # None means that no target has been selected yet.
        self.current_target = None
        
def _next_cell_towards_target(state, maze, car_pos):
    """Plan a path from ``car_pos`` to ``state.current_target``; return its first cell.

    Clears ``state.current_target`` and returns None when ``search`` yields no
    path. The target is also cleared when the returned path has one cell left.
    """
    x, y = state.current_target
    path, _ = search(maze, 1, car_pos, (x, y))
    if not path:
        state.current_target = None
        return None
    if len(path) == 1:
        state.current_target = None
    return path[0]


def megaalg(self, obs):
    """Teacher policy: choose the next action for the car using A* path search.

    Channels of ``obs`` read here: 0 (inverted to build the maze handed to
    ``search``), 1 (customers to look for), 3 (destination to go to) and
    4 (the car itself).

    While channel 3 is non-empty the car heads for the destination; otherwise it
    picks a customer (shortest completed path, falling back to the path of the
    customer with the smallest Manhattan distance) and heads there. The chosen
    cell is remembered in ``self.current_target`` between calls.

    Args:
        self: state holder exposing a ``current_target`` attribute
            (e.g. ``CurrentTarget``).
        obs: observation array indexed as ``obs[row, col, channel]``.

    Returns:
        An action id in 0..4. 0-3 are moves to a neighbouring cell and 4 is
        returned whenever no move can be determined.
        NOTE(review): presumably 0-3 follow the CarDirection numbering - confirm.

    Raises:
        IndexError: if channel 4 contains no car.
    """
    car_rows, car_cols = np.where(obs[:, :, 4])
    car_x, car_y = car_rows[0], car_cols[0]
    car_pos = (car_x, car_y)
    maze = 1 - obs[:, :, 0]

    if obs[:, :, 3].sum() > 0:
        # go to destination
        if self.current_target is None:
            dest_rows, dest_cols = np.where(obs[:, :, 3])
            self.current_target = dest_rows[0], dest_cols[0]
        target_cell = _next_cell_towards_target(self, maze, car_pos)
    else:
        # look for customer
        if obs[:, :, 1].sum() == 0:
            return 4

        if self.current_target is None:
            cust_rows, cust_cols = np.where(obs[:, :, 1])
            customer_dists = []
            paths_to_clients = []
            statuses = []
            for x, y in zip(cust_rows, cust_cols):
                dist = np.abs(car_x - x) + np.abs(car_y - y)
                customer_dists.append(dist)
                if dist > 50:
                    # Too far away: skip the path search and mark as not completed.
                    paths_to_clients.append(None)
                    statuses.append(2)
                else:
                    path, status = search(maze, 1, car_pos, (x, y))
                    paths_to_clients.append(path)
                    statuses.append(status)

            # Status 0 is treated as "search produced a complete path".
            completed_paths = [p for p, s in zip(paths_to_clients, statuses) if s == 0]
            if completed_paths:
                min_path = min(completed_paths, key=len)
            else:
                min_path = paths_to_clients[np.argmin(customer_dists)]

            # The fallback path is None when the closest customer was skipped
            # for being too far away (or empty if the search found nothing);
            # indexing it used to raise, so stay put instead.
            if not min_path:
                return 4

            self.current_target = min_path[-1]
            target_cell = min_path[0]
        else:
            target_cell = _next_cell_towards_target(self, maze, car_pos)

    if target_cell is None:
        return 4
    # Translate the neighbouring cell into an action id.
    if target_cell[0] == car_x and target_cell[1] == car_y - 1:
        return 3
    if target_cell[0] == car_x and target_cell[1] == car_y + 1:
        return 1
    if target_cell[0] == car_x - 1 and target_cell[1] == car_y:
        return 2
    if target_cell[0] == car_x + 1 and target_cell[1] == car_y:
        return 0
    return 4
    
def get_teacher_action(currentTarget, observation):
    """Ask the teacher policy ``megaalg`` for its action on ``observation``.

    ``currentTarget`` is handed to ``megaalg`` as its first argument, i.e. the
    state holder the teacher reads and updates.
    """
    teacher_action = megaalg(currentTarget, observation)
    return teacher_action

In [None]:
from client import CarDirection, Client
from env import JunctionEnvironment
from time import sleep

# https://github.com/avisingh599/imitation-dagger/blob/master/dagger.py
team_name = "ipa"
team_key = "admin"
# aggregate and retrain
dagger_itr = 5

DAGGER_LEARNING_DATASET_DIR = os.path.join(DATASET_DIR, "dagger_learning")

observations_all_dagger = observations_all
actions_all_dagger = actions_all

dagger_experiment = np.random.randint(0, 100000)
for itr in range(dagger_itr):
    print("begin dagger..")
    ob_list = []
    teacher_actions = []
    
    car_id = '0' # TODO: is it ok to set is to '0' always?
    currentTarget = CurrentTarget()
    client = Client(team_name=team_name, team_key=team_key)
    env = JunctionEnvironment(client)
    try:
        ob = env.reset()[car_id]
        score_sum = 0.0

        sleep(0.5)

        print("make obs with dagger..")
        for i in range(steps):
            # The learner acts on the padded observation...
            ob_padded = center_pad_observations(ob[np.newaxis,:,:,:], receptor_size=100)
            act_pred = model.predict(ob_padded)
            act = np.argmax(act_pred)
            # ...while the teacher labels the same (unpadded) state.
            act_teach = get_teacher_action(currentTarget, ob) # most probably the teacher does not depend on the padded observation
            ob, score, done, _ = env.step(act, car_id) #env.step(act)
            # Truthiness instead of `is True`, so numpy booleans also end the episode.
            if done:
                break
            ob_list.append(ob_padded[0,:,:,:])
            teacher_actions.append(act_teach)
            score_sum += score
            print(i, score, score_sum, done, act)
        print('Episode done ', itr, i, score_sum)
        # output_file.write('Number of Steps: %02d\t Score: %0.04f\n'%(i, score_sum))
    finally:
        # Always release the environment, even if the episode raised.
        env.close()

    # TODO: what is this for?
    # if i==(steps-1):
    #    break
    # An episode that ended on its first step yields no samples; appending an
    # empty array would fail on the dimension mismatch.
    if ob_list:
        observations_all_dagger = np.append(observations_all_dagger, np.array(ob_list), axis=0)
        actions_all_dagger = np.append(actions_all_dagger, teacher_actions, axis=0)

    print("train on dagger..")
    
    ###{{{ TensorBoard Logging and CheckPoint Callbacks begin
    tb_cb = TensorBoard(log_dir='./logs/dagger_{0}'.format(dagger_experiment), histogram_freq=0, batch_size=batch_size, 
                        write_graph=True, write_grads=False, write_images=False, 
                        embeddings_freq=0, embeddings_layer_names=None, 
                        embeddings_metadata=None, embeddings_data=None, update_freq='epoch')

    checkpoint_path="checkpoints/dagger/weights-improvement-{epoch:02d}-{val_loss:.2f}.ckpt"
    checkpoint_dir = os.path.dirname(checkpoint_path)

    os.makedirs(checkpoint_dir, exist_ok=True)

    cp_cb = ModelCheckpoint(filepath=checkpoint_path, monitor='val_loss', verbose=0, save_best_only=True, save_weights_only=True, mode='auto', period=1)

    load_checkpoint = '' # example: 'weights-improvement-01-1.50.ckpt'

    if load_checkpoint != '':
        load_path = os.path.join(checkpoint_dir, load_checkpoint)
        print("loading %s " % load_path)
        model.load_weights(load_path)
    ###}}} TensorBoard Logging and CheckPoint Callbacks Ends

    # validation_split takes the LAST 20% of the arrays (before shuffling), which
    # is exactly where the new DAgger samples were appended. Permute first so the
    # aggregated samples are actually trained on.
    # NOTE: the fancy indexing makes a temporary copy of the dataset.
    shuffled_idx = np.random.permutation(len(observations_all_dagger))
    model.fit(observations_all_dagger[shuffled_idx], actions_all_dagger[shuffled_idx],
              batch_size=batch_size,
              validation_split=0.2,
              epochs=1,  # Keras 2 keyword (was the legacy nb_epoch); one pass per DAgger iteration
              shuffle=True,
              callbacks=[tb_cb, cp_cb])
    
print("Saving dagger generated data..")
# exist_ok avoids the check-then-create race and makes the cell re-runnable.
os.makedirs(DAGGER_LEARNING_DATASET_DIR, exist_ok=True)

# Persist the aggregated observations and teacher actions side by side.
for suffix, payload in (("obs", observations_all_dagger), ("actions", actions_all_dagger)):
    out_path = os.path.join(DAGGER_LEARNING_DATASET_DIR,
                            f"game_{dagger_experiment}_car_{car_id}_{suffix}.pkl")
    with open(out_path, "wb") as f:
        pickle.dump(payload, f)