In [0]:
import sys
sys.path.append("/content/drive/My Drive/snakeAI")

from collections import namedtuple

Experience = namedtuple('Experience', ('state', 'direction', 'next_state', 'reward'))

import tensorflow as tf
import tensorflow.keras as k

from Snake.environment import SnakeMaze
from Snake.variables import Status, Cell
from models.replay_memory import ReplayMemory
from models.model_train import train_dqn
from Snake.utils import euclidean_distance

from datetime import datetime
import numpy as np
import cv2
import os
from time import time


# ResNet50

In [0]:
def ResNet50(input_shape, num_not_trainable_blocks=4):
  base_model = k.applications.ResNet50(include_top=False, input_shape=input_shape)
  base_model.trainable = False

  for l in base_model.layers:
    if l.name.split("_")[0] == f"conv{num_not_trainable_blocks+1}":
      break
    l.trainable = True

  model = k.Sequential([
      base_model,
      k.layers.Flatten(),
      k.layers.Dense(4096, activation='relu'),
      k.layers.Dense(2048, activation='relu'),
      k.layers.Dense(1024, activation='relu'),
      k.layers.Dense(512, activation='relu'),
      k.layers.Dense(4, activation='linear'),
  ])

  model.build(input_shape=input_shape)
  return model

### Configuration

In [3]:
"""
            Training model

            :param model: tf.keras.Model
                An instance ovr the keras.Model
            :param reward: function
                A function that returns the reward of the current state

                Args:
                    ::param snake: Snake
                    ::param env: SnakeMaze
                    ::param direction: Direction

                Returns:
                    int: The current reward

            :param optimizer: tf.keras.Optimizer
                The optimizer used to train the model

            :param kwargs:
                save_images (bool): False
                    If true images are saved on the eval episode

                save_videos (bool): False
                    If true videos are saved on the eval episode

                save_models (bool): False
                    If true models are saved on the eval episode

                save_graphs (bool): False
                    If true graphs are saved on the eval episode

                evaluate_each (int): 50
                    Number of episodes between two evaluations

                num_rolling_avg_sample (int): 50
                    Number of episodes that are used for the rolling average

                max_steps_per_episode (int): 200
                    Max number of steps in each episode

                num_episodes (int, None): None
                    Number of episodes to train, if None runs forever

                gamma (float): .8
                    The discount factor

                epsilon (float): 1.
                    The exploration/exploitation rate

                epsilon_decay (float): 0.0005
                    The decay of the exploration/exploitation rate

                min_epsilon (float): 0.01
                    The epsilon convergence

                memory_size (int): 100000
                    Capacity of the reply memory, if less then 50 Replay Memory will not be used

                boundaries (bool): True
                    Include boundaries in the maze

                maze_width (int): 10
                    The width of the maze

                maze_height (int, None): None
                    The height of the maze, if not specified the maze is squared

                max_snakes (int): 1
                    The max number of snakes in the environment

                path_to_weights (str): None
                    Path to saved weights for the model

                image_size (tuple[int, int]): (112, 112)
                    The size of the image used as input in the model

                verbose (bool): True
                    Print info about the training

                training_dir (str): './'
                    Path to the directory for training
            """

"\n            Training model\n\n            :param model: tf.keras.Model\n                An instance ovr the keras.Model\n            :param reward: function\n                A function that returns the reward of the current state\n\n                Args:\n                    ::param snake: Snake\n                    ::param env: SnakeMaze\n                    ::param direction: Direction\n\n                Returns:\n                    int: The current reward\n\n            :param optimizer: tf.keras.Optimizer\n                The optimizer used to train the model\n\n            :param kwargs:\n                save_images (bool): False\n                    If true images are saved on the eval episode\n\n                save_videos (bool): False\n                    If true videos are saved on the eval episode\n\n                save_models (bool): False\n                    If true models are saved on the eval episode\n\n                save_graphs (bool): False\n                   

In [0]:
NUM_NOT_TRAINABLE_BLOCKS = 4

IMAGE_SIZE = (64, 64)
IMAGE_SHAPE = (*IMAGE_SIZE, 3)

learning_rate = 1e-5

config = {
  "num_rolling_avg_sample": 10,
  "evaluate_each": 20,
  "save_models": True,
  "save_graphs": True,
  "gamma": .99,
  "epsilon_decay": 0.0005,
  "boundaries": False,
  "maze_width": 10,
  "image_size": IMAGE_SIZE,
  "batch_size": 128,
  "comment": f"""
  Architecture: ResNet50
  Weights: imagenet, {NUM_NOT_TRAINABLE_BLOCKS} non trainable blocks
  Optimizer: Adam ( with amsgrad )
  Hyperparameters:
    lr = {learning_rate}
  Reward:
      if snake.status == Status.DEAD:
        return 0
    else:
        r = 0
        r += 1000 if snake.steps_without_food == 1 else 0
        return r

  Reward only for eating food
  """
}

### Training

In [0]:
envv = SnakeMaze(10, 10, 1)
envv.reset()

In [0]:
def reward(snake, env: SnakeMaze, direction):
    if snake.status == Status.DEAD:
        return -1000
        return 0
    else:
        r = 0
        r -= euclidean_distance(envv.snakes[0].body[0], list(envv.food)[0])
        r += 50 if snake.steps_without_food == 1 else 0
        return r

In [0]:
t = time()
config["training_dir"] = os.path.join(*['drive', 'My Drive', 'snakeAI', 'trainings', 'transfer_learningDQN', "ResNet50", datetime.now().strftime('%d%h%Y__%H%M%S%f')])
optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate, amsgrad=True)

model = ResNet50(input_shape=IMAGE_SHAPE, num_not_trainable_blocks=NUM_NOT_TRAINABLE_BLOCKS)
model.build(input_shape=(None, *IMAGE_SHAPE))
model.compile(optimizer=optimizer)

model = train_dqn(model, optimizer, reward, **config)
t = time() - t

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5
____________________________________________________________________________________________
Episode 1 Done!
Episode reward: -500.0
Epsilon: 1.0
Replay Memory size: 136

Model saved at: drive/My Drive/snakeAI/trainings/transfer_learningDQN/ResNet50/10Jun2020__200157933012/model
Graph saved at: drive/My Drive/snakeAI/trainings/transfer_learningDQN/ResNet50/10Jun2020__200157933012/Reward.png
____________________________________________________________________________________________
Episode 2 Done!
Episode reward: -550.0
Epsilon: 0.9995
Replay Memory size: 176

____________________________________________________________________________________________
Episode 3 Done!
Episode reward: -550.0
Epsilon: 0.9990000000000001
Replay Memory size: 280

____________________________________________________________________________________________
Episode 4 Don

  fig, (ax1, ax2) = plt.subplots(2, 1)


Graph saved at: drive/My Drive/snakeAI/trainings/transfer_learningDQN/ResNet50/10Jun2020__200157933012/Reward.png
____________________________________________________________________________________________
Episode 402 Done!
Episode reward: -500.0
Epsilon: 0.7995000000000221
Replay Memory size: 254

____________________________________________________________________________________________
Episode 403 Done!
Episode reward: -1173.0
Epsilon: 0.7990000000000221
Replay Memory size: 250

____________________________________________________________________________________________
Episode 404 Done!
Episode reward: -500.0
Epsilon: 0.7985000000000222
Replay Memory size: 258

____________________________________________________________________________________________
Episode 405 Done!
Episode reward: -550.0
Epsilon: 0.7980000000000222
Replay Memory size: 330

____________________________________________________________________________________________
Episode 406 Done!
Episode reward: -550.0
Eps

  fig, (ax1, ax2) = plt.subplots(2, 1)


Graph saved at: drive/My Drive/snakeAI/trainings/transfer_learningDQN/ResNet50/10Jun2020__200157933012/Reward.png
____________________________________________________________________________________________
Episode 422 Done!
Episode reward: -550.0
Epsilon: 0.7895000000000232
Replay Memory size: 356

____________________________________________________________________________________________
Episode 423 Done!
Episode reward: -500.0
Epsilon: 0.7890000000000232
Replay Memory size: 332

____________________________________________________________________________________________
Episode 424 Done!
Episode reward: -550.0
Epsilon: 0.7885000000000233
Replay Memory size: 244

____________________________________________________________________________________________
Episode 425 Done!
Episode reward: -450.0
Epsilon: 0.7880000000000233
Replay Memory size: 252

____________________________________________________________________________________________
Episode 426 Done!
Episode reward: -1497.0
Eps

  fig, (ax1, ax2) = plt.subplots(2, 1)


Graph saved at: drive/My Drive/snakeAI/trainings/transfer_learningDQN/ResNet50/10Jun2020__200157933012/Reward.png
____________________________________________________________________________________________
Episode 442 Done!
Episode reward: -550.0
Epsilon: 0.7795000000000243
Replay Memory size: 350

____________________________________________________________________________________________
Episode 443 Done!
Episode reward: -600.0
Epsilon: 0.7790000000000243
Replay Memory size: 454

____________________________________________________________________________________________
Episode 444 Done!
Episode reward: -1488.0
Epsilon: 0.7785000000000244
Replay Memory size: 331

____________________________________________________________________________________________
Episode 445 Done!
Episode reward: -550.0
Epsilon: 0.7780000000000244
Replay Memory size: 339

____________________________________________________________________________________________
Episode 446 Done!
Episode reward: -450.0
Eps

  fig, (ax1, ax2) = plt.subplots(2, 1)


Graph saved at: drive/My Drive/snakeAI/trainings/transfer_learningDQN/ResNet50/10Jun2020__200157933012/Reward.png
____________________________________________________________________________________________
Episode 462 Done!
Episode reward: -1399.0
Epsilon: 0.7695000000000254
Replay Memory size: 398

____________________________________________________________________________________________
Episode 463 Done!
Episode reward: -1351.0
Epsilon: 0.7690000000000254
Replay Memory size: 342

____________________________________________________________________________________________
Episode 464 Done!
Episode reward: -600.0
Epsilon: 0.7685000000000255
Replay Memory size: 382

____________________________________________________________________________________________
Episode 465 Done!
Episode reward: -1087.0
Epsilon: 0.7680000000000256
Replay Memory size: 334

____________________________________________________________________________________________
Episode 466 Done!
Episode reward: -1224.0


  fig, (ax1, ax2) = plt.subplots(2, 1)


Graph saved at: drive/My Drive/snakeAI/trainings/transfer_learningDQN/ResNet50/10Jun2020__200157933012/Reward.png
____________________________________________________________________________________________
Episode 482 Done!
Episode reward: -1092.0
Epsilon: 0.7595000000000265
Replay Memory size: 244

____________________________________________________________________________________________
Episode 483 Done!
Episode reward: -600.0
Epsilon: 0.7590000000000265
Replay Memory size: 284

____________________________________________________________________________________________
Episode 484 Done!
Episode reward: -600.0
Epsilon: 0.7585000000000266
Replay Memory size: 228

____________________________________________________________________________________________
Episode 485 Done!
Episode reward: -1309.0
Epsilon: 0.7580000000000267
Replay Memory size: 222

____________________________________________________________________________________________
Episode 486 Done!
Episode reward: -1107.0
E

  fig, (ax1, ax2) = plt.subplots(2, 1)


Graph saved at: drive/My Drive/snakeAI/trainings/transfer_learningDQN/ResNet50/10Jun2020__200157933012/Reward.png
____________________________________________________________________________________________
Episode 502 Done!
Episode reward: -1470.0
Epsilon: 0.7495000000000276
Replay Memory size: 283

____________________________________________________________________________________________
Episode 503 Done!
Episode reward: -1416.0
Epsilon: 0.7490000000000276
Replay Memory size: 360

____________________________________________________________________________________________
Episode 504 Done!
Episode reward: -500.0
Epsilon: 0.7485000000000277
Replay Memory size: 336

____________________________________________________________________________________________
Episode 505 Done!
Episode reward: -1236.0
Epsilon: 0.7480000000000278
Replay Memory size: 385

____________________________________________________________________________________________
Episode 506 Done!
Episode reward: -600.0
E

  fig, (ax1, ax2) = plt.subplots(2, 1)


Graph saved at: drive/My Drive/snakeAI/trainings/transfer_learningDQN/ResNet50/10Jun2020__200157933012/Reward.png
____________________________________________________________________________________________
Episode 522 Done!
Episode reward: -1362.0
Epsilon: 0.7395000000000287
Replay Memory size: 270

____________________________________________________________________________________________
Episode 523 Done!
Episode reward: -550.0
Epsilon: 0.7390000000000287
Replay Memory size: 246

____________________________________________________________________________________________
Episode 524 Done!
Episode reward: -500.0
Epsilon: 0.7385000000000288
Replay Memory size: 286

____________________________________________________________________________________________
Episode 525 Done!
Episode reward: -1404.0
Epsilon: 0.7380000000000289
Replay Memory size: 295

____________________________________________________________________________________________
Episode 526 Done!
Episode reward: -878.0
Ep

  fig, (ax1, ax2) = plt.subplots(2, 1)


Graph saved at: drive/My Drive/snakeAI/trainings/transfer_learningDQN/ResNet50/10Jun2020__200157933012/Reward.png
____________________________________________________________________________________________
Episode 542 Done!
Episode reward: -1101.0
Epsilon: 0.7295000000000298
Replay Memory size: 273

____________________________________________________________________________________________
Episode 543 Done!
Episode reward: -550.0
Epsilon: 0.7290000000000298
Replay Memory size: 313

____________________________________________________________________________________________
Episode 544 Done!
Episode reward: -1354.0
Epsilon: 0.7285000000000299
Replay Memory size: 258

____________________________________________________________________________________________
Episode 545 Done!
Episode reward: -550.0
Epsilon: 0.72800000000003
Replay Memory size: 266

____________________________________________________________________________________________
Episode 546 Done!
Episode reward: -450.0
Epsi

  fig, (ax1, ax2) = plt.subplots(2, 1)


Graph saved at: drive/My Drive/snakeAI/trainings/transfer_learningDQN/ResNet50/10Jun2020__200157933012/Reward.png
____________________________________________________________________________________________
Episode 562 Done!
Episode reward: -1272.0
Epsilon: 0.7195000000000309
Replay Memory size: 351

____________________________________________________________________________________________
Episode 563 Done!
Episode reward: -500.0
Epsilon: 0.719000000000031
Replay Memory size: 327

____________________________________________________________________________________________
Episode 564 Done!
Episode reward: -1266.0
Epsilon: 0.718500000000031
Replay Memory size: 258

____________________________________________________________________________________________
Episode 565 Done!
Episode reward: -550.0
Epsilon: 0.7180000000000311
Replay Memory size: 298

____________________________________________________________________________________________
Episode 566 Done!
Episode reward: -1205.0
Eps

  fig, (ax1, ax2) = plt.subplots(2, 1)


Graph saved at: drive/My Drive/snakeAI/trainings/transfer_learningDQN/ResNet50/10Jun2020__200157933012/Reward.png
____________________________________________________________________________________________
Episode 582 Done!
Episode reward: -1387.0
Epsilon: 0.709500000000032
Replay Memory size: 241

____________________________________________________________________________________________
Episode 583 Done!
Episode reward: -500.0
Epsilon: 0.709000000000032
Replay Memory size: 249

____________________________________________________________________________________________
Episode 584 Done!
Episode reward: -1369.0
Epsilon: 0.7085000000000321
Replay Memory size: 263

____________________________________________________________________________________________
Episode 585 Done!
Episode reward: -500.0
Epsilon: 0.7080000000000322
Replay Memory size: 303

____________________________________________________________________________________________
Episode 586 Done!
Episode reward: -500.0
Epsi

  fig, (ax1, ax2) = plt.subplots(2, 1)


Graph saved at: drive/My Drive/snakeAI/trainings/transfer_learningDQN/ResNet50/10Jun2020__200157933012/Reward.png
____________________________________________________________________________________________
Episode 602 Done!
Episode reward: -550.0
Epsilon: 0.6995000000000331
Replay Memory size: 318

____________________________________________________________________________________________
Episode 603 Done!
Episode reward: -500.0
Epsilon: 0.6990000000000332
Replay Memory size: 262

____________________________________________________________________________________________
Episode 604 Done!
Episode reward: -550.0
Epsilon: 0.6985000000000332
Replay Memory size: 238

____________________________________________________________________________________________
Episode 605 Done!
Episode reward: -400.0
Epsilon: 0.6980000000000333
Replay Memory size: 374

____________________________________________________________________________________________
Episode 606 Done!
Episode reward: -1170.0
Eps

  fig, (ax1, ax2) = plt.subplots(2, 1)


Graph saved at: drive/My Drive/snakeAI/trainings/transfer_learningDQN/ResNet50/10Jun2020__200157933012/Reward.png
____________________________________________________________________________________________
Episode 622 Done!
Episode reward: -550.0
Epsilon: 0.6895000000000342
Replay Memory size: 337

____________________________________________________________________________________________
Episode 623 Done!
Episode reward: -600.0
Epsilon: 0.6890000000000343
Replay Memory size: 441

____________________________________________________________________________________________
Episode 624 Done!
Episode reward: -550.0
Epsilon: 0.6885000000000343
Replay Memory size: 449

____________________________________________________________________________________________
Episode 625 Done!
Episode reward: -550.0
Epsilon: 0.6880000000000344
Replay Memory size: 393

____________________________________________________________________________________________
Episode 626 Done!
Episode reward: -450.0
Epsi

  fig, (ax1, ax2) = plt.subplots(2, 1)


Graph saved at: drive/My Drive/snakeAI/trainings/transfer_learningDQN/ResNet50/10Jun2020__200157933012/Reward.png
____________________________________________________________________________________________
Episode 642 Done!
Episode reward: -550.0
Epsilon: 0.6795000000000353
Replay Memory size: 454

____________________________________________________________________________________________
Episode 643 Done!
Episode reward: -1392.0
Epsilon: 0.6790000000000354
Replay Memory size: 491

____________________________________________________________________________________________
Episode 644 Done!
Episode reward: -1080.0
Epsilon: 0.6785000000000354
Replay Memory size: 378

____________________________________________________________________________________________
Episode 645 Done!
Episode reward: -1320.0
Epsilon: 0.6780000000000355
Replay Memory size: 455

____________________________________________________________________________________________
Episode 646 Done!
Episode reward: -450.0
E

  fig, (ax1, ax2) = plt.subplots(2, 1)


Graph saved at: drive/My Drive/snakeAI/trainings/transfer_learningDQN/ResNet50/10Jun2020__200157933012/Reward.png
____________________________________________________________________________________________
Episode 662 Done!
Episode reward: -600.0
Epsilon: 0.6695000000000364
Replay Memory size: 248

____________________________________________________________________________________________
Episode 663 Done!
Episode reward: -550.0
Epsilon: 0.6690000000000365
Replay Memory size: 320

____________________________________________________________________________________________
Episode 664 Done!
Episode reward: -550.0
Epsilon: 0.6685000000000365
Replay Memory size: 264

____________________________________________________________________________________________
Episode 665 Done!
Episode reward: -450.0
Epsilon: 0.6680000000000366
Replay Memory size: 272

____________________________________________________________________________________________
Episode 666 Done!
Episode reward: -1407.0
Eps

  fig, (ax1, ax2) = plt.subplots(2, 1)


Graph saved at: drive/My Drive/snakeAI/trainings/transfer_learningDQN/ResNet50/10Jun2020__200157933012/Reward.png
____________________________________________________________________________________________
Episode 682 Done!
Episode reward: -1407.0
Epsilon: 0.6595000000000375
Replay Memory size: 301

____________________________________________________________________________________________
Episode 683 Done!
Episode reward: -600.0
Epsilon: 0.6590000000000376
Replay Memory size: 341

____________________________________________________________________________________________
Episode 684 Done!
Episode reward: -550.0
Epsilon: 0.6585000000000376
Replay Memory size: 285

____________________________________________________________________________________________
Episode 685 Done!
Episode reward: -550.0
Epsilon: 0.6580000000000377
Replay Memory size: 229

____________________________________________________________________________________________
Episode 686 Done!
Episode reward: -500.0
Eps

  fig, (ax1, ax2) = plt.subplots(2, 1)


Graph saved at: drive/My Drive/snakeAI/trainings/transfer_learningDQN/ResNet50/10Jun2020__200157933012/Reward.png
____________________________________________________________________________________________
Episode 702 Done!
Episode reward: -500.0
Epsilon: 0.6495000000000386
Replay Memory size: 317

____________________________________________________________________________________________
Episode 703 Done!
Episode reward: -500.0
Epsilon: 0.6490000000000387
Replay Memory size: 325

____________________________________________________________________________________________
Episode 704 Done!
Episode reward: -550.0
Epsilon: 0.6485000000000387
Replay Memory size: 269

____________________________________________________________________________________________
Episode 705 Done!
Episode reward: -1470.0
Epsilon: 0.6480000000000388
Replay Memory size: 300

____________________________________________________________________________________________
Episode 706 Done!
Episode reward: -550.0
Eps

In [0]:
assert False
env = SnakeMaze(20, 20, 1, with_boundaries=False)
env.reset()
imgs = []
while env.num_active_agents:
    imgs.append(env.snake_matrices[0])
    # state = resize_image(env.snake_matrices[0], IMAGE_SIZE)
    direction = np.argmax(model(np.array([state])))
    env.step({0: direction})

In [0]:
generate_animation(imgs)