In [0]:
import sys
sys.path.append("/content/drive/My Drive/snakeAI")

from collections import namedtuple

Experience = namedtuple('Experience', ('state', 'direction', 'next_state', 'reward'))

import tensorflow as tf
import tensorflow.keras as k

from Snake.environment import SnakeMaze
from Snake.variables import Status, Cell
from models.replay_memory import ReplayMemory
from models.model_train import train_dqn
from Snake.utils import euclidean_distance

from datetime import datetime
import numpy as np
import cv2
import os
from time import time


# ResNet50

In [0]:
def ResNet50(input_shape, num_not_trainable_blocks=4):
  base_model = k.applications.ResNet50(include_top=False, input_shape=input_shape)
  base_model.trainable = False

  for l in base_model.layers:
    if l.name.split("_")[0] == f"conv{num_not_trainable_blocks+1}":
      break
    l.trainable = True

  model = k.Sequential([
      base_model,
      k.layers.Flatten(),
      k.layers.Dense(4096, activation='relu'),
      k.layers.Dense(2048, activation='relu'),
      k.layers.Dense(1024, activation='relu'),
      k.layers.Dense(4, activation='softmax'),
  ])

  model.build(input_shape=input_shape)
  return model

### Configuration

In [0]:
NUM_NOT_TRAINABLE_BLOCKS = 4

IMAGE_SIZE = (64, 64)
IMAGE_SHAPE = (*IMAGE_SIZE, 3)

learning_rate = 1e-4

config = {
  "num_rolling_avg_sample": 10,
  "evaluate_each": 20,
  "save_models": True,
  "save_graphs": True,
  "gamma": .99,
  "epsilon_decay": 0.005,
  "boundaries": False,
  "maze_width": 10,
  "image_size": IMAGE_SIZE,
  "batch_size": 128,
  "comment": f"""
  Architecture: ResNet50
  Weights: imagenet, {NUM_NOT_TRAINABLE_BLOCKS} non trainable blocks
  Optimizer: Adam
  Hyperparameters:
    lr = {learning_rate}
  Reward:
    if snake.status == Status.DEAD:
        return -1000
        return 0
    else:
        r = 0
        r -= euclidean_distance(envv.snakes[0].body[0], list(envv.food)[0])
        r += 10 if snake.steps_without_food == 1 else 0
        return r
  Reward only for eating food
  """
}

### Training

In [0]:
envv = SnakeMaze(10, 10, 1)
envv.reset()

In [0]:
def reward(snake, env: SnakeMaze, direction):
    if snake.status == Status.DEAD:
        return -1000
        return 0
    else:
        r = 0
        r -= euclidean_distance(envv.snakes[0].body[0], list(envv.food)[0])
        r += 10 if snake.steps_without_food == 1 else 0
        return r

In [0]:
t = time()
config["training_dir"] = os.path.join(*['drive', 'My Drive', 'snakeAI', 'trainings', 'transfer_learningDQN', "ResNet50", datetime.now().strftime('%d%h%Y__%H%M%S%f')])
optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)

model = ResNet50(input_shape=IMAGE_SHAPE, num_not_trainable_blocks=NUM_NOT_TRAINABLE_BLOCKS)
model.build(input_shape=(None, *IMAGE_SHAPE))
model.compile(optimizer=optimizer)

model = train_dqn(model, optimizer, reward, **config)
t = time() - t

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5
____________________________________________________________________________________________
Episode 1 Done!
Episode reward: -1523.1546211727768
Epsilon: 1.0
Replay Memory size: 104

Model saved at: drive/My Drive/snakeAI/trainings/transfer_learningDQN/ResNet50/10Jun2020__220852167213/model
Graph saved at: drive/My Drive/snakeAI/trainings/transfer_learningDQN/ResNet50/10Jun2020__220852167213/Reward.png
____________________________________________________________________________________________
Episode 2 Done!
Episode reward: -1513.1546211727768
Epsilon: 0.995
Replay Memory size: 144

____________________________________________________________________________________________
Episode 3 Done!
Episode reward: -1523.1546211727768
Epsilon: 0.99
Replay Memory size: 152

__________________________________________________________________________________

  fig, (ax1, ax2) = plt.subplots(2, 1)


Graph saved at: drive/My Drive/snakeAI/trainings/transfer_learningDQN/ResNet50/10Jun2020__220852167213/Reward.png
____________________________________________________________________________________________
Episode 402 Done!
Episode reward: -1503.154621172777
Epsilon: 0.01
Replay Memory size: 253

____________________________________________________________________________________________
Episode 403 Done!
Episode reward: -1523.1546211727768
Epsilon: 0.01
Replay Memory size: 197

____________________________________________________________________________________________
Episode 404 Done!
Episode reward: -1523.1546211727768
Epsilon: 0.01
Replay Memory size: 141

____________________________________________________________________________________________
Episode 405 Done!
Episode reward: -1523.1546211727768
Epsilon: 0.01
Replay Memory size: 213

____________________________________________________________________________________________
Episode 406 Done!
Episode reward: -1523.1546211727

  fig, (ax1, ax2) = plt.subplots(2, 1)


Graph saved at: drive/My Drive/snakeAI/trainings/transfer_learningDQN/ResNet50/10Jun2020__220852167213/Reward.png
____________________________________________________________________________________________
Episode 422 Done!
Episode reward: -1523.1546211727768
Epsilon: 0.01
Replay Memory size: 413

____________________________________________________________________________________________
Episode 423 Done!
Episode reward: -1523.1546211727768
Epsilon: 0.01
Replay Memory size: 389

____________________________________________________________________________________________
Episode 424 Done!
Episode reward: -1523.1546211727768
Epsilon: 0.01
Replay Memory size: 365

____________________________________________________________________________________________
Episode 425 Done!
Episode reward: -1513.1546211727768
Epsilon: 0.01
Replay Memory size: 341

____________________________________________________________________________________________
Episode 426 Done!
Episode reward: -1523.154621172

  fig, (ax1, ax2) = plt.subplots(2, 1)


Graph saved at: drive/My Drive/snakeAI/trainings/transfer_learningDQN/ResNet50/10Jun2020__220852167213/Reward.png
____________________________________________________________________________________________
Episode 442 Done!
Episode reward: -1523.1546211727768
Epsilon: 0.01
Replay Memory size: 381

____________________________________________________________________________________________
Episode 443 Done!
Episode reward: -1523.1546211727768
Epsilon: 0.01
Replay Memory size: 389

____________________________________________________________________________________________
Episode 444 Done!
Episode reward: -1523.1546211727768
Epsilon: 0.01
Replay Memory size: 237

____________________________________________________________________________________________
Episode 445 Done!
Episode reward: -1523.1546211727768
Epsilon: 0.01
Replay Memory size: 309

____________________________________________________________________________________________
Episode 446 Done!
Episode reward: -1523.154621172

  fig, (ax1, ax2) = plt.subplots(2, 1)


Graph saved at: drive/My Drive/snakeAI/trainings/transfer_learningDQN/ResNet50/10Jun2020__220852167213/Reward.png
____________________________________________________________________________________________
Episode 462 Done!
Episode reward: -1523.1546211727768
Epsilon: 0.01
Replay Memory size: 317

____________________________________________________________________________________________
Episode 463 Done!
Episode reward: -1523.1546211727768
Epsilon: 0.01
Replay Memory size: 293

____________________________________________________________________________________________
Episode 464 Done!
Episode reward: -1523.1546211727768
Epsilon: 0.01
Replay Memory size: 429

____________________________________________________________________________________________
Episode 465 Done!
Episode reward: -1523.1546211727768
Epsilon: 0.01
Replay Memory size: 277

____________________________________________________________________________________________
Episode 466 Done!
Episode reward: -1523.154621172

  fig, (ax1, ax2) = plt.subplots(2, 1)


Episode 482 Done!
Episode reward: -1523.1546211727768
Epsilon: 0.01
Replay Memory size: 349

____________________________________________________________________________________________
Episode 483 Done!
Episode reward: -1523.1546211727768
Epsilon: 0.01
Replay Memory size: 421

____________________________________________________________________________________________
Episode 484 Done!
Episode reward: -1523.1546211727768
Epsilon: 0.01
Replay Memory size: 301

____________________________________________________________________________________________
Episode 485 Done!
Episode reward: -1523.1546211727768
Epsilon: 0.01
Replay Memory size: 277

____________________________________________________________________________________________
Episode 486 Done!
Episode reward: -1513.154621172777
Epsilon: 0.01
Replay Memory size: 285

____________________________________________________________________________________________
Episode 487 Done!
Episode reward: -1513.1546211727768
Epsilon: 0.01
Rep

  fig, (ax1, ax2) = plt.subplots(2, 1)


Episode 502 Done!
Episode reward: -1513.1546211727768
Epsilon: 0.01
Replay Memory size: 445

____________________________________________________________________________________________
Episode 503 Done!
Episode reward: -1523.1546211727768
Epsilon: 0.01
Replay Memory size: 421

____________________________________________________________________________________________
Episode 504 Done!
Episode reward: -1523.1546211727768
Epsilon: 0.01
Replay Memory size: 429

____________________________________________________________________________________________
Episode 505 Done!
Episode reward: -1523.1546211727768
Epsilon: 0.01
Replay Memory size: 277

____________________________________________________________________________________________
Episode 506 Done!
Episode reward: -1523.1546211727768
Epsilon: 0.01
Replay Memory size: 253

____________________________________________________________________________________________
Episode 507 Done!
Episode reward: -1503.154621172777
Epsilon: 0.01
Rep

  fig, (ax1, ax2) = plt.subplots(2, 1)


Graph saved at: drive/My Drive/snakeAI/trainings/transfer_learningDQN/ResNet50/10Jun2020__220852167213/Reward.png
____________________________________________________________________________________________
Episode 522 Done!
Episode reward: -1523.1546211727768
Epsilon: 0.01
Replay Memory size: 317

____________________________________________________________________________________________
Episode 523 Done!
Episode reward: -1523.1546211727768
Epsilon: 0.01
Replay Memory size: 325

____________________________________________________________________________________________
Episode 524 Done!
Episode reward: -1523.1546211727768
Epsilon: 0.01
Replay Memory size: 365

____________________________________________________________________________________________
Episode 525 Done!
Episode reward: -1523.1546211727768
Epsilon: 0.01
Replay Memory size: 341

____________________________________________________________________________________________
Episode 526 Done!
Episode reward: -1523.154621172

  fig, (ax1, ax2) = plt.subplots(2, 1)


Graph saved at: drive/My Drive/snakeAI/trainings/transfer_learningDQN/ResNet50/10Jun2020__220852167213/Reward.png
____________________________________________________________________________________________
Episode 542 Done!
Episode reward: -1523.1546211727768
Epsilon: 0.01
Replay Memory size: 349

____________________________________________________________________________________________
Episode 543 Done!
Episode reward: -1523.1546211727768
Epsilon: 0.01
Replay Memory size: 389

____________________________________________________________________________________________
Episode 544 Done!
Episode reward: -1523.1546211727768
Epsilon: 0.01
Replay Memory size: 365

____________________________________________________________________________________________
Episode 545 Done!
Episode reward: -1523.1546211727768
Epsilon: 0.01
Replay Memory size: 437

____________________________________________________________________________________________
Episode 546 Done!
Episode reward: -1523.154621172

In [0]:
assert False
env = SnakeMaze(20, 20, 1, with_boundaries=False)
env.reset()
imgs = []
while env.num_active_agents:
    imgs.append(env.snake_matrices[0])
    # state = resize_image(env.snake_matrices[0], IMAGE_SIZE)
    direction = np.argmax(model(np.array([state])))
    env.step({0: direction})

In [0]:
generate_animation(imgs)