# playing Atari with Deep Learning

In [3]:
# choose game
# Gym environment id; passed to gym.make() and embedded in the results directory name.
game = "Breakout-v0"

# generate Dataset by random play

* saving only good sequences

In [4]:
import numpy as np
import gym

env = gym.make(game)
x_data = []  # images (states)
y_data = []  # labels (actions)

DATASET_SIZE = 500  # 1500
saved_episodes = 0  # number of rewarded sequences stored so far

# Generate the dataset by random play. Frames/actions are buffered and only
# committed to x_data / y_data when the buffered sequence ends in a positive reward.
the_end = False
while not the_end:
    done = False
    state = env.reset()
    episode_obs = []
    episode_acts = []

    while not done:
        action = env.action_space.sample()
        episode_obs.append(state)
        episode_acts.append(action)
        state, reward, done, info = env.step(action)

        if reward < 0:
            # Sequence led to a penalty: discard it.
            episode_obs = []
            episode_acts = []
        elif reward > 0:
            # Sequence led to a reward: keep it and start a fresh buffer.
            x_data += episode_obs
            y_data += episode_acts
            episode_obs = []
            episode_acts = []
            saved_episodes += 1
            if saved_episodes % 100 == 0:
                print(saved_episodes)
            if saved_episodes >= DATASET_SIZE:
                print("Dataset completed")
                the_end = True
                # Stop immediately; otherwise the rest of this episode would
                # keep adding sequences beyond DATASET_SIZE.
                break

KeyboardInterrupt: 

# Preprocessing

In [None]:
# Convert the collected Python lists into NumPy arrays for the preprocessing steps.
x_data = np.array(x_data)
y_data = np.array(y_data)
# Show both shapes as the cell output.
x_data.shape, y_data.shape

In [None]:
# Shape of a single raw frame as returned by the environment.
x_data[0].shape

### resize images to 84x84 pixels

In [None]:
import cv2
def resize(img, height=84, width=84):
    """Resize an image with area interpolation.

    Args:
        img: Image array accepted by ``cv2.resize``.
        height: Target height in pixels (default 84).
        width: Target width in pixels (default 84).

    Returns:
        The resized image as returned by ``cv2.resize``.
    """
    # cv2.resize expects the target size as (width, height).
    return cv2.resize(img, (width, height), interpolation=cv2.INTER_AREA)

In [None]:
# Apply resize() to every collected frame and pack the results into one array.
resized_x_data = np.array(list(map(resize, x_data)))

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline  

# Side-by-side comparison of the first frame before and after resizing.
fig, axs = plt.subplots(1,2)

axs[0].imshow(x_data[0, :, :, :])
axs[0].set_title("Original", fontsize="15")
axs[1].imshow(resized_x_data[0, :, :, :])
axs[1].set_title("Resized", fontsize="15")
plt.show()

### RGB to Grayscale

In [None]:
def grayscale(img):
    """Convert a 3-channel RGB image to grayscale with an explicit channel axis.

    The frames in this notebook come from the gym environment and are shown
    directly with ``plt.imshow``, i.e. they are treated as RGB. The conversion
    therefore uses ``COLOR_RGB2GRAY``; ``COLOR_BGR2GRAY`` would swap the red
    and blue luminance weights.

    Args:
        img: Image array of shape (H, W, 3) in RGB channel order.

    Returns:
        Array of shape (H, W, 1) holding the grayscale image.
    """
    gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
    # Re-add the channel axis that cvtColor drops so downstream code sees (H, W, 1).
    return np.reshape(gray, (gray.shape[0], gray.shape[1], 1))

In [None]:
# Apply grayscale() to every resized frame and pack the results into one array.
gray_x_data = np.array(list(map(grayscale, resized_x_data)))

In [None]:
# Side-by-side comparison of the first resized frame and its grayscale version.
fig, axs = plt.subplots(1, 2)
axs[0].imshow(resized_x_data[0, :, :, :])
axs[0].set_title("Resized", fontsize="15")
axs[0].axis('off')
# The grayscale array has a trailing channel axis; select channel 0 for display.
axs[1].imshow(gray_x_data[0, :, :, 0], cmap="gray")
axs[1].set_title("Grayscale", fontsize="15")
axs[1].axis('off')
plt.show()

### Frame Stack

In [None]:
from collections import deque
def framestack(dataset, num_frames=4):
    """Build a stack of the most recent frames for every frame in ``dataset``.

    The window is seeded with ``num_frames`` copies of the first frame, so the
    first returned stack consists of identical frames. The dataset is treated
    as one continuous stream; episode boundaries are not considered.

    Args:
        dataset: Sequence of frames with identical shape (H, W) or (H, W, C).
        num_frames: Number of consecutive frames per stack (default 4).

    Returns:
        A list with one float32 array per input frame. Each array has shape
        (H, W, C * num_frames), or (H, W, num_frames) for 2-D frames, with
        the newest frame in the last position.
    """
    if len(dataset) == 0:
        return []

    # Seed the sliding window with copies of the first frame.
    frames = deque([dataset[0]] * num_frames, maxlen=num_frames)

    data = []
    for state in dataset:
        frames.append(state)
        stacked = np.asarray(frames, dtype=np.float32)
        # Move the stack axis to the end and merge it with any channel axis.
        stacked = np.moveaxis(stacked, 0, -1)
        data.append(stacked.reshape(stacked.shape[0], stacked.shape[1], -1))

    return data

In [None]:
# Stack the grayscale frames and pack the resulting list into one array.
stack_data = np.array(framestack(gray_x_data))

In [None]:
# Show one channel from each of four consecutive frame stacks.
# NOTE(review): both the sample index and the channel index advance per subplot,
# so the four images come from four different stacks rather than from the four
# channels of a single stack - confirm this is the intended visualisation.
fig, axs = plt.subplots(1, 4, figsize=(10,5))
axs[0].imshow(stack_data[0+4, :, :, 0], cmap="gray") # +4, because of 4 same initial frames
axs[0].axis('off')
axs[1].imshow(stack_data[1+4, :, :, 1], cmap="gray")
axs[1].axis('off')
axs[2].imshow(stack_data[2+4, :, :, 2], cmap="gray")
axs[2].axis('off')
axs[3].imshow(stack_data[3+4, :, :, 3], cmap="gray")
axs[3].axis('off')
plt.show()

### Sequences

In [None]:
def sequences(x_data, y_data, seq_len=4):
    """Build sliding windows of consecutive frames with the label of the last frame.

    Window ``k`` contains ``x_data[k:k + seq_len]`` and is labelled with
    ``y_data[k + seq_len - 1]``, i.e. the action taken at the newest frame.

    Args:
        x_data: Array-like of frames, indexed along axis 0.
        y_data: Array-like of labels, one per frame.
        seq_len: Number of consecutive frames per window (default 4).

    Returns:
        Tuple ``(x, y)`` where ``x`` has shape
        ``(len(x_data) - seq_len + 1, seq_len, ...)`` and ``y`` has one label
        per window.

    Raises:
        ValueError: If there are fewer than ``seq_len`` frames or fewer
            labels than frames.
    """
    x_data = np.asarray(x_data)
    y_data = np.asarray(y_data)
    n_samples = x_data.shape[0]

    if n_samples < seq_len:
        raise ValueError("need at least seq_len frames to build a sequence")
    if y_data.shape[0] < n_samples:
        raise ValueError("y_data must contain one label per frame")

    # `end` is the exclusive end index of each window, so the window's last
    # frame is x_data[end - 1] and its label is y_data[end - 1].
    x = np.stack([x_data[end - seq_len:end] for end in range(seq_len, n_samples + 1)])
    y = np.array(y_data[seq_len - 1:n_samples])

    return x, y

In [None]:
# Build the frame sequences (and their labels) from the grayscale frames.
x_sequences, y_sequences = sequences(gray_x_data, y_data)

In [None]:
# Show the shapes of the sequence inputs and labels as the cell output.
x_sequences.shape, y_sequences.shape

### converting labels to categorical labels

In [None]:
from tensorflow.keras.utils import to_categorical
# One-hot encode the action labels; the number of classes is the size of the
# environment's action space.
y = to_categorical(y=y_data, num_classes=env.action_space.n)
y_seq = to_categorical(y=y_sequences, num_classes=env.action_space.n)

### shuffle data

In [None]:
# create random indices
idx = np.random.permutation(x_data.shape[0])
# x - resized grayscale input
# x_stacked - resized grayscale input in framestacks
# labels - actions in to_categorical
# The same permutation is applied to all three arrays so inputs and labels stay aligned.
x, x_stacked, labels = gray_x_data[idx], stack_data[idx], y[idx]

# The sequence dataset has its own length, so it gets a separate permutation.
# Note that x_sequences is overwritten with its shuffled version here.
idx_seq = np.random.permutation(x_sequences.shape[0])
x_sequences, labels_seq = x_sequences[idx_seq], y_seq[idx_seq]

# create a model


In [None]:
import tensorflow
from tensorflow.keras.layers import Activation
from tensorflow.keras.layers import Conv2D
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import ConvLSTM2D
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Input
from tensorflow.keras.models import Model
from tensorflow.keras.losses import Huber
from tensorflow.keras.optimizers import Adam

def create_nn(input_shape, output_shape, lstm, learning_rate=0.00005):
    """Build and compile the action-classification network.

    Three convolutional layers (ConvLSTM2D or Conv2D) are followed by a
    512-unit dense layer and a softmax output.

    Args:
        input_shape: Shape of one sample without the batch axis.
            With ``lstm=True``: (time, rows, cols, channels);
            otherwise: (rows, cols, channels).
        output_shape: Number of output classes (units of the softmax layer).
        lstm: If true, use ConvLSTM2D layers; otherwise use Conv2D layers.
        learning_rate: Learning rate for the Adam optimizer.

    Returns:
        A compiled Keras ``Model`` using categorical cross-entropy loss and
        the ``categorical_accuracy`` metric.
    """
    net_input = Input(shape=input_shape)

    if lstm:  # Input: samples, time, rows, cols, channels
        x = ConvLSTM2D(filters=32, kernel_size=(8, 8), strides=(4, 4), padding="same", return_sequences=True, data_format='channels_last')(net_input)
        x = ConvLSTM2D(filters=64, kernel_size=(4, 4), strides=(2, 2), padding="same", return_sequences=True)(x)
        # The last recurrent layer returns only its final output so Flatten sees a single feature map.
        x = ConvLSTM2D(filters=64, kernel_size=(3, 3), strides=(1, 1), padding="same", return_sequences=False)(x)
    else:  # Input: rows, cols, channels
        x = Conv2D(filters=32, kernel_size=(8, 8), strides=(4, 4), padding="same", activation="relu")(net_input)
        x = Conv2D(filters=64, kernel_size=(4, 4), strides=(2, 2), padding="same", activation="relu")(x)
        x = Conv2D(filters=64, kernel_size=(3, 3), strides=(1, 1), padding="same", activation="relu")(x)

    x = Flatten()(x)
    x = Dense(512, activation="relu")(x)
    net_output = Dense(output_shape, activation="softmax")(x)

    # `learning_rate` replaces the deprecated `lr` keyword of the Keras optimizers.
    optimizer = Adam(learning_rate=learning_rate)

    model = Model(inputs=net_input, outputs=net_output)
    model.compile(loss="categorical_crossentropy", optimizer=optimizer, metrics=["categorical_accuracy"])

    return model

# Train a model on resized-grayscale input



In [None]:
# Per-sample input shape (batch axis dropped) that is passed to create_nn.
x.shape[1:]

In [None]:
# Width of the one-hot labels, i.e. the number of output classes.
labels.shape[1]

In [None]:
# Model on single grayscale frames; lstm=False selects the Conv2D branch of create_nn.
model_1 = create_nn(x.shape[1:], labels.shape[1], False)

In [None]:
# Number of training epochs; also reused in the output file names when saving results.
EPOCHS = 85

# Train on the shuffled single-frame dataset; result_1.history holds the per-epoch metrics.
result_1 = model_1.fit(
    x,
    labels,
    batch_size=128,
    epochs=EPOCHS, 
    verbose=1)

In [None]:
# Let model_1 play five episodes greedily and report the total reward of each.
for episode in range(5):
    episode_reward = 0.0
    done = False
    state = env.reset()
    while True:
        # Same preprocessing as for the training data: resize, grayscale,
        # add a batch axis and cast to float32.
        state = grayscale(resize(state))
        state = np.reshape(state, (1, 84, 84, 1)).astype("float32")
        if done:
            break
        # Greedy policy: take the action with the highest predicted probability.
        action = np.argmax(model_1.predict(state))
        state, reward, done, info = env.step(action)
        episode_reward += reward
    print("Episode:", episode+1, "\tReward:", episode_reward)

In [None]:
# Per-epoch training loss and accuracy recorded by model_1.fit.
res1_loss = result_1.history['loss']
res1_acc = result_1.history['categorical_accuracy']

In [None]:
# Plot the training loss and accuracy of model_1 over the epochs.
plt.figure(figsize=(15,10))
plt.grid(linestyle='-')
plt.plot(res1_loss, label="loss")
plt.plot(res1_acc, label="accuracy")
plt.legend()
plt.show()

In [None]:
from datetime import datetime
import os
now = datetime.now()
# Use an explicit timestamp format: str(now) contains spaces and colons,
# which are awkward in paths and invalid in directory names on Windows.
path = "results/" + now.strftime("%Y-%m-%d_%H-%M-%S") + "_" + game

try:
    # exist_ok avoids the check-then-create race of a separate os.path.exists test.
    os.makedirs(path, exist_ok=True)
except OSError:
    print ('Error: Creating directory. ' +  path)

In [None]:
# Save the weights of model_1; the file name encodes dataset size and epoch count.
model_1.save_weights(path + "/nn_" + str(DATASET_SIZE) + "Examples_" + str(EPOCHS) + "Epochs.h5")

In [None]:
import pandas as pd
# Store the per-epoch loss/accuracy of model_1 as a CSV file next to the weights.
df = pd.DataFrame(list(zip(res1_loss, res1_acc)), 
               columns =['Loss', 'Accuracy']) 
df.to_csv(path + "/nn_" + str(DATASET_SIZE) + "Examples_" + str(EPOCHS) + "Epochs.csv",mode="w", index=False)

# Train a model on resized-grayscale-framestack input



In [None]:
# Per-sample input shape of the frame-stack dataset (batch axis dropped).
x_stacked.shape[1:]

In [None]:
# Width of the one-hot labels, i.e. the number of output classes.
labels.shape[1]

In [None]:
# Model on stacked frames; lstm=False selects the Conv2D branch of create_nn.
model_2 = create_nn(x_stacked.shape[1:], labels.shape[1], False)

In [None]:
# Number of training epochs; also reused in the output file names when saving results.
EPOCHS = 85#250

# Train on the shuffled frame-stack dataset; result_2.history holds the per-epoch metrics.
result_2 = model_2.fit(
    x_stacked,
    labels,
    batch_size=128,
    epochs=EPOCHS, 
    verbose=1)

In [None]:
# Let model_2 play five episodes greedily on stacks of the four latest frames.
# The stacked input is held in `stacked_state` rather than `framestack`, so the
# framestack() function defined earlier in the notebook is not overwritten.
for episode in range(5):
    done = False
    episode_reward = 0.0
    state = env.reset()
    state = resize(state)
    state = grayscale(state)
    state = np.reshape(state, (84, 84, 1))
    # Initialize the deque with four copies of the first frame.
    frames = deque([state] * 4, maxlen=4)
    stacked_state = np.asarray(frames, dtype=np.float32)
    stacked_state = np.moveaxis(stacked_state, 0, -1).reshape(1, 84, 84, 4)

    # Play one episode.
    while not done:
        # Greedy policy: take the action with the highest predicted probability.
        action = np.argmax(model_2.predict(stacked_state))
        state, reward, done, info = env.step(action)
        episode_reward += reward
        state = resize(state)
        state = grayscale(state)
        state = np.reshape(state, (84, 84, 1))
        # The deque drops the oldest frame automatically (maxlen=4).
        frames.append(state)
        stacked_state = np.asarray(frames, dtype=np.float32)
        stacked_state = np.moveaxis(stacked_state, 0, -1).reshape(1, 84, 84, 4)

    print("Episode:", episode+1, "\tReward:", episode_reward)

In [None]:
# Per-epoch training loss and accuracy recorded by model_2.fit.
res2_loss = result_2.history['loss']
res2_acc = result_2.history['categorical_accuracy']

In [None]:
# Plot the training loss and accuracy of model_2 over the epochs.
plt.figure(figsize=(15,10))
plt.grid(linestyle='-')
plt.plot(res2_loss, label="loss")
plt.plot(res2_acc, label="accuracy")
plt.legend()
plt.show()

In [None]:
# Save the weights of model_2; the file name encodes dataset size and epoch count.
model_2.save_weights(path + "/nn_framestack_" + str(DATASET_SIZE) + "Examples_" + str(EPOCHS) + "Epochs.h5")

In [None]:
# Store the per-epoch loss/accuracy of model_2 as a CSV file next to the weights.
df = pd.DataFrame(list(zip(res2_loss, res2_acc)), 
               columns =['Loss', 'Accuracy']) 
df.to_csv(path + "/nn_framestack_" + str(DATASET_SIZE) + "Examples_" + str(EPOCHS) + "Epochs.csv",mode="w", index=False)

# Train a model on image-sequences input

In [None]:
# Per-sample input shape of the sequence dataset (batch axis dropped).
x_sequences.shape[1:]

In [None]:
# Width of the one-hot sequence labels, i.e. the number of output classes.
labels_seq.shape[1]

In [None]:
# Model on frame sequences; lstm=True selects the ConvLSTM2D branch of create_nn.
model_3 = create_nn(x_sequences.shape[1:], labels_seq.shape[1], True)

In [None]:
# Number of training epochs; also reused in the output file names when saving results.
EPOCHS = 85#250

# Train on the shuffled sequence dataset; result_3.history holds the per-epoch metrics.
result_3 = model_3.fit(
    x_sequences,
    labels_seq,
    batch_size=128,
    epochs=EPOCHS, 
    verbose=1)

In [None]:
# Per-epoch training loss and accuracy recorded by model_3.fit.
res3_loss = result_3.history['loss']
res3_acc = result_3.history['categorical_accuracy']

In [None]:
# Plot the training loss and accuracy of model_3 over the epochs.
plt.figure(figsize=(15,10))
plt.grid(linestyle='-')
plt.plot(res3_loss, label="loss")
plt.plot(res3_acc, label="accuracy")
plt.legend()
plt.show()

In [None]:
# Store the per-epoch loss/accuracy of model_3 as a CSV file in the results directory.
df = pd.DataFrame(list(zip(res3_loss, res3_acc)), 
               columns =['Loss', 'Accuracy']) 
df.to_csv(path + "/nn_seq_" + str(DATASET_SIZE) + "Examples_" + str(EPOCHS) + "Epochs.csv",mode="w", index=False)

In [None]:
# Save the weights of model_3; the file name encodes dataset size and epoch count.
model_3.save_weights(path + "/nn_seq_" + str(DATASET_SIZE) + "Examples_" + str(EPOCHS) + "Epochs.h5")