# ML-Brawlhalla: Trial 3

## Import Packages
The environment module is intentionally not included with this notebook, so that the results cannot be abused.

In [1]:
from time import sleep
import numpy as np
import tensorflow as tf
from tensorflow import keras

from paiutils import neural_network as nn
from paiutils import reinforcement as rl
from paiutils import reinforcement_agents as ra

from environment import BrawlhallaFollow2 as Brawlhalla

## Create Environment

In [2]:
# Size of the last axis of every state; passed to the environment and reused
# below for the model's input shape and the memory's void state.
stack_size = 1
# Length of the time axis of the model input (also handed to rl.ETDMemory).
time_size = 3
# Environment class comes from the private `environment` module (not shipped).
env = Brawlhalla(stack_size=stack_size)
# Step limit passed to env.play_episodes while collecting data and training.
max_steps = 5000

## Create Models

In [3]:
# One sample has shape (time_size, 64 * 64 + 11, stack_size): for every time
# step a flattened 64x64 frame followed by 11 extra values, with stack_size
# entries on the last axis.
x0 = keras.layers.Input((time_size, 64 * 64 + 11, stack_size))

# Frame branch: drop the 11 trailing values and fold the stack and time axes
# into image channels. The pixel axis is moved in front of the stack/time axes
# *before* the reshape so that every output channel holds one whole frame.
# (Transposing to [0, 3, 2, 1] is only equivalent when stack_size == 1; with a
# larger stack the row-major reshape would scatter the pixels of a single
# frame across different channels.)
frame_input = keras.layers.Lambda(
    lambda x: tf.reshape(
        tf.transpose(x[:, :, :-11, :], [0, 2, 3, 1]),
        [-1, 64, 64, stack_size * time_size]
    )
)(x0)

# Side branch: the 11 trailing values of every time step / stack entry,
# flattened into a single vector per sample.
other_input = keras.layers.Lambda(
    lambda x: tf.reshape(x[:, :, -11:, :], [-1, 11 * stack_size * time_size])
)(x0)

# Convolutional encoder over the frames. Per the model summary, the first
# layer (32 filters, 5x5 kernel, stride 2) halves 64x64 to 32x32 and is
# followed by batch normalization; the remaining layers use 3x3 kernels.
x = nn.conv2d(32, 5, 2)(frame_input)
x = nn.conv2d(64, 3, 2)(x)
x = nn.conv2d(128, 3, 2)(x)
x = nn.conv2d(256, 3, 2)(x)
x = nn.conv2d(512, 3, 2)(x)
x = keras.layers.Flatten()(x)

# Join the image features with the side values before the dense head.
x = keras.layers.Concatenate()([x, other_input])
x = nn.dense(128)(x)

# Softmax over 9 outputs -- presumably one probability per available action
# (TODO confirm against the environment's action space).
outputs = keras.layers.Dense(9, activation='softmax')(x)
model = keras.models.Model(inputs=x0, outputs=outputs)

# NOTE(review): the training log reports negative losses, so the agent appears
# to optimize its own policy-gradient objective rather than this 'mse' loss;
# confirm whether the compiled loss is used at all (the optimizer likely is).
model.compile(optimizer=keras.optimizers.Adam(.0005), loss='mse')
model.summary()

Model: "functional_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 3, 4107, 1)] 0                                            
__________________________________________________________________________________________________
lambda (Lambda)                 (None, 64, 64, 3)    0           input_1[0][0]                    
__________________________________________________________________________________________________
conv2d (Conv2D)                 (None, 32, 32, 32)   2432        lambda[0][0]                     
__________________________________________________________________________________________________
batch_normalization (BatchNorma (None, 32, 32, 32)   128         conv2d[0][0]                     
_______________________________________________________________________________________

## Create the Agent

In [4]:
def create_memory(shape, dtype):
    """Build the memory buffer for one kind of data stored by the agent.

    Passed to the agent as its `create_memory` factory.

    Args:
        shape: Shape of the data the memory will hold. Only the batched
            model-input shape (None, time_size, 64 * 64 + 11, stack_size)
            gets special treatment.
        dtype: Unused; accepted because the factory is called with it.

    Returns:
        An rl.ETDMemory when `shape` is the state shape, otherwise a
        plain rl.Memory.
    """
    if shape == (None, time_size, 64 * 64 + 11, stack_size):
        # Void state: all-zero frame pixels and ones for the 11 trailing
        # values. It is built with the shape of a single state,
        # (64 * 64 + 11, stack_size), so it stays compatible with real states
        # for any stack_size (for stack_size == 1 this is exactly the former
        # (4107, 1) array).
        # Presumably ETDMemory uses it to pad the time window before an
        # episode has produced time_size states -- confirm in PAI-Utils.
        void_state = np.zeros((64 * 64 + 11, stack_size))
        void_state[-11:] = 1
        return rl.ETDMemory(time_size, void_state)
    return rl.Memory()

# Policy-gradient agent around the softmax model; .99 is presumably the
# discount rate for future rewards (TODO confirm PGAgent's signature).
agent = rl.PGAgent(model, .99, create_memory=create_memory)

## Train the Agent

In [5]:
save_dir = 'saves/'

# Stage 1: play episodes with memorizing on and training off (the log shows
# the memory growing and no loss lines), then save the agent.
# `random=True` presumably makes the environment pick random actions -- confirm.
sleep(2)  # short pause before play starts (reason not visible here)
agent.set_playing_data(memorizing=True, verbose=True)
num_random_episodes = 50
env.play_episodes(
    agent, num_random_episodes, max_steps,
    random=True, verbose=True, episode_verbose=False
)
agent.save(save_dir, note='PG_random')

# Stage 2: switch training on. The log prints one "Repeat/Epoch/loss" group
# per episode, each over 12800 samples.
training_settings = dict(
    training=True, memorizing=True,
    batch_size=64, mini_batch=12800,
    epochs=1, repeat=1,
    entropy_coef=0, verbose=True,
)
agent.set_playing_data(**training_settings)

# Save a checkpoint after every group of `num_episodes` episodes; the value
# returned by play_episodes is embedded in the checkpoint note.
num_episodes = 10
num_save_loops = 10
for ndx in range(num_save_loops):
    print(f'Save Loop: {ndx}')
    result = env.play_episodes(
        agent, num_episodes, max_steps,
        verbose=True, episode_verbose=False
    )
    checkpoint_note = f'PG_{ndx}_{result}'
    agent.save(save_dir, note=checkpoint_note)

Time: 12:57:16 - Episode: 1 - Steps: 415 - Total Reward: 0.09500000000001241 - Best Total Reward: 0.09500000000001241 - Average Total Reward: 0.09500000000001241 - Memory Size: 415
Time: 12:57:34 - Episode: 2 - Steps: 311 - Total Reward: -0.21099999999999186 - Best Total Reward: 0.09500000000001241 - Average Total Reward: -0.057999999999989726 - Memory Size: 726
Time: 12:57:56 - Episode: 3 - Steps: 457 - Total Reward: -1.5269999999999908 - Best Total Reward: 0.09500000000001241 - Average Total Reward: -0.5476666666666568 - Memory Size: 1183
Time: 12:58:26 - Episode: 4 - Steps: 677 - Total Reward: 2.9430000000000063 - Best Total Reward: 2.9430000000000063 - Average Total Reward: 0.325000000000009 - Memory Size: 1860
Time: 12:58:40 - Episode: 5 - Steps: 277 - Total Reward: -0.8869999999999951 - Best Total Reward: 2.9430000000000063 - Average Total Reward: 0.08260000000000818 - Memory Size: 2137
Time: 12:58:50 - Episode: 6 - Steps: 159 - Total Reward: -2.639 - Best Total Reward: 2.9430000

Time: 13:10:01 - Episode: 47 - Steps: 126 - Total Reward: -2.816 - Best Total Reward: 15.226000000000194 - Average Total Reward: -0.2802765957446702 - Memory Size: 15013
Time: 13:10:41 - Episode: 48 - Steps: 952 - Total Reward: -0.7819999999999858 - Best Total Reward: 15.226000000000194 - Average Total Reward: -0.2907291666666559 - Memory Size: 15965
Time: 13:11:01 - Episode: 49 - Steps: 379 - Total Reward: 1.9009999999999851 - Best Total Reward: 15.226000000000194 - Average Total Reward: -0.24599999999998975 - Memory Size: 16344
Time: 13:11:44 - Episode: 50 - Steps: 1076 - Total Reward: 7.144000000000229 - Best Total Reward: 15.226000000000194 - Average Total Reward: -0.09819999999998537 - Memory Size: 17420
Save Loop: 0
Repeat 1/1
Epoch 1/1
12800/12800 - loss: -1.9126914739608765
Time: 13:11:59 - Episode: 1 - Steps: 142 - Total Reward: 3.838 - Best Total Reward: 3.838 - Average Total Reward: 3.838 - Memory Size: 17562
Repeat 1/1
Epoch 1/1
12800/12800 - loss: -2.412881851196289
Time: 

12800/12800 - loss: -5.172921180725098
Time: 13:21:33 - Episode: 3 - Steps: 119 - Total Reward: -2.629 - Best Total Reward: -0.549999999999998 - Average Total Reward: -1.894999999999999 - Memory Size: 25035
Repeat 1/1
Epoch 1/1
12800/12800 - loss: -5.200456619262695
Time: 13:21:46 - Episode: 4 - Steps: 93 - Total Reward: -2.923 - Best Total Reward: -0.549999999999998 - Average Total Reward: -2.1519999999999992 - Memory Size: 25128
Repeat 1/1
Epoch 1/1
12800/12800 - loss: -5.261957168579102
Time: 13:22:01 - Episode: 5 - Steps: 129 - Total Reward: -2.7489999999999997 - Best Total Reward: -0.549999999999998 - Average Total Reward: -2.271399999999999 - Memory Size: 25257
Repeat 1/1
Epoch 1/1
12800/12800 - loss: -5.273199081420898
Time: 13:22:11 - Episode: 6 - Steps: 39 - Total Reward: 4.061000000000001 - Best Total Reward: 4.061000000000001 - Average Total Reward: -1.215999999999999 - Memory Size: 25296
Repeat 1/1
Epoch 1/1
12800/12800 - loss: -5.252521991729736
Time: 13:22:24 - Episode: 7

Repeat 1/1
Epoch 1/1
12800/12800 - loss: -5.381250381469727
Time: 13:32:17 - Episode: 8 - Steps: 77 - Total Reward: -2.827 - Best Total Reward: 0.6490000000000031 - Average Total Reward: -1.8472499999999987 - Memory Size: 31355
Repeat 1/1
Epoch 1/1
12800/12800 - loss: -5.311717987060547
Time: 13:32:49 - Episode: 9 - Steps: 328 - Total Reward: -2.3279999999999994 - Best Total Reward: 0.6490000000000031 - Average Total Reward: -1.9006666666666652 - Memory Size: 31683
Repeat 1/1
Epoch 1/1
12800/12800 - loss: -5.3541035652160645
Time: 13:33:06 - Episode: 10 - Steps: 135 - Total Reward: -2.625 - Best Total Reward: 0.6490000000000031 - Average Total Reward: -1.9730999999999987 - Memory Size: 31818
Save Loop: 7
Repeat 1/1
Epoch 1/1
12800/12800 - loss: -5.372714042663574
Time: 13:33:23 - Episode: 1 - Steps: 143 - Total Reward: -2.0230000000000006 - Best Total Reward: -2.0230000000000006 - Average Total Reward: -2.0230000000000006 - Memory Size: 31961
Repeat 1/1
Epoch 1/1
12800/12800 - loss: -5

## Test the Agent

In [6]:
# Restore a saved agent without its stored data (load_data=False).
path = 'saves/trial3'
agent.load(path, load_data=False)

sleep(2)  # short pause before play starts (reason not visible here)

# Evaluation only: no training and nothing memorized (the log reports
# "Memory Size: 0" for every episode).
agent.set_playing_data(training=False, memorizing=False)

num_test_episodes = 20
test_max_steps = 10000
# Kept as the cell's final expression so the returned value is displayed.
env.play_episodes(
    agent, num_test_episodes, test_max_steps,
    verbose=True, episode_verbose=False
)

Time: 19:15:27 - Episode: 1 - Steps: 627 - Total Reward: 4.6929999999999765 - Best Total Reward: 4.6929999999999765 - Average Total Reward: 4.6929999999999765 - Memory Size: 0
Time: 19:15:40 - Episode: 2 - Steps: 163 - Total Reward: -0.6129999999999955 - Best Total Reward: 4.6929999999999765 - Average Total Reward: 2.0399999999999903 - Memory Size: 0
Time: 19:16:10 - Episode: 3 - Steps: 587 - Total Reward: 2.5930000000000093 - Best Total Reward: 4.6929999999999765 - Average Total Reward: 2.22433333333333 - Memory Size: 0
Time: 19:16:23 - Episode: 4 - Steps: 181 - Total Reward: -1.570999999999997 - Best Total Reward: 4.6929999999999765 - Average Total Reward: 1.275499999999998 - Memory Size: 0
Time: 19:16:38 - Episode: 5 - Steps: 258 - Total Reward: -2.3579999999999997 - Best Total Reward: 4.6929999999999765 - Average Total Reward: 0.5487999999999985 - Memory Size: 0
Time: 19:16:45 - Episode: 6 - Steps: 59 - Total Reward: 4.091000000000001 - Best Total Reward: 4.6929999999999765 - Avera

-0.24159999999999743