# IMPORT

In [None]:
# IMPORT FOR BASIC UTILITIES

import sys
import io
import gc
import os
import pickle
from tqdm import tqdm
from matplotlib import pyplot as plt
import seaborn as sns
from PIL import Image
import tensorflow as tf
from tensorflow import keras
import numpy as np
import gym
import itertools
import pandas as pd

In [None]:
# GPU QUALITY CHECK

gpu_info = !nvidia-smi
gpu_info = '\n'.join(gpu_info)
if gpu_info.find('failed') >= 0:
  print('Not connected to a GPU')
else:
  print(gpu_info)

Wed Aug 24 10:01:36 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.32.03    Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla P100-PCIE...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   37C    P0    27W / 250W |      0MiB / 16280MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [None]:
# MOUNT DRIVE

# google.colab is only importable inside a Colab runtime.
from google.colab import drive
# Attach Google Drive at /content/drive; later cells read and write their
# results under 'drive/MyDrive/DOT/Test/'.
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# CLONE REPOSITORY

token = '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'
repo_url = 'https://' + token + '@github.com/Menta99/Advanced_Deep_Learning_Models_and_Methods-Menta-Nisti.git'
!git clone $repo_url

Cloning into 'Advanced_Deep_Learning_Models_and_Methods-Menta-Nisti'...
remote: Enumerating objects: 401, done.[K
remote: Counting objects: 100% (26/26), done.[K
remote: Compressing objects: 100% (19/19), done.[K
remote: Total 401 (delta 9), reused 16 (delta 6), pack-reused 375[K
Receiving objects: 100% (401/401), 693.76 KiB | 14.45 MiB/s, done.
Resolving deltas: 100% (231/231), done.


In [None]:
# REPOSITORY IMPORTS

# Make the cloned repository root and its Utilities folder importable,
# in that order, so the project imports below resolve.
sys.path.extend([
    '/content/Advanced_Deep_Learning_Models_and_Methods-Menta-Nisti/',
    '/content/Advanced_Deep_Learning_Models_and_Methods-Menta-Nisti/Utilities/',
])

from Agents.DDDQN.DDDQNAgent import DDDQNAgent
from Agents.SAC.SACAgent import SACAgent
from Utilities.TicTacToe import TicTacToeEnv
from Utilities.ConnectFour import ConnectFourEnv
from Utilities.Santorini import SantoriniEnv
from Utilities.Wrappers import OpponentWrapper
from Utilities.TrainWizard import TurnGameTrainWizard

# TEST SETUP

In [None]:
# CONFIGURATION UTILITIES

def get_turn(config_turn):
    """Translate a turn label into the turn flag used by the rest of the setup.

    Returns True for 'First', False for 'Second' and None for 'Random'.

    Raises:
        ValueError: if ``config_turn`` is none of the three labels.
    """
    known_turns = (('First', True), ('Second', False), ('Random', None))
    for label, flag in known_turns:
        if config_turn == label:
            return flag
    raise ValueError('Turn provided does not exist!')


def get_env(config_environment, config_representation, config_turn, config_opponent):
    """Build the requested game and wrap it with the configured opponent.

    Args:
        config_environment: 'TicTacToe', 'ConnectFour' or 'Santorini'.
        config_representation: forwarded as first argument to the game class.
        config_turn: forwarded as second argument to the game class.
        config_opponent: forwarded to ``OpponentWrapper``.

    Returns:
        The game environment wrapped in an ``OpponentWrapper``.

    Raises:
        ValueError: if ``config_environment`` names no known game.
    """
    if config_environment == 'TicTacToe':
        game = TicTacToeEnv(config_representation, config_turn)
    elif config_environment == 'ConnectFour':
        game = ConnectFourEnv(config_representation, config_turn)
    elif config_environment == 'Santorini':
        # The four trailing positional arguments are fixed for every run;
        # their meaning is defined by SantoriniEnv, not visible in this file.
        game = SantoriniEnv(config_representation, config_turn, True, False, 0, 0)
    else:
        raise ValueError('Game provided does not exist!')
    return OpponentWrapper(game, config_opponent)


def _he_dense(units):
    """Return the spec of a ReLU Dense layer with its own He-normal initializer."""
    return {'name': 'Dense',
            'params': {
                'units': units,
                'activation': 'relu',
                'kernel_initializer': tf.keras.initializers.HeNormal()
            }}


def _he_conv2d(filters, kernel_size, strides):
    """Return the spec of a ReLU Conv2D layer with its own He-normal initializer."""
    return {'name': 'Conv2D',
            'params': {
                'filters': filters,
                'kernel_size': kernel_size,
                'strides': strides,
                'activation': 'relu',
                'kernel_initializer': tf.keras.initializers.HeNormal()
            }}


def _trunk_layers(config_representation):
    """Return the three feature-extraction layer specs for a representation."""
    if config_representation == 'Tabular':
        return [_he_dense(64), _he_dense(32), _he_dense(16)]
    if config_representation == 'Graphic':
        return [_he_conv2d(32, (8, 8), (4, 4)),
                _he_conv2d(64, (4, 4), (2, 2)),
                _he_conv2d(64, (3, 3), (1, 1))]
    raise ValueError('Representation provided does not exist!')


def _head_layers(units, activation):
    """Return Flatten -> Dense(512, relu) -> Dense(units, activation) specs."""
    return [{'name': 'Flatten', 'params': {}},
            _he_dense(512),
            {'name': 'Dense',
             'params': {
                 'units': units,
                 'activation': activation
             }}]


def _indexed(layers, start=0):
    """Turn a list of layer specs into a dict keyed start, start + 1, ..."""
    return dict(enumerate(layers, start))


def get_network_dicts(config_algorithm, config_representation, config_environment):
    """Build the layer-specification dictionaries for the chosen agent.

    Every dictionary maps an integer layer index to
    ``{'name': <layer type>, 'params': <constructor kwargs>}``.

    Args:
        config_algorithm: 'DDDQN' or 'SAC'.
        config_representation: 'Tabular' (Dense 64/32/16 trunk) or 'Graphic'
            (Conv2D 32/64/64 trunk).
        config_environment: object exposing ``action_space.n``, which sizes
            the action-dimension output layers (the training loop passes the
            wrapped environment).

    Returns:
        For 'DDDQN': ``[base, advantage, value]`` with keys 0-2, 3-5 and 6-8;
        the advantage head ends in ``action_space.n`` linear units, the value
        head in a single linear unit.
        For 'SAC': ``[actor, critic]``, each with keys 0-5; the actor ends in
        a softmax over ``action_space.n`` units, the critic in
        ``action_space.n`` linear units.

    Raises:
        ValueError: for an unknown algorithm or representation.
    """
    if config_algorithm not in ('DDDQN', 'SAC'):
        raise ValueError('Algorithm provided does not exist!')

    if config_algorithm == 'DDDQN':
        # Building the trunk first also validates the representation before
        # the environment is touched.
        network_dict_base = _indexed(_trunk_layers(config_representation))
        n_actions = config_environment.action_space.n
        network_dict_advantage = _indexed(_head_layers(n_actions, None), 3)
        network_dict_value = _indexed(_head_layers(1, None), 6)
        return [network_dict_base, network_dict_advantage, network_dict_value]

    # SAC: actor and critic each get their own trunk (separate initializer
    # instances) and differ only in the activation of the output layer.
    actor_trunk = _trunk_layers(config_representation)
    n_actions = config_environment.action_space.n
    actor_dict = _indexed(actor_trunk + _head_layers(n_actions, 'softmax'))
    critic_dict = _indexed(_trunk_layers(config_representation) + _head_layers(n_actions, None))
    return [actor_dict, critic_dict]


def get_agent(config_env, config_algorithm, config_network_dicts, config_network_path, config_test_params):
    """Instantiate the agent selected by ``config_algorithm``.

    Args:
        config_env: environment providing ``observation_space`` and
            ``action_space``.
        config_algorithm: 'DDDQN' or 'SAC'.
        config_network_dicts: layer specifications; DDDQN receives the whole
            object for both the online and the target network, SAC receives
            element 0 as actor and element 1 as critic.
        config_network_path: passed to the agent as ``checkpoint_dir``.
        config_test_params: mapping that must contain 'num_episodes',
            'learning_starts', 'memory_size', 'memory_alpha', 'memory_beta',
            'max_epsilon', 'min_epsilon', 'epsilon_a', 'epsilon_b',
            'epsilon_c' and 'batch_size'; each is forwarded under the same
            keyword.

    Returns:
        A ``DDDQNAgent`` or ``SACAgent``.

    Raises:
        ValueError: if ``config_algorithm`` is neither 'DDDQN' nor 'SAC'.
    """
    if config_algorithm not in ('DDDQN', 'SAC'):
        raise ValueError('Algorithm provided does not exist!')

    # Hyper-parameters taken verbatim from config_test_params by both agents.
    forwarded_keys = ('num_episodes', 'learning_starts', 'memory_size',
                      'memory_alpha', 'memory_beta', 'max_epsilon',
                      'min_epsilon', 'epsilon_a', 'epsilon_b', 'epsilon_c',
                      'batch_size')

    if config_algorithm == 'DDDQN':
        return DDDQNAgent(
            observation_space=config_env.observation_space,
            action_space=config_env.action_space,
            q_net_dict=config_network_dicts,
            q_target_net_dict=config_network_dicts,
            double_q=True,
            dueling_q=True,
            q_net_update=4,
            q_target_net_update=10000,
            discount_factor=0.99,
            q_net_optimizer=tf.keras.optimizers.Adam,
            q_target_net_optimizer=tf.keras.optimizers.Adam,
            q_net_learning_rate=1e-4,
            q_target_net_learning_rate=1e-4,
            q_net_loss=tf.keras.losses.Huber(),
            q_target_net_loss=tf.keras.losses.Huber(),
            **{key: config_test_params[key] for key in forwarded_keys},
            max_norm_grad=10,
            tau=1,
            checkpoint_dir=config_network_path,
        )

    return SACAgent(
        observation_space=config_env.observation_space,
        action_space=config_env.action_space,
        actor_net_dict=config_network_dicts[0],
        critic_net_dict=config_network_dicts[1],
        net_update=1,
        discount_factor=0.99,
        actor_net_optimizer=tf.keras.optimizers.Adam,
        critic_net_optimizer=tf.keras.optimizers.Adam,
        actor_net_learning_rate=3e-4,
        critic_net_learning_rate=3e-4,
        actor_net_loss=tf.keras.losses.Huber(),
        critic_net_loss=tf.keras.losses.Huber(),
        **{key: config_test_params[key] for key in forwarded_keys},
        max_norm_grad=5,
        tau=0.005,
        entropy_coeff=None,
        initial_entropy_coeff=50.,
        checkpoint_dir=config_network_path,
    )

# TEST LOOP

In [None]:
# TEST LOOP

# Train one agent per (algorithm, environment, representation, opponent)
# combination and store its results on the mounted drive.
for config in itertools.product(['SAC'], ['Santorini'], ['Graphic', 'Tabular'], ['Random']):
    print('Executing the following config: {}'.format(config))
    algorithm, environment, representation, opponent = config
    agent_turn = 'Random'
    test_params = {
        'num_episodes': 200000,
        'learning_starts': 1000,
        'memory_size': 32768,
        'memory_alpha': 0.7,
        'memory_beta': 0.4,
        'max_epsilon': 1.0,
        'min_epsilon': 0.05,
        'epsilon_a': 0.06,
        'epsilon_b': 0.05,
        'epsilon_c': 1.5,
        'batch_size': 32
    }

    # Opponent/game pairings that this notebook treats as unsupported.
    if (environment in ['TicTacToe', 'ConnectFour'] and opponent in ['MonteCarlo']) or (environment in ['Santorini'] and opponent in ['MinMaxRandom']):
        print('Config {} not supported, skipping...'.format(config))
        continue

    config_name = '_'.join([algorithm, environment, representation, opponent, agent_turn])
    data_path = 'drive/MyDrive/DOT/Test/' + config_name + '/'
    gif_path = data_path + 'GIFs/'
    network_path = data_path + 'NetworkParameters/'
    # os.mkdir raised FileExistsError when a directory was left over from an
    # earlier run, aborting the whole loop. makedirs with exist_ok=True reuses
    # existing directories and also creates missing parents.
    for directory in (data_path, gif_path, network_path):
        os.makedirs(directory, exist_ok=True)

    turn = get_turn(agent_turn)
    env = get_env(environment, representation, turn, opponent)
    network_dicts = get_network_dicts(algorithm, representation, env)
    agent = get_agent(env, algorithm, network_dicts, network_path, test_params)

    wizard = TurnGameTrainWizard(environment_name=environment,
                                 agent=agent,
                                 objective_score=1,
                                 running_average_length=100,
                                 evaluation_steps=200,
                                 evaluation_games=5,
                                 representation=representation,
                                 agent_turn=turn,
                                 agent_turn_test=None,
                                 opponent=opponent,
                                 data_path=data_path,
                                 gif_path=gif_path,
                                 save_agent_checkpoints=False,
                                 montecarlo_init_sim=0,
                                 montecarlo_normal_sim=0)

    wizard.train()
    wizard.agent.save()

Executing the following config: ('SAC', 'Santorini', 'Graphic', 'Random')


  "Initializing wrapper in old step API which returns one bool instead of two. It is recommended to set `new_step_api=True` to use new step API. This will be the default behaviour in future."
0it [00:00, ?it/s]


[1;30;43mOutput streaming troncato alle ultime 5000 righe.[0m
│ Average Game Length: 14.60                     │
│ Wins: 5 | Losses: 0 | Ties: 0 | Invalid: 0     │
│ Test Running Average:                          │
│ Running Average Score: 0.16                    │
│ Running Average Game Length: 12.18             │
└────────────────────────────────────────────────┘[0m
[32m┌────────────────────────────────────────────────┐
│ History:                                       │
│ Time Step: 116802 | Learning Step: 115803      │
│ Epsilon: 0.05 | Last Loss: -0.5925957560539246 │
│ Test Results:                                  │
│ Average Score: 1.00                            │
│ Average Game Length: 16.20                     │
│ Wins: 5 | Losses: 0 | Ties: 0 | Invalid: 0     │
│ Test Running Average:                          │
│ Running Average Score: 0.19                    │
│ Running Average Game Length: 12.40             │
└────────────────────────────────────────────────┘[0m
[32m

FileExistsError: ignored

# PLOTS

In [None]:
#DISPLAY REWARD PLOT

# Plot, for every stored configuration, the 100-row rolling mean of the
# rewards found in its scores.pkl file.
sns.set(rc={'figure.figsize': (16, 9)})
fig, ax = plt.subplots()
ax.set(ylim=(-2, 1))

for config in itertools.product(['DDDQN', 'SAC'], ['TicTacToe'], ['Graphic'], ['MinMaxRandom'],
                                ['First', 'Second', 'Random']):
    algorithm, environment, representation, opponent, agent_turn = config
    if (environment in ['TicTacToe', 'ConnectFour'] and opponent in ['MonteCarlo']) or (environment in ['Santorini'] and opponent in ['MinMaxRandom']):
        print('Config {} not supported, skipping...'.format(config))
        continue
    config_name = '_'.join([algorithm, environment, representation, opponent, agent_turn])
    data_path = 'drive/MyDrive/DOT/Test/' + config_name + '/scores.pkl'
    # `with` closes the file even when unpickling fails.
    # NOTE: pickle can execute arbitrary code; only load score files that
    # were written by this project's own training runs.
    with open(data_path, 'rb') as f:
        results_random = pickle.load(f)
    # One row per stored entry; element 0 of each entry is used as the reward.
    data_random_reward = pd.DataFrame(
        np.array([(key, entry[0]) for key, value in results_random.items() for entry in value]),
        columns=['episode', 'reward'])
    data_random_reward['rolling'] = data_random_reward.reward.rolling(100).mean()

    # Plot the rolling column so the curve matches its '_rolling' label
    # (the raw 'reward' column was being drawn under that label).
    ax = sns.lineplot(x='episode', y='rolling', data=data_random_reward,
                      label=config_name + '_rolling', ax=ax)

plt.show()

In [None]:
#DISPLAY LENGTH PLOT

# Plot, for every stored configuration, the 100-row rolling mean of the
# game lengths found in its scores.pkl file.
sns.set(rc={'figure.figsize': (16, 9)})
fig, ax = plt.subplots()
# Limits are given as (42, 0), i.e. the y-axis is drawn inverted.
ax.set(ylim=(42, 0))

for config in itertools.product(['DDDQN', 'SAC'], ['TicTacToe'], ['Tabular', 'Graphic'],
                                ['Random', 'MinMaxRandom', 'MonteCarlo'],
                                ['First', 'Second', 'Random']):
    algorithm, environment, representation, opponent, agent_turn = config
    if (environment in ['TicTacToe', 'ConnectFour'] and opponent in ['MonteCarlo']) or (environment in ['Santorini'] and opponent in ['MinMaxRandom']):
        print('Config {} not supported, skipping...'.format(config))
        continue
    config_name = '_'.join([algorithm, environment, representation, opponent, agent_turn])
    data_path = 'drive/MyDrive/DOT/Test/' + config_name + '/scores.pkl'
    # `with` closes the file even when unpickling fails.
    # NOTE: pickle can execute arbitrary code; only load score files that
    # were written by this project's own training runs.
    with open(data_path, 'rb') as f:
        results_random = pickle.load(f)
    # One row per stored entry; element 1 of each entry is used as the length.
    data_random_length = pd.DataFrame(
        np.array([(key, entry[1]) for key, value in results_random.items() for entry in value]),
        columns=['episode', 'length'])
    data_random_length['rolling'] = data_random_length.length.rolling(100).mean()

    # Plot the rolling column so the curve matches its '_rolling' label
    # (the raw 'length' column was being drawn under that label).
    ax = sns.lineplot(x='episode', y='rolling', data=data_random_length,
                      label=config_name + '_rolling', ax=ax)

plt.show()