Copyright **`(c)`** 2023 Giovanni Squillero `<giovanni.squillero@polito.it>`  
[`https://github.com/squillero/computational-intelligence`](https://github.com/squillero/computational-intelligence)  
Free for personal or classroom use; see [`LICENSE.md`](https://github.com/squillero/computational-intelligence/blob/master/LICENSE.md) for details.  

In [None]:
from itertools import combinations
from collections import namedtuple, defaultdict
from random import choice
from copy import deepcopy

from tqdm.auto import tqdm
import numpy as np

import pickle
import os

from lab10 import *

## Basic Problem

### Training

In [None]:
# Hyperparameters of the basic training run.
epsilon = 0.001
training_steps = 500_000

# The cache file name encodes the hyperparameters, so each configuration
# is trained at most once and reloaded afterwards.
path = os.path.join("value_dictionaries", f"value_dictionary_{epsilon}_{training_steps}.pkl")

try:
    # NOTE: unpickling can execute arbitrary code; only load cache files
    # that were written by this notebook.
    with open(path, 'rb') as f:
        value_dictionary = pickle.load(f)
except FileNotFoundError:
    # No cache yet: train from scratch. `hit_state` is only bound on this
    # branch, i.e. it does not exist when the dictionary was loaded from disk.
    value_dictionary, hit_state = train_loop(epsilon, training_steps)
    # Create the cache directory first; otherwise the write below raises
    # FileNotFoundError on a fresh checkout, after the whole training run.
    os.makedirs(os.path.dirname(path), exist_ok=True)
    with open(path, "wb") as f:
        pickle.dump(value_dictionary, f)

In [None]:
# Ten entries of the value dictionary with the largest values, best first.
sorted(
    value_dictionary.items(),
    key=lambda entry: entry[1],
    reverse=True,
)[:10]

In [None]:
# Play one game: agent_move in seat 1 against random_move in seat 2.
res = play_game(
    agent_move,
    random_move,
    value_dictionary=value_dictionary,
    agent_player=1,
)

# Any result other than an explicit winner label is reported as a draw.
print(
    'Player 1 wins' if res == "Player 1"
    else 'Player 2 wins' if res == "Player 2"
    else 'Draw'
)

### Results

#### Random vs. Random

In [None]:
# Baseline match-up: both seats use random_move. agent_player is set to -1
# here (presumably a "no agent" marker -- confirm against lab10.evaluate).
player1, player2 = random_move, random_move
agent_player = -1

# Header followed by one blank line.
print('Random player(p1) vs random player(p2)', end='\n\n')

results = evaluate(
    player1,
    player2,
    value_dictionary=value_dictionary,
    agent_player=agent_player,
    games=10_000,
)
print_results(results)

#### Agent vs. Random

In [None]:
# The agent takes seat 1, the random player seat 2.
player1, player2 = agent_move, random_move
agent_player = 1

# Header followed by one blank line.
print('Agent(p1) vs random player(p2)', end='\n\n')

results = evaluate(
    player1,
    player2,
    value_dictionary=value_dictionary,
    agent_player=agent_player,
    games=10_000,
)
print_results(results)

#### Random vs. Agent

In [None]:
# Seats swapped: the random player moves as player 1, the agent as player 2.
player1, player2 = random_move, agent_move
agent_player = 2

# Header followed by one blank line.
print('Random player(p1) vs agent(p2)', end='\n\n')

results = evaluate(
    player1,
    player2,
    value_dictionary=value_dictionary,
    agent_player=agent_player,
    games=10_000,
)
print_results(results)

In [None]:
# Same match-up as the plain Random-vs-Agent run, but evaluated with
# trick=True (the meaning of `trick` is defined in lab10 -- not visible here).
player1, player2 = random_move, agent_move
agent_player = 2

# Header followed by one blank line.
print('Random player(p1) vs agent(p2)', end='\n\n')

results = evaluate(
    player1,
    player2,
    value_dictionary=value_dictionary,
    agent_player=agent_player,
    games=10_000,
    trick=True,
)
print_results(results)

## Generalisation of the problem to O as well

### Training

In [None]:
# Hyperparameters of the complete (generalised) training run.
epsilon = 0.001
training_steps = 500_000

# The cache file name encodes the hyperparameters, so each configuration
# is trained at most once and reloaded afterwards.
path = os.path.join("value_dictionaries", f"complete_value_dictionary_{epsilon}_{training_steps}.pkl")

try:
    # NOTE: unpickling can execute arbitrary code; only load cache files
    # that were written by this notebook.
    with open(path, 'rb') as f:
        complete_value_dictionary = pickle.load(f)
except FileNotFoundError:
    # No cache yet: train from scratch. `complete_hit_state` is only bound on
    # this branch, i.e. it does not exist when the dictionary came from disk.
    complete_value_dictionary, complete_hit_state = complete_train_loop(epsilon, training_steps)
    # Create the cache directory first; otherwise the write below raises
    # FileNotFoundError on a fresh checkout, after the whole training run.
    os.makedirs(os.path.dirname(path), exist_ok=True)
    with open(path, "wb") as f:
        pickle.dump(complete_value_dictionary, f)

In [None]:
# Ten entries of the "x" table with the largest values, best first.
sorted(
    complete_value_dictionary["x"].items(),
    key=lambda entry: entry[1],
    reverse=True,
)[:10]

### Results

In [None]:
# The complete agent takes seat 1, the random player seat 2.
player1, player2 = complete_agent_move, random_move
agent_player = 1

# The header names whichever seat the agent occupies; one blank line follows.
print(
    'Random player(p1) vs agent(p2)' if agent_player == 2
    else 'Agent(p1) vs random player(p2)',
    end='\n\n',
)

results = complete_evaluate(
    player1,
    player2,
    complete_value_dictionary=complete_value_dictionary,
    agent_player=agent_player,
    games=10_000,
)
print_results(results)

In [None]:
# Seats swapped: the random player is player 1, the complete agent player 2.
player1, player2 = random_move, complete_agent_move
agent_player = 2

# The header names whichever seat the agent occupies; one blank line follows.
print(
    'Random player(p1) vs agent(p2)' if agent_player == 2
    else 'Agent(p1) vs random player(p2)',
    end='\n\n',
)

results = complete_evaluate(
    player1,
    player2,
    complete_value_dictionary=complete_value_dictionary,
    agent_player=agent_player,
    games=10_000,
)
print_results(results)

## Hyperparameter tuning

### Parameters Grid

In [None]:
# Hyperparameter grid. Both entries must be sequences: later cells iterate
# over `epsilon_list`, which raises TypeError if it is a bare float.
epsilon_list = [0.001]
training_steps_list = [250, 1_000, 2_000, 3_000, 4_000, 5_000, 7_500, 10_000, 25_000, 500_000]

### Training Loops

In [None]:
# Train (or load from cache) one value dictionary for every
# (epsilon, training_steps) pair of the grid, so that each file the result
# cells load per grid entry actually exists.

# `epsilon_list` may be a single number or a sequence; normalise to a list.
epsilons = list(epsilon_list) if isinstance(epsilon_list, (list, tuple)) else [epsilon_list]

# Create the cache directory up front so saving cannot fail with
# FileNotFoundError after a long training run.
os.makedirs("value_dictionaries", exist_ok=True)

for epsilon in epsilons:
    for training_steps in training_steps_list:
        path = os.path.join("value_dictionaries", f"complete_value_dictionary_{epsilon}_{training_steps}.pkl")
        try:
            # NOTE: unpickling can execute arbitrary code; only load cache
            # files that were written by this notebook.
            with open(path, 'rb') as f:
                complete_value_dictionary = pickle.load(f)
        except FileNotFoundError:
            complete_value_dictionary, complete_hit_state = complete_train_loop(epsilon, training_steps)
            with open(path, "wb") as f:
                pickle.dump(complete_value_dictionary, f)

### Results

#### Agent vs. Random

In [None]:
# Evaluate every cached configuration with the agent as player 1 against a
# random player 2, and tally the outcomes per (epsilon, training_steps).

results = {}

# `epsilon_list` may be a single number or a sequence; normalise to a list so
# the whole grid is evaluated, not just the last value left in `epsilon`.
epsilons = list(epsilon_list) if isinstance(epsilon_list, (list, tuple)) else [epsilon_list]

for epsilon in epsilons:
    for training_steps in training_steps_list:
        path = os.path.join("value_dictionaries", f"complete_value_dictionary_{epsilon}_{training_steps}.pkl")
        # NOTE: unpickling can execute arbitrary code; only load cache files
        # that were written by this notebook.
        with open(path, 'rb') as f:
            complete_value_dictionary = pickle.load(f)

        player1 = complete_agent_move
        player2 = random_move
        agent_player = 1
        result = complete_evaluate(player1,
                                player2,
                                complete_value_dictionary=complete_value_dictionary,
                                agent_player=agent_player,
                                games=10_000)
        # np.unique yields the distinct outcome labels and how often each one
        # occurred; .tolist() turns them into plain str/int for clean display.
        k, v = np.unique(result, return_counts=True)
        res = dict(zip(k.tolist(), v.tolist()))
        results[(epsilon, training_steps)] = res
        

In [None]:
# Display the raw outcome tallies, keyed by (epsilon, training_steps).
results

In [None]:
# Turn the raw outcome counts into fractions of the games played.
# "Wins" is Player 1's share, "Losses" is Player 2's share.
plt_results = defaultdict(dict)
for k, counts in results.items():
    # Normalise by the games actually tallied instead of a hard-coded 10_000;
    # `or 1` avoids a division by zero for an empty tally.
    total = sum(counts.values()) or 1
    # An outcome that never occurred is simply absent from the tally, so
    # every label is read with a default of 0 rather than indexed directly.
    plt_results[k]["Wins"] = counts.get("Player 1", 0) / total
    plt_results[k]["Draws"] = counts.get("Draw", 0) / total
    plt_results[k]["Losses"] = counts.get("Player 2", 0) / total

In [None]:
# Text summary: one short paragraph per (epsilon, training_steps) configuration.

for (eps_value, n_steps), rates in plt_results.items():
    print(f"Epsilon: {eps_value}, Training steps: {n_steps}")
    print(f"Wins: {rates['Wins']}, Draws: {rates['Draws']}, Losses: {rates['Losses']}")
    print()

In [None]:
# Bar plot of the win/draw/loss fractions: one subplot per
# (epsilon, training_steps) configuration, epsilons as rows.

import matplotlib.pyplot as plt

# `epsilon_list` may be a single number or a sequence; normalise to a list.
epsilons = list(epsilon_list) if isinstance(epsilon_list, (list, tuple)) else [epsilon_list]

# Size the grid from the parameter lists instead of hard-coding it (a fixed
# grid raises IndexError once there are more configurations than columns).
# squeeze=False keeps `ax` two-dimensional even with a single row or column.
n_rows, n_cols = len(epsilons), len(training_steps_list)
fig, ax = plt.subplots(n_rows, n_cols, figsize=(4 * n_cols, 4 * n_rows), squeeze=False)

for i, epsilon in enumerate(epsilons):
    for j, training_steps in enumerate(training_steps_list):
        # .get avoids inserting empty entries into the defaultdict when a
        # configuration has no results.
        rates = plt_results.get((epsilon, training_steps), {})
        ax[i, j].bar(list(rates.keys()), list(rates.values()))
        ax[i, j].set_title(f"epsilon: {epsilon}, training steps: {training_steps}")
        ax[i, j].set_xlabel("Result")
        # The plotted values are fractions in [0, 1], not raw counts.
        ax[i, j].set_ylabel("Fraction of games")
        ax[i, j].set_ylim(0, 1)
        ax[i, j].set_xticks([0, 1, 2])
        ax[i, j].set_xticklabels(["Win", "Draw", "Loss"])
        ax[i, j].grid(axis="y")
plt.tight_layout()
plt.show()

#### Random vs. Agent

In [None]:
# Evaluate every cached configuration with a random player 1 against the
# agent as player 2, and tally the outcomes per (epsilon, training_steps).

results = {}

# `epsilon_list` may be a single number or a sequence; iterating a bare float
# raises TypeError, so normalise to a list first.
epsilons = list(epsilon_list) if isinstance(epsilon_list, (list, tuple)) else [epsilon_list]

for epsilon in epsilons:
    for training_steps in training_steps_list:
        path = os.path.join("value_dictionaries", f"complete_value_dictionary_{epsilon}_{training_steps}.pkl")
        # NOTE: unpickling can execute arbitrary code; only load cache files
        # that were written by this notebook.
        with open(path, 'rb') as f:
            complete_value_dictionary = pickle.load(f)

        player1 = random_move
        player2 = complete_agent_move
        agent_player = 2
        result = complete_evaluate(player1,
                                player2,
                                complete_value_dictionary=complete_value_dictionary,
                                agent_player=agent_player,
                                games=10_000)
        # np.unique yields the distinct outcome labels and how often each one
        # occurred; .tolist() turns them into plain str/int for clean display.
        k, v = np.unique(result, return_counts=True)
        res = dict(zip(k.tolist(), v.tolist()))
        results[(epsilon, training_steps)] = res

In [None]:
# Turn the raw outcome counts into fractions of the games played.
# NOTE(review): "Wins" is Player 1's share and "Losses" Player 2's share.
# In the Random-vs-Agent runs Player 1 is the random player, so these labels
# are from the random player's point of view -- confirm whether they should
# follow the agent (Player 2) instead.
plt_results = defaultdict(dict)
for k, counts in results.items():
    # Normalise by the games actually tallied instead of a hard-coded 10_000;
    # `or 1` avoids a division by zero for an empty tally.
    total = sum(counts.values()) or 1
    # An outcome that never occurred is simply absent from the tally, so
    # every label is read with a default of 0 rather than indexed directly.
    plt_results[k]["Wins"] = counts.get("Player 1", 0) / total
    plt_results[k]["Draws"] = counts.get("Draw", 0) / total
    plt_results[k]["Losses"] = counts.get("Player 2", 0) / total

In [None]:
# Text summary: one short paragraph per (epsilon, training_steps) configuration.

for key, rates in plt_results.items():
    eps_value, n_steps = key
    print(f"Epsilon: {eps_value}, Training steps: {n_steps}")
    print(f"Wins: {rates['Wins']}, Draws: {rates['Draws']}, Losses: {rates['Losses']}")
    print()

In [None]:
# Bar plot of the win/draw/loss fractions: one subplot per
# (epsilon, training_steps) configuration, epsilons as rows.

import matplotlib.pyplot as plt

# `epsilon_list` may be a single number or a sequence; normalise to a list.
epsilons = list(epsilon_list) if isinstance(epsilon_list, (list, tuple)) else [epsilon_list]

# Size the grid from the parameter lists instead of hard-coding it (a fixed
# grid raises IndexError once there are more configurations than columns).
# squeeze=False keeps `ax` two-dimensional even with a single row or column.
n_rows, n_cols = len(epsilons), len(training_steps_list)
fig, ax = plt.subplots(n_rows, n_cols, figsize=(4 * n_cols, 4 * n_rows), squeeze=False)

for i, epsilon in enumerate(epsilons):
    for j, training_steps in enumerate(training_steps_list):
        # .get avoids inserting empty entries into the defaultdict when a
        # configuration has no results.
        rates = plt_results.get((epsilon, training_steps), {})
        ax[i, j].bar(list(rates.keys()), list(rates.values()))
        ax[i, j].set_title(f"epsilon: {epsilon}, training steps: {training_steps}")
        ax[i, j].set_xlabel("Result")
        # The plotted values are fractions in [0, 1], not raw counts.
        ax[i, j].set_ylabel("Fraction of games")
        ax[i, j].set_ylim(0, 1)
        ax[i, j].set_xticks([0, 1, 2])
        ax[i, j].set_xticklabels(["Win", "Draw", "Loss"])
        ax[i, j].grid(axis="y")
plt.tight_layout()
plt.show()