# Quixo Players - Analysis and Results

In [2]:
from game import Move, Player, Game
from main import RandomPlayer
from MontecarloPlayer import MonteCarloPlayer
from QPlayer import QlearningPlayer
from MinMaxPlayer import MinMaxPlayer
from collections import defaultdict
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm

### Random match

In [3]:
g = Game()
player0 = RandomPlayer()
player1 = RandomPlayer()
winner = g.play(player0, player1, verbose=True, debug=False)


*****************
⬜ ⬜ ⬜ ⬜ ⬜ 
⬜ ⬜ ⬜ ⬜ ⬜ 
⬜ ⬜ ⬜ ⬜ ⬜ 
⬜ ⬜ ⬜ ⬜ ⬜ 
⬜ ⬜ ⬜ ⬜ ⬜ 

Current player: 0, choose where insert ❌

*****************
⬜ ⬜ ⬜ ⬜ ❌ 
⬜ ⬜ ⬜ ⬜ ⬜ 
⬜ ⬜ ⬜ ⬜ ⬜ 
⬜ ⬜ ⬜ ⬜ ⬜ 
⬜ ⬜ ⬜ ⬜ ⬜ 

Current player: 1, choose where insert 🔴

*****************
⬜ ⬜ ⬜ ⬜ ❌ 
⬜ ⬜ ⬜ ⬜ ⬜ 
⬜ ⬜ ⬜ ⬜ ⬜ 
⬜ ⬜ ⬜ ⬜ ⬜ 
🔴 ⬜ ⬜ ⬜ ⬜ 

Current player: 0, choose where insert ❌

*****************
❌ ⬜ ⬜ ⬜ ⬜ 
⬜ ⬜ ⬜ ⬜ ⬜ 
⬜ ⬜ ⬜ ⬜ ⬜ 
⬜ ⬜ ⬜ ⬜ ⬜ 
🔴 ⬜ ⬜ ⬜ ⬜ 

Current player: 1, choose where insert 🔴

*****************
❌ ⬜ ⬜ ⬜ ⬜ 
⬜ ⬜ ⬜ ⬜ ⬜ 
⬜ ⬜ ⬜ ⬜ ⬜ 
⬜ ⬜ ⬜ ⬜ ⬜ 
🔴 ⬜ ⬜ ⬜ 🔴 

Current player: 0, choose where insert ❌

*****************
❌ ⬜ ⬜ ⬜ ⬜ 
⬜ ⬜ ⬜ ⬜ ⬜ 
⬜ ⬜ ⬜ ⬜ ⬜ 
⬜ ⬜ ⬜ ⬜ ⬜ 
🔴 ⬜ ⬜ 🔴 ❌ 

Current player: 1, choose where insert 🔴

*****************
❌ ⬜ ⬜ ⬜ ⬜ 
⬜ ⬜ ⬜ ⬜ ⬜ 
⬜ ⬜ ⬜ ⬜ ⬜ 
⬜ ⬜ ⬜ 🔴 ⬜ 
🔴 ⬜ ⬜ 🔴 ❌ 

Current player: 0, choose where insert ❌

*****************
❌ ⬜ ⬜ ⬜ ⬜ 
⬜ ⬜ ⬜ ⬜ ⬜ 
⬜ ⬜ ⬜ ⬜ ⬜ 
⬜ ⬜ ⬜ 🔴 ⬜ 
🔴 ⬜ 🔴 ❌ ❌ 

Current player: 1, choose where insert 🔴

*****************
🔴 ❌ ⬜ ⬜ ⬜ 
⬜ ⬜ ⬜ ⬜ ⬜ 
⬜ ⬜ ⬜ ⬜ ⬜ 
⬜ ⬜ ⬜ 🔴 ⬜ 
🔴 ⬜ 🔴 ❌ ❌

### Utility functions

In [4]:
def collect_results(player1, player2, n_games=100):
    """
    Collects the results of playing n_games between player1 and player2.
    Returns a dictionary with the number of wins for each player.
    e.g. {'Player 0': 50, 'Player 1': 50}
    """
    
    # 1. Initialize the results
    results = defaultdict(int)
    results[0] = 0
    results[1] = 0

    # 2. Play the games
    for _ in tqdm(range(n_games)):
        g = Game()
        winner = g.play(player1, player2, verbose=False, debug=False)
        results[winner] += 1
        
    return results

In [5]:
def plot_results(results: dict, player1: str, player2: str):
    """
    Plots the results of playing n_games between player1 and player2.
    """
    # 1. Extract player numbers and wins
    players = list(results.keys())
    wins = list(results.values())

    # 2. Create a bar plot
    plt.figure(figsize=(6,5))
    plt.bar(players, wins, color=['red', 'black'])
    plt.xticks(players, [player1, player2])
    plt.ylabel('Number of Wins')
    plt.title('Game Results')
    plt.show()

## MinMax Player

The MinMax player is a player that uses the MinMax algorithm to choose the best move. It is a recursive algorithm that explores the game tree until it reaches a terminal state. It then evaluates the utility of each terminal state and propagates the values back up the tree. 


### Performance Evaluation

In the Performance Evaluation section, it shows the results of 2000 games between the chosen player and a random player:
- 1000 games where the player starts
- 1000 games where the player moves second

Let's see how MinMax (depth = 4) plays...

In [6]:
g = Game()
opponent = RandomPlayer()

# 1. Play 1000 games with Minimax moving first

minmax_player = MinMaxPlayer(0)
res1 = collect_results(minmax_player, opponent, n_games=1000)

# 2. Play 1000 games with Minimax moving second
minmax_player = MinMaxPlayer(1)
res2 = collect_results(opponent, minmax_player, n_games=1000)

# 3. Plot the results
plot_results(res1, 'Minmax', 'Random')
plot_results(res2, 'Random', 'Minmax')

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:03<?, ?it/s]


KeyboardInterrupt: 

### Results

In [None]:
print("❌ Results for Minmax moving first:")
print("Wins:", res1[0])
print("Losses:", res1[1])
print(f"Percentage of wins: {res1[0] / (res1[0] + res1[1]) * 100}%")

print("\n🔴 Results for Minmax moving second:")
print("Wins:", res2[1])
print("Losses:", res2[0])
print(f"Percentage of wins: {res2[1] / (res2[0] + res2[1]) * 100}%")

❌ Results for Minmax moving first:
Wins: 999
Losses: 1
Percentage of wins: 99.9%

🔘 Results for Minmax moving second:
Wins: 994
Losses: 6
Percentage of wins: 99.4%


### MinMax example match

In [None]:
g = Game()
minmax_player = MinMaxPlayer(0)
opponent = RandomPlayer()
g.play(minmax_player, opponent, verbose=True, debug=False)

##  Q-Learning Player

The Q-Learning player employs the Q-Learning algorithm, a model-free reinforcement learning technique. It learns by interacting with the environment, updating its Q-values based on the experienced rewards. The player explores the game space, adjusting its strategy over time to maximize the cumulative reward. The algorithm involves an exploration-exploitation trade-off, balancing between discovering new moves and exploiting known strategies. 


### Training

In [None]:
%run qlearning.py

  0%|          | 0/2500 [00:00<?, ?it/s]

100%|██████████| 2500/2500 [00:18<00:00, 137.30it/s]
100%|██████████| 2500/2500 [00:28<00:00, 89.28it/s] 


### Performance evaluation

In [14]:
g = Game()
opponent = RandomPlayer()

# 1. Play 1000 games with Q moving first

q_player = QlearningPlayer(0)
res1 = collect_results(minmax_player, opponent, n_games=1000)

# 2. Play 1000 games with Q moving second
q_player = QlearningPlayer(1)
res2 = collect_results(opponent, minmax_player, n_games=1000)


Charging Q_table...


100%|██████████| 1000/1000 [00:07<00:00, 128.45it/s]


Charging Q_table...


100%|██████████| 1000/1000 [00:07<00:00, 126.36it/s]


### Results

In [18]:

print("❌ Results for Q-Learning moving first:")
print("Wins:", res1[0])
print("Losses:", res1[1])
print(f"Percentage of wins: {res1[0] / (res1[0] + res1[1]) * 100}%")

print("\n🔴 Results for Q-Learning moving second:")
print("Wins:", res2[1])
print("Losses:", res2[0])
print(f"Percentage of wins: {res2[1] / (res2[0] + res2[1]) * 100}%")


❌ Results for Q-Learning moving first:
Wins: 341
Losses: 251
Percentage of wins: 57.60135135135135%

🔴 Results for Q-Learning moving second:
Wins: 290
Losses: 329
Percentage of wins: 46.849757673667206%


### Q-Learning Match Example

In [19]:
g = Game()
g.play(opponent, q_player, verbose=True, debug=False)


*****************
⬜ ⬜ ⬜ ⬜ ⬜ 
⬜ ⬜ ⬜ ⬜ ⬜ 
⬜ ⬜ ⬜ ⬜ ⬜ 
⬜ ⬜ ⬜ ⬜ ⬜ 
⬜ ⬜ ⬜ ⬜ ⬜ 

Current player: 0, choose where insert ❌

*****************
⬜ ⬜ ⬜ ⬜ ⬜ 
⬜ ⬜ ⬜ ⬜ ⬜ 
❌ ⬜ ⬜ ⬜ ⬜ 
⬜ ⬜ ⬜ ⬜ ⬜ 
⬜ ⬜ ⬜ ⬜ ⬜ 

Current player: 1, choose where insert 🔴

*****************
⬜ ⬜ 🔴 ⬜ ⬜ 
⬜ ⬜ ⬜ ⬜ ⬜ 
❌ ⬜ ⬜ ⬜ ⬜ 
⬜ ⬜ ⬜ ⬜ ⬜ 
⬜ ⬜ ⬜ ⬜ ⬜ 

Current player: 0, choose where insert ❌

*****************
❌ ⬜ ⬜ 🔴 ⬜ 
⬜ ⬜ ⬜ ⬜ ⬜ 
❌ ⬜ ⬜ ⬜ ⬜ 
⬜ ⬜ ⬜ ⬜ ⬜ 
⬜ ⬜ ⬜ ⬜ ⬜ 

Current player: 1, choose where insert 🔴

*****************
❌ ⬜ ⬜ 🔴 ⬜ 
⬜ ⬜ ⬜ ⬜ ⬜ 
❌ ⬜ ⬜ ⬜ ⬜ 
⬜ ⬜ ⬜ ⬜ ⬜ 
⬜ ⬜ ⬜ ⬜ 🔴 

Current player: 0, choose where insert ❌

*****************
❌ ⬜ ❌ 🔴 ⬜ 
⬜ ⬜ ⬜ ⬜ ⬜ 
❌ ⬜ ⬜ ⬜ ⬜ 
⬜ ⬜ ⬜ ⬜ ⬜ 
⬜ ⬜ ⬜ ⬜ 🔴 

Current player: 1, choose where insert 🔴

*****************
❌ ⬜ ❌ ⬜ 🔴 
⬜ ⬜ ⬜ ⬜ ⬜ 
❌ ⬜ ⬜ ⬜ ⬜ 
⬜ ⬜ ⬜ ⬜ ⬜ 
⬜ ⬜ ⬜ ⬜ 🔴 

Current player: 0, choose where insert ❌

*****************
❌ ⬜ ❌ ⬜ ❌ 
⬜ ⬜ ⬜ ⬜ 🔴 
❌ ⬜ ⬜ ⬜ ⬜ 
⬜ ⬜ ⬜ ⬜ ⬜ 
⬜ ⬜ ⬜ ⬜ 🔴 

Current player: 1, choose where insert 🔴

*****************
❌ ⬜ ❌ ⬜ ❌ 
⬜ ⬜ ⬜ ⬜ 🔴 
❌ ⬜ ⬜ ⬜ ⬜ 
⬜ ⬜ ⬜ ⬜ ⬜ 
🔴 ⬜ ⬜ ⬜ 🔴

0

##  Montecarlo Player

The Monte Carlo player utilizes the Monte Carlo method, a statistical approach to decision-making. It makes decisions based on the outcomes of random simulations, progressively refining its strategy as more simulations are performed. By sampling possible future states, the player estimates the value of moves and selects the one that leads to the most favorable overall outcome.

### Training and Performance evaluation

In [10]:
# PARAMETERS
epsilon=1         # exploration rate
e_decay=0.99999   # exploration decay rate
e_min=0.3         # minimum exploration rate
alpha=0.2         # learning rate
gamma=0.95        # discount factor

In [17]:
g = Game()
opponent = RandomPlayer()
mc0_player = MonteCarloPlayer(0)
mc1_player = MonteCarloPlayer(1)

training_steps = [100, 250, 500, 1000, 2500, 5000, 10000, 25000, 50000]
training_results_move_first = []
training_results_move_second = []

previous_steps = 0

for steps in tqdm(training_steps):
    
    mc0_player.train(steps - previous_steps)
    mc1_player.train(steps - previous_steps)
    previous_steps = steps
    
    results = collect_results(mc0_player, opponent, n_games=100)
    training_results_move_first.append(results)

    results = collect_results(opponent, mc1_player, n_games=100)
    training_results_move_second.append(results)

    # Print the results
    print(f"Training steps: {steps}")
    print(f"❌ Results for Monte Carlo moving first: wins = {training_results_move_first[-1][0]}, losses = {training_results_move_first[-1][1]}")
    print(f"🔴 Results for Monte Carlo moving second: wins = {training_results_move_second[-1][1]}, losses = {training_results_move_second[-1][0]}")

  0%|          | 0/9 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/9 [00:00<?, ?it/s]

ERROR: The Monte Carlo player tried to make an illegal move
ERROR: The Monte Carlo player tried to make an illegal move
ERROR: The Monte Carlo player tried to make an illegal move
ERROR: The Monte Carlo player tried to make an illegal move
ERROR: The Monte Carlo player tried to make an illegal move
ERROR: The Monte Carlo player tried to make an illegal move
ERROR: The Monte Carlo player tried to make an illegal move
ERROR: The Monte Carlo player tried to make an illegal move
ERROR: The Monte Carlo player tried to make an illegal move
ERROR: The Monte Carlo player tried to make an illegal move
ERROR: The Monte Carlo player tried to make an illegal move
ERROR: The Monte Carlo player tried to make an illegal move
ERROR: The Monte Carlo player tried to make an illegal move





AttributeError: 'MonteCarloPlayer' object has no attribute '_get_end_reward'

### Results

In [None]:

# Print the results
win0 = training_results_move_first[-1][0]
loss0 =  training_results_move_first[-1][1]
print("❌ Results for Monte Carlo moving first:")
print("Wins:", {win0})
print("Losses:", {loss0})
print(f"Percentage of wins: {win0 / (win0 + loss0) * 100}%")

win1 = training_results_move_second[-1][0]
loss1 =  training_results_move_second[-1][1]
print("❌ Results for Monte Carlo moving first:")
print("Wins:", {win1})
print("Losses:", {loss1})
print(f"Percentage of wins: {win1 / (win1 + loss1) * 100}%")

### Montecarlo example match

In [None]:
g = Game()
g.play(mc0_player, opponent, verbose=True, debug=False)


*****************
⬜ ⬜ ⬜ ⬜ ⬜ 
⬜ ⬜ ⬜ ⬜ ⬜ 
⬜ ⬜ ⬜ ⬜ ⬜ 
⬜ ⬜ ⬜ ⬜ ⬜ 
⬜ ⬜ ⬜ ⬜ ⬜ 

Current player: 0, choose where insert ❌

*****************
⬜ ⬜ ⬜ ⬜ ❌ 
⬜ ⬜ ⬜ ⬜ ⬜ 
⬜ ⬜ ⬜ ⬜ ⬜ 
⬜ ⬜ ⬜ ⬜ ⬜ 
⬜ ⬜ ⬜ ⬜ ⬜ 

Current player: 1, choose where insert 🔴

*****************
⬜ ⬜ 🔴 ⬜ ❌ 
⬜ ⬜ ⬜ ⬜ ⬜ 
⬜ ⬜ ⬜ ⬜ ⬜ 
⬜ ⬜ ⬜ ⬜ ⬜ 
⬜ ⬜ ⬜ ⬜ ⬜ 

Current player: 0, choose where insert ❌

*****************
⬜ ⬜ 🔴 ⬜ ❌ 
⬜ ⬜ ⬜ ⬜ ❌ 
⬜ ⬜ ⬜ ⬜ ⬜ 
⬜ ⬜ ⬜ ⬜ ⬜ 
⬜ ⬜ ⬜ ⬜ ⬜ 

Current player: 1, choose where insert 🔴

*****************
⬜ ⬜ 🔴 ⬜ ❌ 
⬜ ⬜ ⬜ ❌ 🔴 
⬜ ⬜ ⬜ ⬜ ⬜ 
⬜ ⬜ ⬜ ⬜ ⬜ 
⬜ ⬜ ⬜ ⬜ ⬜ 

Current player: 0, choose where insert ❌

*****************
⬜ ⬜ 🔴 ⬜ ❌ 
⬜ ⬜ ⬜ ❌ 🔴 
⬜ ⬜ ⬜ ⬜ ⬜ 
⬜ ⬜ ⬜ ⬜ ⬜ 
❌ ⬜ ⬜ ⬜ ⬜ 

Current player: 1, choose where insert 🔴

*****************
⬜ ⬜ 🔴 ⬜ ❌ 
⬜ ⬜ ⬜ ❌ 🔴 
⬜ ⬜ ⬜ ⬜ ⬜ 
⬜ ⬜ ⬜ ⬜ ⬜ 
🔴 ❌ ⬜ ⬜ ⬜ 

Current player: 0, choose where insert ❌

*****************
⬜ ⬜ 🔴 ⬜ ❌ 
⬜ ⬜ ⬜ ❌ 🔴 
⬜ ⬜ ⬜ ⬜ ⬜ 
⬜ ⬜ ⬜ ⬜ ⬜ 
❌ 🔴 ❌ ⬜ ⬜ 

Current player: 1, choose where insert 🔴

*****************
⬜ ⬜ 🔴 ⬜ ❌ 
⬜ ⬜ ⬜ ❌ 🔴 
🔴 ⬜ ⬜ ⬜ ⬜ 
⬜ ⬜ ⬜ ⬜ ⬜ 
❌ 🔴 ❌ ⬜ ⬜

0