# Lab 3: Policy Search

## Task

Write agents able to play [_Nim_](https://en.wikipedia.org/wiki/Nim), with an arbitrary number of rows and an upper bound $k$ on the number of objects that can be removed in a turn (a.k.a., _subtraction game_).

The player **taking the last object wins**.

- Task3.1: An agent using fixed rules based on _nim-sum_ (i.e., an _expert system_)
- Task3.2: An agent using evolved rules
- Task3.3: An agent using minmax
- Task3.4: An agent using reinforcement learning

## Instructions

- Create the directory `lab3` inside the course repo
- Put a `README.md` and your solution (all the files, code and auxiliary data if needed)

## Notes

- Working in group is not only allowed, but recommended (see: [Ubuntu](https://en.wikipedia.org/wiki/Ubuntu_philosophy) and [Cooperative Learning](https://files.eric.ed.gov/fulltext/EJ1096789.pdf)). Collaborations must be explicitly declared in the `README.md`.
- [Yanking](https://www.emacswiki.org/emacs/KillingAndYanking) from the internet is allowed, but sources must be explicitly declared in the `README.md`.

## Deadlines ([AoE](https://en.wikipedia.org/wiki/Anywhere_on_Earth))

- Sunday, December 4th for Task3.1 and Task3.2
- Sunday, December 11th for Task3.3 and Task3.4
- Sunday, December 18th for all reviews


In [1]:
from typing import Callable
from copy import deepcopy
from operator import xor
from itertools import accumulate
import random
from collections import namedtuple
import logging
from tqdm import tqdm


## The _Nim_ and _Nimply_ classes


In [2]:
Nimply = namedtuple("Nimply", "row, num_objects")


In [3]:
# Nim class from the lecture
class Nim:
    def __init__(self, num_rows: int, k: int = None) -> None:
        self._rows = [i * 2 + 1 for i in range(num_rows)]
        self._k = k

    def __bool__(self):
        return sum(self._rows) > 0

    def __str__(self):
        return "<" + " ".join(str(_) for _ in self._rows) + ">"

    @property
    def k(self) -> int:
        return self._k

    @property
    def rows(self) -> tuple:
        return tuple(self._rows)

    def nimming(self, ply: Nimply) -> None:
        row, num_objects = ply
        # assert self._rows[row] >= num_objects
        assert self._k is None or num_objects <= self._k
        self._rows[row] -= num_objects


## Sample (and silly) startegies


In [4]:
# pure_random from the lecture
def pure_random(state: Nim) -> Nimply:
    row = random.choice([r for r, c in enumerate(state.rows) if c > 0])
    if state.k == None:
        num_objects = random.randint(1, state.rows[row])
    elif state.rows[row] < state.k:
        num_objects = random.randint(1, state.rows[row])
    else:
        num_objects = random.randint(1, state.k)
    return Nimply(row, num_objects)


In [5]:
# gabriele's idea from the lecture
def gabriele(state: Nim) -> Nimply:
    # pick always the maximum possible number of the lowest row
    possible_moves = [(r, o) for r, c in enumerate(state.rows)
                      for o in range(1, c + 1)]
    if state.k != None:
        possible_moves = [p for p in possible_moves if p[1] <= state.k]
    return Nimply(*max(possible_moves, key=lambda m: (-m[0], m[1])))


#### My strategies


In [6]:
# my strategy is to play high numbers at first, until, there are only 10 sticks left
# and then to play 1 or 2 sticks each time, depending on how many are left
def krzysztof(state: Nim) -> Nimply:
    sum_rows = sum(row for row in state.rows)

    if sum_rows > 10:
        row = max((x for x in enumerate(state.rows)
                   if x[1] > 0), key=lambda y: y[1])[0]
        if state.k == None:
            num_objects = state.rows[row]
        elif state.rows[row] < state.k:
            num_objects = state.rows[row]
        else:
            num_objects = state.k
        ply = Nimply(row, num_objects)

    elif (sum_rows % 2) == 0:
        # if the number of sticks is even I'm taking 2
        row = random.choice([r for r, c in enumerate(state.rows) if c > 0])
        ply = Nimply(row, 2)

    else:
        # if the number of sticks is odd I'm taking 1
        row = random.choice([r for r, c in enumerate(state.rows) if c > 0])
        ply = Nimply(row, 1)

    return ply


In [7]:
def longest_row(state: Nim) -> Nimply:
    # chooses rows with the most elements
    row = max((x for x in enumerate(state.rows)
              if x[1] > 0), key=lambda y: y[1])[0]
    if state.k == None:
        num_objects = random.randint(1, state.rows[row])
    elif state.rows[row] < state.k:
        num_objects = random.randint(1, state.rows[row])
    else:
        num_objects = random.randint(1, state.k)
    return Nimply(row, random.randint(1, num_objects))


In [8]:
def shortest_row(state: Nim) -> Nimply:
    # chooses rows with the least elements
    row = min((x for x in enumerate(state.rows)
              if x[1] > 0), key=lambda y: y[1])[0]
    if state.k == None:
        num_objects = random.randint(1, state.rows[row])
    elif state.rows[row] < state.k:
        num_objects = random.randint(1, state.rows[row])
    else:
        num_objects = random.randint(1, state.k)
    return Nimply(row, random.randint(1, num_objects))


In [9]:
def take_one(state: Nim) -> Nimply:
    # takes one element from the first non-empty row
    row = tuple((x for x in enumerate(state.rows) if x[1] > 0))[0][0]
    return Nimply(row, 1)


## Optimal strategy


In [10]:
# optimal strategy using nim sum (a bit different implementation than the lecture)
def nim_sum(state: Nim) -> int:
    *_, result = accumulate(state.rows, xor)
    return result


def optimal_strategy(state: Nim) -> Nimply:
    # retrieve the possible moves
    possible_moves = [(r, o) for r, c in enumerate(state.rows)
                      for o in range(1, c + 1)]
    if state.k != None:
        possible_moves = [p for p in possible_moves if p[1] <= state.k]

    # check the values of nim_sum after all possible moves
    possible_moves_optimal = list()

    for move in possible_moves:
        temp_state = deepcopy(state)
        temp_state.nimming(Nimply(move[0], move[1]))
        if nim_sum(temp_state) == 0:
            possible_moves_optimal.append(move)

    if possible_moves_optimal == []:
        chosen_move = random.choice(possible_moves)
    else:
        chosen_move = random.choice(possible_moves_optimal)

    return Nimply(chosen_move[0], chosen_move[1])


## Evaluate


In [11]:
# games are ran NUM_MATCHES times to check the average result
def evaluate(NUM_MATCHES: int, NIM_SIZE: int, strategy0: Callable, strategy1: Callable, k=None) -> float:
    opponent = (strategy0, strategy1)
    won = 0

    for m in range(NUM_MATCHES):
        nim = Nim(NIM_SIZE, k)
        player = 0
        while nim:
            ply = opponent[player](nim)
            nim.nimming(ply)
            player = 1 - player
        if player == 0:
            won += 1
    return won / NUM_MATCHES


# Task 3.1

## Evaluate my strategy (krzysztof)


#### Against pure_random


In [12]:
result_list = list()
for NIM_SIZE in [3, 4, 5, 8, 10, 15, 20]:
    print("NIM_SIZE:", NIM_SIZE)
    for NUM_MATCHES in [10, 100, 1000]:
        for k in [None, 5, 3, 2]:
            result = evaluate(NUM_MATCHES, NIM_SIZE, pure_random, krzysztof, k)
            result_list.append(result)
            print("\tNUM_MATHES:", NUM_MATCHES, "k:", k, "result", result)

print("Average of all above results:", round(
    sum(result_list)/len(result_list), 3))


NIM_SIZE: 3
	NUM_MATHES: 10 k: None result 0.8
	NUM_MATHES: 10 k: 5 result 0.9
	NUM_MATHES: 10 k: 3 result 0.7
	NUM_MATHES: 10 k: 2 result 0.7
	NUM_MATHES: 100 k: None result 0.73
	NUM_MATHES: 100 k: 5 result 0.72
	NUM_MATHES: 100 k: 3 result 0.73
	NUM_MATHES: 100 k: 2 result 0.72
	NUM_MATHES: 1000 k: None result 0.68
	NUM_MATHES: 1000 k: 5 result 0.679
	NUM_MATHES: 1000 k: 3 result 0.755
	NUM_MATHES: 1000 k: 2 result 0.752
NIM_SIZE: 4
	NUM_MATHES: 10 k: None result 0.5
	NUM_MATHES: 10 k: 5 result 0.8
	NUM_MATHES: 10 k: 3 result 0.8
	NUM_MATHES: 10 k: 2 result 0.9
	NUM_MATHES: 100 k: None result 0.73
	NUM_MATHES: 100 k: 5 result 0.75
	NUM_MATHES: 100 k: 3 result 0.76
	NUM_MATHES: 100 k: 2 result 0.8
	NUM_MATHES: 1000 k: None result 0.7
	NUM_MATHES: 1000 k: 5 result 0.727
	NUM_MATHES: 1000 k: 3 result 0.725
	NUM_MATHES: 1000 k: 2 result 0.762
NIM_SIZE: 5
	NUM_MATHES: 10 k: None result 0.7
	NUM_MATHES: 10 k: 5 result 0.7
	NUM_MATHES: 10 k: 3 result 0.8
	NUM_MATHES: 10 k: 2 result 0.9
	NU

# Task 3.2


## Pick partial strategy

#### For evolvable strategy


In [13]:
# function used to determine which partial strategy I'm using every iteration
# based on scores assigned for each strategy (longest_row, shortest_row ... etc.)
def choose_strategy(genome: dict) -> int:
    # multiply scores to get int values
    genome_100 = tuple(int(genome.get(g) * 100) for g in genome)

    # list of values of scores with numbers assigned to each strategy
    prob_list = list()
    # strategy value changing at each iteration
    strategy_value = 1
    for i in genome_100:
        for j in range(i):
            prob_list.append(strategy_value)
        strategy_value += 1

    # returned values are equivalent to:
    # longest_row: 1
    # shortest_row: 2
    # take_one: 3
    # gabriele_strategy: 4
    # pure_random_strategy: 5
    # krzysztof_strategy: 6
    # nim_sum: 7
    # at the end we choose random strategy but the distribution of them
    # in the list is weighted

    return random.choice(prob_list)


## Evolvable strategy


In [14]:
def make_strategy(genome: dict) -> Callable:
    # evolvable strategy that takes scores for different partial strategies as parameters
    def evolvable(state: Nim) -> Nimply:

        choice = choose_strategy(genome)

        if choice == 1:  # longest_row
            # chooses rows with the most elements
            ply = longest_row(state)

        if choice == 2:  # shortest_row
            # chooses rows with the least elements
            ply = shortest_row(state)

        if choice == 3:  # take_one
            # takes one element from the first non-empty row
            ply = take_one(state)

        if choice == 4:  # gabriele_strategy
            # implements gabriele_strategy
            ply = gabriele(state)

        if choice == 5:  # pure_random_strategy
            # implements pure_random_strategy
            ply = pure_random(state)

        if choice == 6:  # krzysztof_strategy
            # implements krzysztof_strategy
            ply = krzysztof(state)

        if choice == 7:  # nim_sum
            # implements nim_sum
            ply = optimal_strategy(state)

        return ply

    return evolvable


In [15]:
# choose the strategy that will be used to estimate the success rate of evaluated strategy
# EVALUATION_STRATEGY = pure_random
EVALUATION_STRATEGY = optimal_strategy
k = 3
# k = None


In [16]:
# trial run for comparison of EVALUATION_STRATEGY and our strategy
for _ in range(10):
    print(evaluate(100, 4, EVALUATION_STRATEGY,
          make_strategy({"longest_row": 0.5, "shortest_row": 0.5, "take_one": 0.5,
                         "gabriele_strategy": 0.5, "pure_random_strategy": 0.5, "krzysztof_strategy": 0.5, "nim_sum": 0.5}), k))


0.11
0.15
0.2
0.2
0.17
0.13
0.16
0.17
0.19
0.13


## Evolution

#### With nim-sum as an option


In [17]:

def fitness(genome, NUM_MATCHES: int, NIM_SIZE: int, EVALUATION_STRATEGY: Callable, strategy: Callable, k=None):
    # calculates the fitness through evaluate function
    return evaluate(NUM_MATCHES, NIM_SIZE, EVALUATION_STRATEGY, strategy, k)


def check_duplicates(genome, population):
    # prevents from creating duplicates
    population_genome = [p.genome for p in population]
    return (genome in population_genome)


def tournament(population, tournament_size=2):
    # chooses two parents from the population
    return max(random.choices(population, k=tournament_size), key=lambda i: i.fitness)


def cross_over(g1, g2):
    # normal crossover
    cut = random.randint(0, len(g1))
    return g1[:cut] + g2[cut:]


def average_cross_over(g1, g2):
    # another way of crossover, takes averages of genomes of two parents
    g_new = tuple(round((g1[i] + g2[i])/2, 4) for i in range(len(g1)))
    return g_new


def mutation(g):
    # normal mutation
    point = random.randint(0, len(g) - 1)
    return g[:point] + (round(random.random(), 4),) + g[point + 1:]


def print_order_of_params(num):
    # helps in displaying results
    if num == 0:
        print("genome=(longest_row, shortest_row, take_one, gabriele_strategy, pure_random_strategy, krzysztof_strategy, nim_sum)")
    else:
        print("genome=(longest_row, shortest_row, take_one, gabriele_strategy, pure_random_strategy, krzysztof_strategy)")


#### Initial population


In [18]:
POPULATION_SIZE = 10
NUM_GENERATIONS = 100
OFFSPRING_SIZE = 5

NUM_MATCHES = 100
NIM_SIZE = 5
# EVALUATION_STRATEGY = pure_random
EVALUATION_STRATEGY = optimal_strategy
k = 3


In [19]:
population = list()
Individual = namedtuple("Individual", ["genome", "fitness"])

i = 0
while i < POPULATION_SIZE:
    # genome is a tuple of probabilities of using: longest_row, shortest_row, take_one,
    # gabriele_strategy, pure_random_strategy, krzysztof_strategy or nim_sum strategies
    genome = tuple([round(random.random(), 2) for _ in range(7)])

    # prevents from creating duplicates
    if check_duplicates(genome, population):
        i -= 1
    else:
        population.append(
            Individual(
                genome,
                fitness(
                    genome,
                    NUM_MATCHES,
                    NIM_SIZE,
                    EVALUATION_STRATEGY,
                    make_strategy(
                        {"longest_row": genome[0], "shortest_row": genome[1], "take_one": genome[2],
                         "gabriele_strategy": genome[3], "pure_random_strategy": genome[4], "krzysztof_strategy": genome[5], "nim_sum": genome[6]}),
                    k
                ),
            )
        )
    i += 1

population = sorted(population, key=lambda i: -i.fitness)[:POPULATION_SIZE]

print_order_of_params(0)

for p in population:
    print(p)


genome=(longest_row, shortest_row, take_one, gabriele_strategy, pure_random_strategy, krzysztof_strategy, nim_sum)
Individual(genome=(0.82, 0.17, 0.69, 0.31, 0.36, 0.8, 0.3), fitness=0.27)
Individual(genome=(0.62, 0.26, 0.93, 0.13, 0.19, 0.9, 0.61), fitness=0.27)
Individual(genome=(0.96, 0.46, 0.54, 0.44, 0.19, 0.97, 0.48), fitness=0.2)
Individual(genome=(0.78, 0.52, 0.41, 0.87, 0.14, 0.73, 0.45), fitness=0.18)
Individual(genome=(0.06, 0.27, 0.11, 0.81, 0.08, 0.44, 0.44), fitness=0.16)
Individual(genome=(0.67, 0.36, 0.33, 0.05, 0.2, 0.45, 0.79), fitness=0.14)
Individual(genome=(0.01, 0.78, 0.62, 0.49, 0.77, 0.25, 0.52), fitness=0.12)
Individual(genome=(0.34, 0.54, 0.17, 0.33, 0.79, 0.22, 0.1), fitness=0.1)
Individual(genome=(0.97, 0.9, 0.96, 0.94, 0.88, 0.46, 0.32), fitness=0.08)
Individual(genome=(0.53, 0.86, 0.33, 0.18, 0.28, 0.06, 0.1), fitness=0.05)


#### Generating offspring


In [20]:
for g in tqdm(range(NUM_GENERATIONS)):
    offspring = list()
    i = 0
    while i < OFFSPRING_SIZE:

        p1 = tournament(population)
        p2 = tournament(population)

        # randomly choose form of crossover
        if random.random() < 0.4:
            o = average_cross_over(p1.genome, p2.genome)
        else:
            o = cross_over(p1.genome, p2.genome)

        # mutate
        if random.random() < 0.5:
            o = mutation(o)

        # prevents from creating duplicates
        if check_duplicates(o, population) or check_duplicates(o, offspring):
            i -= 1
        else:
            f = fitness(
                o,
                NUM_MATCHES,
                NIM_SIZE,
                EVALUATION_STRATEGY,
                make_strategy(
                    {"longest_row": o[0], "shortest_row": o[1], "take_one": o[2],
                     "gabriele_strategy": o[3], "pure_random_strategy": o[4], "krzysztof_strategy": o[5], "nim_sum": o[6]}),
                k
            )
            offspring.append(Individual(o, f))
        i += 1

    population += offspring
    population = sorted(population, key=lambda i: -i.fitness)[:POPULATION_SIZE]


100%|██████████| 100/100 [00:52<00:00,  1.92it/s]


In [21]:
print_order_of_params(0)

for p in population:
    print(p)


genome=(longest_row, shortest_row, take_one, gabriele_strategy, pure_random_strategy, krzysztof_strategy, nim_sum)
Individual(genome=(0.1066, 0.2136, 0.1131, 0.13, 0.0619, 0.97, 0.0345), fitness=0.61)
Individual(genome=(0.2171, 0.2136, 0.1131, 0.13, 0.0659, 0.97, 0.0398), fitness=0.6)
Individual(genome=(0.1965, 0.2238, 0.2169, 0.13, 0.0619, 0.97, 0.0345), fitness=0.56)
Individual(genome=(0.0166, 0.2034, 0.0093, 0.13, 0.0779, 0.97, 0.0559), fitness=0.55)
Individual(genome=(0.1965, 0.1692, 0.2169, 0.13, 0.0659, 0.97, 0.0398), fitness=0.55)
Individual(genome=(0.1965, 0.1692, 0.2169, 0.13, 0.0619, 0.97, 0.0345), fitness=0.54)
Individual(genome=(0.0166, 0.2034, 0.0093, 0.13, 0.0619, 0.97, 0.0345), fitness=0.54)
Individual(genome=(0.2376, 0.2034, 0.0093, 0.13, 0.0699, 0.97, 0.0452), fitness=0.54)
Individual(genome=(0.1721, 0.2085, 0.0612, 0.13, 0.0659, 0.97, 0.0398), fitness=0.54)
Individual(genome=(0.1271, 0.2034, 0.0093, 0.13, 0.0699, 0.97, 0.0452), fitness=0.53)


## Evolution

#### Without nim-sum


#### Initial population


In [22]:
POPULATION_SIZE = 10
NUM_GENERATIONS = 100
OFFSPRING_SIZE = 5

NUM_MATCHES = 100
NIM_SIZE = 5
# EVALUATION_STRATEGY = pure_random
EVALUATION_STRATEGY = optimal_strategy
k = 3


In [23]:
population = list()
Individual = namedtuple("Individual", ["genome", "fitness"])

i = 0
while i < POPULATION_SIZE:
    # genome is a tuple of probabilities of using: longest_row, shortest_row, take_one,
    # gabriele_strategy, pure_random_strategy or krzysztof_strategy strategies
    genome = tuple([round(random.random(), 2) for _ in range(6)])

    # prevents from creating duplicates
    if check_duplicates(genome, population):
        i -= 1
    else:
        population.append(
            Individual(
                genome,
                fitness(
                    genome,
                    NUM_MATCHES,
                    NIM_SIZE,
                    EVALUATION_STRATEGY,
                    make_strategy(
                        {"longest_row": genome[0], "shortest_row": genome[1], "take_one": genome[2],
                         "gabriele_strategy": genome[3], "pure_random_strategy": genome[4], "krzysztof_strategy": genome[5]}),
                    k
                ),
            )
        )
    i += 1

population = sorted(population, key=lambda i: -i.fitness)[:POPULATION_SIZE]

print_order_of_params(1)

for p in population:
    print(p)


genome=(longest_row, shortest_row, take_one, gabriele_strategy, pure_random_strategy, krzysztof_strategy)
Individual(genome=(0.78, 0.74, 0.39, 0.0, 0.35, 0.94), fitness=0.26)
Individual(genome=(0.39, 0.52, 0.02, 0.07, 0.28, 0.39), fitness=0.24)
Individual(genome=(0.08, 0.56, 0.71, 0.29, 0.99, 0.99), fitness=0.19)
Individual(genome=(0.5, 0.86, 0.17, 0.44, 0.64, 0.84), fitness=0.16)
Individual(genome=(0.47, 0.35, 0.68, 0.71, 0.9, 0.73), fitness=0.15)
Individual(genome=(0.15, 0.02, 0.91, 0.21, 0.92, 0.38), fitness=0.13)
Individual(genome=(0.43, 0.48, 0.41, 0.62, 0.97, 0.61), fitness=0.13)
Individual(genome=(0.42, 0.53, 0.0, 0.25, 0.3, 0.16), fitness=0.11)
Individual(genome=(0.54, 0.04, 0.91, 0.14, 0.06, 0.18), fitness=0.09)
Individual(genome=(0.78, 0.17, 0.7, 0.72, 0.81, 0.25), fitness=0.09)


#### Generating offspring


In [24]:
for g in tqdm(range(NUM_GENERATIONS)):
    offspring = list()
    i = 0
    while i < OFFSPRING_SIZE:

        p1 = tournament(population)
        p2 = tournament(population)

        # randomly choose form of crossover
        if random.random() < 0.4:
            o = average_cross_over(p1.genome, p2.genome)
        else:
            o = cross_over(p1.genome, p2.genome)

        # mutate
        if random.random() < 0.5:
            o = mutation(o)

        # prevents from creating duplicates
        if check_duplicates(o, population) or check_duplicates(o, offspring):
            i -= 1
        else:
            f = fitness(
                o,
                NUM_MATCHES,
                NIM_SIZE,
                EVALUATION_STRATEGY,
                make_strategy(
                    {"longest_row": o[0], "shortest_row": o[1], "take_one": o[2],
                     "gabriele_strategy": o[3], "pure_random_strategy": o[4], "krzysztof_strategy": o[5]}),
                k
            )
            offspring.append(Individual(o, f))
        i += 1

    population += offspring
    population = sorted(population, key=lambda i: -i.fitness)[:POPULATION_SIZE]


100%|██████████| 100/100 [00:43<00:00,  2.29it/s]


In [25]:
print_order_of_params(1)

for p in population:
    print(p)


genome=(longest_row, shortest_row, take_one, gabriele_strategy, pure_random_strategy, krzysztof_strategy)
Individual(genome=(0.0789, 0.1077, 0.0345, 0.0, 0.0716, 0.8746), fitness=0.75)
Individual(genome=(0.0789, 0.1077, 0.0345, 0.0, 0.0341, 0.9777), fitness=0.75)
Individual(genome=(0.0789, 0.1077, 0.0345, 0.0, 0.0341, 0.942), fitness=0.74)
Individual(genome=(0.0781, 0.1077, 0.0345, 0.0, 0.0483, 0.9777), fitness=0.72)
Individual(genome=(0.0781, 0.1077, 0.0823, 0.0, 0.06, 0.8746), fitness=0.71)
Individual(genome=(0.0789, 0.1077, 0.0345, 0.0, 0.0483, 0.9777), fitness=0.71)
Individual(genome=(0.0781, 0.1077, 0.0823, 0.0, 0.06, 0.767), fitness=0.71)
Individual(genome=(0.0778, 0.1077, 0.1407, 0.0, 0.0483, 0.8746), fitness=0.7)
Individual(genome=(0.08, 0.1077, 0.162, 0.0, 0.0483, 0.9496), fitness=0.7)
Individual(genome=(0.0789, 0.1374, 0.0345, 0.0, 0.0483, 0.9777), fitness=0.7)


## Oversimplified match


In [26]:
logging.getLogger().setLevel(logging.DEBUG)

strategy = (pure_random, make_strategy(
    {"longest_row": 0.5, "shortest_row": 0.5, "take_one": 0.5,
     "gabriele_strategy": 0.5, "pure_random_strategy": 0.5, "krzysztof_strategy": 0.5, "nim_sum": 0.5}))

nim = Nim(5, k)


In [27]:
logging.debug(f"status: Initial board  -> {nim}")
player = 0
while nim:
    ply = strategy[player](nim)
    nim.nimming(ply)
    logging.debug(f"status: After player {player} -> {nim}")
    player = 1 - player
winner = 1 - player
logging.info(f"status: Player {winner} won!")


DEBUG:root:status: Initial board  -> <1 3 5 7 9>
DEBUG:root:status: After player 0 -> <1 3 5 4 9>
DEBUG:root:status: After player 1 -> <1 3 5 4 6>
DEBUG:root:status: After player 0 -> <1 3 5 4 5>
DEBUG:root:status: After player 1 -> <0 3 5 4 5>
DEBUG:root:status: After player 0 -> <0 3 5 2 5>
DEBUG:root:status: After player 1 -> <0 3 5 1 5>
DEBUG:root:status: After player 0 -> <0 3 2 1 5>
DEBUG:root:status: After player 1 -> <0 2 2 1 5>
DEBUG:root:status: After player 0 -> <0 2 0 1 5>
DEBUG:root:status: After player 1 -> <0 2 0 0 5>
DEBUG:root:status: After player 0 -> <0 1 0 0 5>
DEBUG:root:status: After player 1 -> <0 0 0 0 5>
DEBUG:root:status: After player 0 -> <0 0 0 0 2>
DEBUG:root:status: After player 1 -> <0 0 0 0 0>
INFO:root:status: Player 1 won!
