Copyright **`(c)`** 2022 Giovanni Squillero `<squillero@polito.it>`  
[`https://github.com/squillero/computational-intelligence`](https://github.com/squillero/computational-intelligence)  
Free for personal or classroom use; see [`LICENSE.md`](https://github.com/squillero/computational-intelligence/blob/master/LICENSE.md) for details.  


# Lab 3: ES

## Task

Write agents able to play [*Nim*](https://en.wikipedia.org/wiki/Nim), with an arbitrary number of rows and an upper bound $k$ on the number of objects that can be removed in a turn (a.k.a., *subtraction game*).

The goal of the game is to **avoid** taking the last object.

* Task2.1: An agent using fixed rules based on *nim-sum* (i.e., an *expert system*)
* Task2.2: An agent using evolved rules using ES

## Instructions

* Create the directory `lab2` inside your personal course repository for the course 
* Put a `README.md` and your solution (all the files, code and auxiliary data if needed)

## Notes

* Working in group is not only allowed, but recommended (see: [Ubuntu](https://en.wikipedia.org/wiki/Ubuntu_philosophy) and [Cooperative Learning](https://files.eric.ed.gov/fulltext/EJ1096789.pdf)). Collaborations must be explicitly declared in the `README.md`.
* [Yanking](https://www.emacswiki.org/emacs/KillingAndYanking) from the internet is allowed, but sources must be explicitly declared in the `README.md`.



In [2]:
import logging
from pprint import pprint, pformat
from collections import namedtuple
import random
from copy import deepcopy


## The *Nim* and *Nimply* classes

In [3]:
Nimply = namedtuple("Nimply", "row, num_objects")


In [4]:
class Nim:
    def __init__(self, num_rows: int, k: int = None) -> None:
        self._rows = [i * 2 + 1 for i in range(num_rows)]
        self._k = k

    def __bool__(self):
        return sum(self._rows) > 0

    def __str__(self):
        return "<" + " ".join(str(_) for _ in self._rows) + ">"

    @property
    def rows(self) -> tuple:
        return tuple(self._rows)

    def nimming(self, ply: Nimply) -> None:
        row, num_objects = ply
        assert self._rows[row] >= num_objects
        assert self._k is None or num_objects <= self._k
        self._rows[row] -= num_objects


## Sample (and silly) startegies 

In [5]:
def pure_random(state: Nim) -> Nimply:
    """A completely random move"""
    row = random.choice([r for r, c in enumerate(state.rows) if c > 0])
    num_objects = random.randint(1, state.rows[row])
    return Nimply(row, num_objects)


In [6]:
def gabriele(state: Nim) -> Nimply:
    """Pick always the maximum possible number of the lowest row"""
    possible_moves = [(r, o) for r, c in enumerate(state.rows) for o in range(1, c + 1)]
    return Nimply(*max(possible_moves, key=lambda m: (-m[0], m[1])))


In [7]:
def adaptive(state: Nim) -> Nimply:
    """A strategy that can adapt its parameters"""
    genome = {"love_small": 0.5}


In [5]:
import numpy as np


def nim_sum(state: Nim) -> int:
    tmp = np.array([tuple(int(x) for x in f"{c:032b}") for c in state.rows])
    xor = tmp.sum(axis=0) % 2
    return int("".join(str(_) for _ in xor), base=2)


def analize(raw: Nim) -> dict:
    cooked = dict()
    cooked["possible_moves"] = dict()
    for ply in (Nimply(r, o) for r, c in enumerate(raw.rows) for o in range(1, c + 1)):
        tmp = deepcopy(raw)
        tmp.nimming(ply)
        cooked["possible_moves"][ply] = nim_sum(tmp)
    return cooked


def optimal(state: Nim) -> Nimply:
    analysis = analize(state)
    logging.debug(f"analysis:\n{pformat(analysis)}")
    spicy_moves = [ply for ply, ns in analysis["possible_moves"].items() if ns != 0]
    if not spicy_moves:
        spicy_moves = list(analysis["possible_moves"].keys())
    ply = random.choice(spicy_moves)
    return ply


## Oversimplified match

In [9]:
logging.getLogger().setLevel(logging.INFO)

strategy = (optimal, pure_random)

nim = Nim(5)
logging.info(f"init : {nim}")
player = 0
while nim:
    ply = strategy[player](nim)
    logging.info(f"ply: player {player} plays {ply}")
    nim.nimming(ply)
    logging.info(f"status: {nim}")
    player = 1 - player
logging.info(f"status: Player {player} won!")


INFO:root:init : <1 3 5 7 9>
INFO:root:ply: player 0 plays Nimply(row=3, num_objects=5)
INFO:root:status: <1 3 5 2 9>
INFO:root:ply: player 1 plays Nimply(row=3, num_objects=2)
INFO:root:status: <1 3 5 0 9>
INFO:root:ply: player 0 plays Nimply(row=2, num_objects=5)
INFO:root:status: <1 3 0 0 9>
INFO:root:ply: player 1 plays Nimply(row=1, num_objects=1)
INFO:root:status: <1 2 0 0 9>
INFO:root:ply: player 0 plays Nimply(row=4, num_objects=1)
INFO:root:status: <1 2 0 0 8>
INFO:root:ply: player 1 plays Nimply(row=4, num_objects=3)
INFO:root:status: <1 2 0 0 5>
INFO:root:ply: player 0 plays Nimply(row=1, num_objects=2)
INFO:root:status: <1 0 0 0 5>
INFO:root:ply: player 1 plays Nimply(row=0, num_objects=1)
INFO:root:status: <0 0 0 0 5>
INFO:root:ply: player 0 plays Nimply(row=4, num_objects=2)
INFO:root:status: <0 0 0 0 3>
INFO:root:ply: player 1 plays Nimply(row=4, num_objects=1)
INFO:root:status: <0 0 0 0 2>
INFO:root:ply: player 0 plays Nimply(row=4, num_objects=1)
INFO:root:status: <0 0

## Task one - Expert agent

In [6]:
def read_state(nim: Nim):
    str_nim = str(nim).strip('<>').split(' ')
    return [int(n) for n in str_nim]

def show_board(nim: Nim) -> None:
    state = read_state(nim)
    for r in range(len(state)):
        tmp_str = ''
        for _ in range(len(state) - (r + 1)): tmp_str += '_ '
        for _ in range(state[r]): tmp_str += 'O '
        for _ in range(len(state) + r - state[r]): tmp_str += '_ '
        print(tmp_str)

def my_nim_sum(state):
    to_remove = state[0]
    for r in state[1:]: to_remove = np.bitwise_xor(to_remove, r)
    return to_remove

def find_move(state):
    
    idx_more_than_one = np.where(np.array(state) > 1)[0]
    more_than_one = len(idx_more_than_one)
    n_one = len(np.where(np.array(state) == 1)[0])

    ## my tweak to the optimal strategy of always zeroing the nim-sum

    if more_than_one == 1:
        if n_one % 2 != 0:
            return Nimply(idx_more_than_one[0], state[idx_more_than_one[0]])
        return Nimply(idx_more_than_one[0], state[idx_more_than_one[0]] - 1)
    elif more_than_one == 0:
        return Nimply(np.where(np.array(state) == 1)[0][0], 1)
    
    ## find a move that make the nim-sum zero
    
    for r in range(len(state)):
        for c in range(1, state[r] + 1):
            state_copy = state.copy()
            state_copy[r] -= c

            if my_nim_sum(state_copy) == 0: return Nimply(r, c)

    ## the nim-sum is already zero, meaning disadvantage, and that each move can be countered -> remove a single element from a random row (longer games mean more probability for the opponent to make a mistake)

    #print('no move found - going random')
    remove_from = random.randint(0, len(state) - 1)
    while state[remove_from] == 0: remove_from = random.randint(0, len(state) - 1)
    return Nimply(remove_from, 1)


def nim_sum_move(nim: Nim):
    state = read_state(nim)

    if sum(state) == 0: 
        print('error - game is empty')
        return None

    return find_move(state)

In [11]:
#nim = Nim(6)
#show_board(nim)
#print('-----------------------')
#print('starting the game!\n')
#to_remove = my_nim_sum(read_state(nim))
#print('if played correctly')
#if to_remove == 0: print('player 1 should win')
#else: print('player 0 should win')
#print('-----------------------')
#
#strategies = [nim_sum_move, optimal, nim_sum_move, ]
#player = 0
#while nim:
#    ply = strategies[player](nim)
#    print(f"player {player} removes {ply[1]} from {ply[0]}")
#    nim.nimming(ply)
#    show_board(nim)
#    #print(nim)
#    player = 1 - player
#    print('------------------------------')
#print(f"end of game: Player {player} won!")

In [12]:
strategies = [nim_sum_move, optimal, nim_sum_move, gabriele, ]
NIM_ROWS_RANGE = [2, 3, 4, 5, 6, 7, 8, 9, 10]
N_GAMES = 100

for nim_rows in NIM_ROWS_RANGE:

    for starting_player in [0, 1]: #[0, 1]:

        first = strategies[starting_player].__name__
        second = strategies[1 - starting_player].__name__
        to_remove = my_nim_sum(read_state(Nim(nim_rows)))

        print('==================================================================================================================================')
        print('starting games!\n')
        print(f'number of nim rows: {nim_rows}')
        print(f'starting nim-sum: {to_remove}')
        print(f'first to play: {first}')
        if to_remove == 0: print(f"if played correctly <<{second}>> has advantage because the initial nim-sum is {to_remove} and it's the second to play")
        else: print(f"if played correctly <<{first}>> has advantage because the initial nim-sum is {to_remove} and it's the first to play")
        print('-----------------------')

        wins = [0, 0]
        for i in range(N_GAMES):
            #print('================================')
            #print('================================')
            #print(f'GAME {i}')
            #print('================================')
            #print('================================')
            nim = Nim(nim_rows)
            player = starting_player
            #show_board(nim)
            #print(my_nim_sum(read_state(nim)))

            while nim:
                #print('---------------------------')
                #print(f'player <<{strategies[player].__name__}>>')
                ply = strategies[player](nim)
                nim.nimming(ply)
                #show_board(nim)
                #print(my_nim_sum(read_state(nim)))
                player = 1 - player
            wins[player] += 1

        print(f'<<{strategies[0].__name__}>> won {wins[0]} times')
        print(f'<<{strategies[1].__name__}>> won {wins[1]} times')

starting games!

number of nim rows: 2
starting nim-sum: 2
first to play: nim_sum_move
if played correctly <<nim_sum_move>> has advantage because the initial nim-sum is 2 and it's the first to play
-----------------------
<<nim_sum_move>> won 100 times
<<optimal>> won 0 times
starting games!

number of nim rows: 2
starting nim-sum: 2
first to play: optimal
if played correctly <<optimal>> has advantage because the initial nim-sum is 2 and it's the first to play
-----------------------
<<nim_sum_move>> won 69 times
<<optimal>> won 31 times
starting games!

number of nim rows: 3
starting nim-sum: 7
first to play: nim_sum_move
if played correctly <<nim_sum_move>> has advantage because the initial nim-sum is 7 and it's the first to play
-----------------------
<<nim_sum_move>> won 100 times
<<optimal>> won 0 times
starting games!

number of nim rows: 3
starting nim-sum: 7
first to play: optimal
if played correctly <<optimal>> has advantage because the initial nim-sum is 7 and it's the first

## Task two - evolutionary strategy

In [13]:
import numpy as np
from itertools import product
from random import randint

In [34]:
N_ROWS = 4
STARTING_POPULATION = 10

x = [2*(r+1) for r in range(N_ROWS)]
tmp = 1
for xi in x: tmp *= xi
print(tmp)
x2 = [range(ix) for ix in x]
k = None
for i in range(1, len(x2)):
    if k is None:
        k = product(x2[i-1], x2[i])
    else: k = product(k, x2[i])

states = []
for x in k:
    tmp_x = x
    tmp_state = []
    for _ in range(N_ROWS-1):
        tmp_state.append(tmp_x[-1])
        tmp_x = tmp_x[0]
    tmp_state.append(tmp_x)
    tmp_state = tmp_state[::-1]
    states.append(tmp_state)

STATES = [tuple(s) for s in states[1:]]
STATE_TO_IDX = {}
for i in range(len(STATES)): STATE_TO_IDX[STATES[i]] = i

384


In [126]:
def read_state(nim: Nim):
    str_nim = str(nim).strip('<>').split(' ')
    return [int(n) for n in str_nim]

def create_individual():

    individual = []
    for i in range(len(STATES)):
        state = STATES[i]
        r = randint(0, N_ROWS-1)
        while state[r] == 0: r = randint(0, N_ROWS-1)
        c = randint(1, state[r])
        individual.append((r, c))
    
    return individual

def valid_move(state, move):
    if move[0] < 0: return False
    if move[0] >= len(state): return False
    if move[1] <= 0: return False
    if state[move[0]] - move[1] < 0: return False
    return True

def mutation_1(id):
    id_new = id.copy()
    changed = 0
    while changed < 1:
        s_idx = randint(0, len(STATES) - 1)
        state = STATES[s_idx]
        r = randint(0, N_ROWS-1)
        while state[r] == 0: r = randint(0, N_ROWS-1)
        c = randint(1, state[r])
        if id[s_idx] != (r, c):
            id_new[s_idx] = (r, c)
            changed += 1

    return id_new

def mutation_1_2(id):
    id_new = id.copy()
    changed = 0
    while changed < 10:
        s_idx = randint(0, len(STATES) - 1)
        state = STATES[s_idx]
        r = randint(0, N_ROWS-1)
        while state[r] == 0: r = randint(0, N_ROWS-1)
        c = randint(1, state[r])
        if id[s_idx] != (r, c):
            id_new[s_idx] = (r, c)
            changed += 1

    return id_new

def mutation_2(id):
    id_new = id.copy()
    changed = 0
    while changed < 1:
        s_idx = randint(0, len(STATES) - 1)
        state = STATES[s_idx]
        move = id[s_idx]
        rc = randint(0, 1) == 0
        pm = [-1, 1][randint(0, 1)]
        r, c = move[0], move[1]
        if rc: r += pm
        else: c += pm

        if valid_move(state, (r, c)):
            id_new[s_idx] = (r, c)
            changed += 1

    return id_new

def mutation_2_2(id):
    id_new = id.copy()
    changed = 0
    while changed < 10:
        s_idx = randint(0, len(STATES) - 1)
        state = STATES[s_idx]
        move = id[s_idx]
        rc = randint(0, 1) == 0
        pm = [-1, 1][randint(0, 1)]
        r, c = move[0], move[1]
        if rc: r += pm
        else: c += pm

        if valid_move(state, (r, c)):
            id_new[s_idx] = (r, c)
            changed += 1

    return id_new

def fusion_1(id_1, id_2):
    idx = randint(0, len(id_1) - 1)
    id_1_new = id_1.copy()
    id_1_new[idx] = id_2[idx]
    id_2_new = id_2.copy()
    id_2_new[idx] = id_1[idx]

    return id_1_new, id_2_new


def fusion_2(id_1, id_2):
    idx = randint(0, len(id_1) - 1)
    id_new = id_1.copy()
    id_new[int(len(id_1) / 2) + idx:] = id_2[int(len(id_1) / 2) + idx:]
    id_new[:idx] = id_2[:idx]

    return id_new

In [269]:
def match(id_1, id_2, one_first, modified_start= None):

    strategies = [id_1, id_2]

    if one_first: player = 0
    else: player = 1

    if modified_start is None: nim = Nim(N_ROWS)
    else: nim = modified_start

    while nim:
        ply = strategies[player][STATE_TO_IDX[tuple(read_state(nim))]]
        nim.nimming(ply)
        player = 1 - player

    return player == 0
    #return True if id_1 won else False

def match_mine(id_1, one_first, modified_start= None):

    if one_first: player = 0
    else: player = 1

    if modified_start is None: nim = Nim(N_ROWS)
    else: nim = modified_start

    while nim:
        if player == 0: ply = id_1[STATE_TO_IDX[tuple(read_state(nim))]]
        else: ply = nim_sum_move(nim)
        nim.nimming(ply)
        player = 1 - player

    return player == 0
    #return True if id_1 won else False

def match_optimal(id_1, one_first, modified_start= None):

    if one_first: player = 0
    else: player = 1

    if modified_start is None: nim = Nim(N_ROWS)
    else: nim = modified_start

    while nim:
        if player == 0: ply = id_1[STATE_TO_IDX[tuple(read_state(nim))]]
        else: ply = optimal(nim)
        nim.nimming(ply)
        player = 1 - player

    return player == 0
    #return True if id_1 won else False

def tournament(population):
    #print('starting tournament')
    scores = np.zeros(len(population))
    for i in range(len(population)):
        for j in range(i + 1, len(population)):
            if match(population[i], population[j], one_first= True): scores[i] += 1
            else: scores[j] += 1
            if match(population[i], population[j], one_first= False): scores[i] += 1
            else: scores[j] += 1
    best_idx = np.argsort(scores)[::-1]
    new_population = [population[i] for i in best_idx[:STARTING_POPULATION]]
    return new_population, scores[best_idx[:STARTING_POPULATION]]

def tournament_2(population):
    #print('starting tournament')
    scores = np.zeros(len(population))
    scores_against_mine = np.zeros(len(population))
    scores_against_optimal = np.zeros(len(population))

    for i_start in range(10):
        
        if i_start == 0: 
            nim = Nim(N_ROWS)
        else:
            nim = Nim(N_ROWS)
            r = randint(0, N_ROWS-1)
            c = randint(1, read_state(nim)[r])
            nim.nimming(Nimply(r, c))
            #show_board(nim)


        for i in range(len(population)):

            if match_mine(population[i], one_first= True, modified_start= deepcopy(nim)):
                scores[i] += 10000
                scores_against_mine[i] += 1
            if match_mine(population[i], one_first= False, modified_start= deepcopy(nim)):
                scores[i] += 10000
                scores_against_mine[i] += 1

            for _ in range(5):

                if match_optimal(population[i], one_first= True, modified_start= deepcopy(nim)): 
                    scores[i] += (len(population) // 5)
                    scores_against_optimal[i] += 1
                if match_optimal(population[i], one_first= False, modified_start= deepcopy(nim)):
                    scores[i] += (len(population) // 5)
                    scores_against_optimal[i] += 1

            for j in range(len(population)):
                if i != j:
                    if match(population[i], population[j], one_first= True, modified_start= deepcopy(nim)): scores[i] += 1
                    else: scores[j] += 1
                    if match(population[i], population[j], one_first= False, modified_start= deepcopy(nim)): scores[i] += 1
                    else: scores[j] += 1

    best_idx = np.argsort(scores)[::-1]
    new_population = [population[i] for i in best_idx[:STARTING_POPULATION]]
    return new_population, scores[best_idx[:STARTING_POPULATION]], scores_against_mine[best_idx[:STARTING_POPULATION]], scores_against_optimal[best_idx[:STARTING_POPULATION]]

def tournament_3(population):  # scores as tuple and try different starts
    #print('starting tournament')
    scores = np.array([[0, 0, 0] for _ in range(len(population))])

    for i_start in range(10):
        
        if i_start == 0: 
            nim = Nim(N_ROWS)
        else:
            nim = Nim(N_ROWS)
            r = randint(0, N_ROWS-1)
            c = randint(1, read_state(nim)[r])
            nim.nimming(Nimply(r, c))
            #show_board(nim)

        for i in range(len(population)):

            if match_mine(population[i], one_first= True, modified_start= deepcopy(nim)): scores[i][0] += 1
            if match_mine(population[i], one_first= False, modified_start= deepcopy(nim)): scores[i][0] += 1

            for _ in range(5):
                if match_optimal(population[i], one_first= True, modified_start= deepcopy(nim)): scores[i][1] += 1
                if match_optimal(population[i], one_first= False, modified_start= deepcopy(nim)): scores[i][1] += 1

            for j in range(len(population)):
                if i!= j:
                    if match(population[i], population[j], one_first= True, modified_start= deepcopy(nim)): scores[i][2] += 1
                    else: scores[j][2] += 1
                    if match(population[i], population[j], one_first= False, modified_start= deepcopy(nim)): scores[i][2] += 1
                    else: scores[j][2] += 1

    sort_idx = np.append(np.array([x for x in range(len(population))]).reshape(-1, 1), scores, axis= 1)

    best_idx = np.array(sorted(sort_idx, key=lambda tup: (tup[1], tup[2], tup[3]), reverse=True))[:, 0]
    
    new_population = [population[i] for i in best_idx[:STARTING_POPULATION]]
    return new_population, scores[best_idx[:STARTING_POPULATION]]


def procreation(population):
    #print('procreating')
    # create new individuals through mutation and fusion
    new_population = population.copy()
    for i in range(len(population)):

        new_population.append(population[i])

        new_population.append(mutation_1(population[i]))
        
        new_population.append(mutation_1_2(population[i]))
        
        new_population.append(mutation_2(population[i]))
        
        new_population.append(mutation_2_2(population[i]))

        for j in range(i + 1, len(population)):

            for ni in fusion_1(population[i], population[j]): new_population.append(ni)

            new_population.append(fusion_2(population[i], population[j]))

    return new_population

In [270]:
population = [create_individual() for _ in range(STARTING_POPULATION)]

for i in range(1000):

    print(f'epoch {i}')

    population = procreation(population)

    population, scores, vs_mine, vs_optimal = tournament_2(population)
    print(scores)
    print(vs_optimal)
    print(vs_mine)

    #population, scores = tournament_3(population)
    #print(scores)

print(len(scores))
print(scores)

epoch 0
[16355. 16241.  6217.  6207.  6177.  6167.  6143.  6139.  6102.  6032.]
[33. 31. 35. 35. 33. 33. 33. 33. 28. 28.]
[1. 1. 0. 0. 0. 0. 0. 0. 0. 0.]
epoch 1
[25368. 25251. 25213. 25115. 24714. 15407. 15368. 15368. 15330. 15310.]
[34. 31. 35. 31. 24. 35. 34. 34. 38. 36.]
[2. 2. 2. 2. 2. 1. 1. 1. 1. 1.]
epoch 2
[7869. 7516. 5783. 5588. 5549. 5471. 5471. 5432. 5393. 5393.]
[33. 24. 39. 44. 43. 41. 41. 40. 39. 39.]
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
epoch 3
[16735. 15333. 15003. 14881. 14801. 14801. 14764. 14723. 14684. 14684.]
[31. 35. 33. 33. 31. 31. 30. 29. 28. 28.]
[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
epoch 4
[7139. 7100. 7100. 7061. 7061. 7022. 7022. 6944. 6944. 6905.]
[35. 34. 34. 33. 33. 32. 32. 30. 30. 29.]
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
epoch 5
[25294. 25226. 25021. 25021. 24670. 15255. 15216. 15216. 15216. 15177.]
[36. 34. 29. 29. 20. 35. 34. 34. 34. 33.]
[2. 2. 2. 2. 2. 1. 1. 1. 1. 1.]
epoch 6
[6546. 6156. 6156. 6156. 6078. 6039. 6039. 6000. 6000. 5999.]
[50. 40. 40. 40. 38. 37. 3

In [271]:
champion = population[0]

In [272]:
nim = Nim(N_ROWS)
show_board(nim)

champion_is_first = False

print('-----------------------')
print('starting the game!\n')
to_remove = my_nim_sum(read_state(nim))

if champion_is_first: print('champion is player 0')
else: print('champion is player 1')

print('\nif played correctly')
if to_remove == 0: print('player 1 should win')
else: print('player 0 should win')
print('-----------------------')

player = 0
while nim:
    if champion_is_first:
        if player == 0: ply = champion[STATE_TO_IDX[tuple(read_state(nim))]]
        else: ply = nim_sum_move(nim)
    else:
        if player == 0: ply = nim_sum_move(nim)
        else: ply = champion[STATE_TO_IDX[tuple(read_state(nim))]]
    
    print(f"player {player} removes {ply[1]} from {ply[0]}")
    nim.nimming(ply)
    show_board(nim)
    #print(nim)
    player = 1 - player
    print('------------------------------')
if champion_is_first: print('champion is player 0')
else: print('champion is player 1')
print(f"end of game: Player {player} won!")

_ _ _ O _ _ _ 
_ _ O O O _ _ 
_ O O O O O _ 
O O O O O O O 
-----------------------
starting the game!

champion is player 1

if played correctly
player 1 should win
-----------------------
player 0 removes 1 from 1
_ _ _ O _ _ _ 
_ _ O O _ _ _ 
_ O O O O O _ 
O O O O O O O 
------------------------------
player 1 removes 7 from 3
_ _ _ O _ _ _ 
_ _ O O _ _ _ 
_ O O O O O _ 
_ _ _ _ _ _ _ 
------------------------------
player 0 removes 2 from 2
_ _ _ O _ _ _ 
_ _ O O _ _ _ 
_ O O O _ _ _ 
_ _ _ _ _ _ _ 
------------------------------
player 1 removes 2 from 1
_ _ _ O _ _ _ 
_ _ _ _ _ _ _ 
_ O O O _ _ _ 
_ _ _ _ _ _ _ 
------------------------------
player 0 removes 3 from 2
_ _ _ O _ _ _ 
_ _ _ _ _ _ _ 
_ _ _ _ _ _ _ 
_ _ _ _ _ _ _ 
------------------------------
player 1 removes 1 from 0
_ _ _ _ _ _ _ 
_ _ _ _ _ _ _ 
_ _ _ _ _ _ _ 
_ _ _ _ _ _ _ 
------------------------------
champion is player 1
end of game: Player 0 won!


In [275]:
nim = Nim(N_ROWS)
show_board(nim)

champion_is_first = True

print('-----------------------')
print('starting the game!\n')
to_remove = my_nim_sum(read_state(nim))

if champion_is_first: print('champion is player 0')
else: print('champion is player 1')

print('\nif played correctly')
if to_remove == 0: print('player 1 should win')
else: print('player 0 should win')
print('-----------------------')

player = 0
while nim:
    if champion_is_first:
        if player == 0: ply = champion[STATE_TO_IDX[tuple(read_state(nim))]]
        else: ply = optimal(nim)
    else:
        if player == 0: ply = optimal(nim)
        else: ply = champion[STATE_TO_IDX[tuple(read_state(nim))]]
    
    print(f"player {player} removes {ply[1]} from {ply[0]}")
    nim.nimming(ply)
    show_board(nim)
    #print(nim)
    player = 1 - player
    print('------------------------------')
if champion_is_first: print('champion is player 0')
else: print('champion is player 1')
print(f"end of game: Player {player} won!")

_ _ _ O _ _ _ 
_ _ O O O _ _ 
_ O O O O O _ 
O O O O O O O 
-----------------------
starting the game!

champion is player 0

if played correctly
player 1 should win
-----------------------
player 0 removes 3 from 3
_ _ _ O _ _ _ 
_ _ O O O _ _ 
_ O O O O O _ 
O O O O _ _ _ 
------------------------------
player 1 removes 4 from 3
_ _ _ O _ _ _ 
_ _ O O O _ _ 
_ O O O O O _ 
_ _ _ _ _ _ _ 
------------------------------
player 0 removes 3 from 2
_ _ _ O _ _ _ 
_ _ O O O _ _ 
_ O O _ _ _ _ 
_ _ _ _ _ _ _ 
------------------------------
player 1 removes 3 from 1
_ _ _ O _ _ _ 
_ _ _ _ _ _ _ 
_ O O _ _ _ _ 
_ _ _ _ _ _ _ 
------------------------------
player 0 removes 2 from 2
_ _ _ O _ _ _ 
_ _ _ _ _ _ _ 
_ _ _ _ _ _ _ 
_ _ _ _ _ _ _ 
------------------------------
player 1 removes 1 from 0
_ _ _ _ _ _ _ 
_ _ _ _ _ _ _ 
_ _ _ _ _ _ _ 
_ _ _ _ _ _ _ 
------------------------------
champion is player 0
end of game: Player 0 won!


## first try deep

In [551]:
#from math import floor
#
#def valid_move(state, move):
#    if move[0] < 0: return False
#    if move[0] >= len(state): return False
#    if move[1] <= 0: return False
#    if state[move[0]] - move[1] < 0: return False
#    return True
#
#def sigmoid(x): return 1 / (1 + np.exp(-x))
#
#def anti_sigmoid(x): return np.log(x) - np.log(1 - x)
#
#class Individual:
#    def __init__(self, n_nim_rows, copy_from= None, random_init= False):
#
#        self.n_rows = n_nim_rows
#        self.n_hidden = 4
#
#        if copy_from is None:
#
#            if not random_init:
#
#                self.input_layer_weights = np.zeros(self.n_hidden * n_nim_rows).reshape(self.n_hidden, -1)
#                self.input_layer_biases = np.zeros(self.n_hidden)
#
#                self.hidden_layer_weights = np.zeros(2 * self.n_hidden).reshape(-1, self.n_hidden)
#                self.hidden_layer_biases = np.zeros(2)
#
#            else:
#
#                [sigmoid(randint(-9, 9)) for _ in range(self.n_hidden * n_nim_rows)]
#                self.input_layer_weights = np.array([sigmoid(randint(-9, 9)) for _ in range(self.n_hidden * n_nim_rows)]).reshape(self.n_hidden, -1)
#                self.input_layer_biases = np.zeros(self.n_hidden)
#
#                self.hidden_layer_weights = np.array([sigmoid(randint(-9, 9)) for _ in range(2 * self.n_hidden)]).reshape(-1, self.n_hidden)
#                self.hidden_layer_biases = np.zeros(2)
#        else:
#
#            if type(copy_from) == Individual:
#
#                self.input_layer_weights, self.input_layer_biases = copy_from.get_input_layer()
#                self.hidden_layer_weights, self.hidden_layer_biases = copy_from.get_hidden_layer()
#
#            else:
#
#                self.input_layer_weights, self.input_layer_biases = copy_from[0]
#                self.hidden_layer_weights, self.hidden_layer_biases = copy_from[1]
#
#
#    def get_input_layer(self): return self.input_layer_weights, self.input_layer_biases
#    def get_hidden_layer(self): return self.hidden_layer_weights, self.hidden_layer_biases
#
#    def move(self, nim, get_derivates= False):
#
#        state = read_state(nim)
#
#        len_s = len(state)
#
#        hidden_layer = np.zeros(self.n_hidden)
#        if get_derivates: derivates_hidden = np.zeros(len_s)
#
#        for j in range(self.n_hidden):
#
#            for i in range(len_s):
#
#                hidden_layer[j] += state[i] * self.input_layer_weights[j, i]
#            
#            hidden_layer[j] = (sigmoid(hidden_layer[j] + self.input_layer_biases[j]) * 2) - 1
#
#            if get_derivates: derivates_hidden[j] = hidden_layer[j] * (1 - hidden_layer[j])
#
#        output_layer = np.zeros(2)
#        if get_derivates: derivates_output = np.zeros(2)
#
#        for j in range(2):
#
#            for i in range(self.n_hidden):
#
#                output_layer[j] += hidden_layer[i] * self.hidden_layer_weights[j, i]
#            
#            output_layer[j] = sigmoid(output_layer[j] + self.hidden_layer_biases[j])
#
#            if get_derivates: derivates_output[j] = output_layer[j] * (1 - output_layer[j])
#
#        r = floor(output_layer[0] * self.n_rows)# if output_layer[0] < 1 else self.n_rows - 1
#        c = floor(output_layer[1] * state[r])# if output_layer[1] < 1 else state[r] - 1
#
#
#        if get_derivates: return Nimply(r, c), derivates_hidden, derivates_output
#        return Nimply(r, c)
#    
#    def train(self, nim):
#
#        state = read_state(nim)
#
#        best_move = nim_sum_move(nim)
#
#        best_move_pre = best_move[0] / self.n_rows, best_move[1] / state[best_move[0]]
#
#        move, hidden_derivates, output_derivates = self.move(nim, get_derivates= True)
#
#        if state[move[0]] != 0: move_pre = move[0] / self.n_rows, move[1] / state[move[0]]
#        else: move_pre = move[0] / self.n_rows, best_move[1]
#        
#        error_0 = pow(move_pre[0] - best_move_pre[0], 2)
#        error_1 = pow(move_pre[1] - best_move_pre[1], 2)
#
#        dcda = [2 * (move_pre[0] - best_move_pre[0]), 2 * (move_pre[1] - best_move_pre[1])]
#
#        for i in range(2):
#            for j in range(self.n_hidden):
#                self.hidden_layer_weights[i, j] -= 0.1 * dcda[i] * output_derivates[i]
#
#                for k in range(len(state)):
#                    self.input_layer_weights[j, k] -= 0.1 * dcda[i] * output_derivates[i] * hidden_derivates[j]
#
#        #print('================================================')
#        #print('state')
#        #print(state)        
#        #print('\nbest move')
#        #print((best_move[0], best_move[1]))
#        #print('\nmove')
#        #print((move[0], move[1]))
#        #print('-----------------')
#        #print('\npre move comparison')
#        #print(move_pre)
#        #print(best_move_pre)
#        #print('\nerror')
#        #print((error_0, error_1))
#        #print('\nweights')
#        #print(self.input_layer_weights)
#        #print(self.hidden_layer_weights)
#        #print(self.input_layer_weights.shape)
#        #print(self.hidden_layer_weights.shape)
#        #print('derivatives')
#        #print(hidden_derivates)
#        #print(output_derivates)
#
#        return best_move
#
#    def mutate(self):
#        
#        new_input_weights = np.copy(self.input_layer_weights)
#        new_input_biases = np.copy(self.input_layer_biases)
#
#        for i in range(new_input_weights.shape[0]):
#            for j in range(new_input_weights.shape[1]):
#                new_w = new_input_weights[i, j] + np.random.normal(0, 0.1)
#                while new_w < 0 or new_w >= 1: new_w = new_input_weights[i, j] + np.random.normal(0, 0.1)
#                new_input_weights[i, j] = new_w
#
#        for i in range(len(new_input_biases)):
#            new_b = new_input_biases[i] + np.random.normal(0, 0.1)
#            while new_b < 0 or new_b >= 1: new_b = new_input_biases[i] + np.random.normal(0, 0.1)
#            new_input_biases[i] = new_b
#
#        new_hidden_weights = np.copy(self.hidden_layer_weights)
#        new_hidden_biases = np.copy(self.hidden_layer_biases)
#
#        for i in range(new_hidden_weights.shape[0]):
#            for j in range(new_hidden_weights.shape[1]):
#                new_w = new_hidden_weights[i, j] + np.random.normal(0, 0.1)
#                while new_w < 0 or new_w >= 1: new_w = new_hidden_weights[i, j] + np.random.normal(0, 0.1)
#                new_hidden_weights[i, j] = new_w
#
#        for i in range(len(new_hidden_biases)):
#            new_b = new_hidden_biases[i] + np.random.normal(0, 0.1)
#            while new_b < 0 or new_b >= 1: new_b = new_hidden_biases[i] + np.random.normal(0, 0.1)
#            new_hidden_biases[i] = new_b
#
#        return Individual(self.n_rows, copy_from= ((new_input_weights, new_input_biases), (new_hidden_weights, new_hidden_biases)))



#id = Individual(4, random_init= True)



#for g in range(100000):
#
#    #print(f'game {g + 1}')
#
#    nim = Nim(4)
#    r = randint(0, 3)
#    c = randint(0, read_state(nim)[r])
#    nim.nimming(Nimply(r, c))
#
#    while nim:
#        ply = id.train(nim)
#        nim.nimming(ply)

## due reti, uno sceglie (state) -> (row), l'altra (state, row) -> (n_from row) ma non evolutionary

In [587]:
from math import floor

def valid_move(state, move):
    if move[0] < 0: return False
    if move[0] >= len(state): return False
    if move[1] <= 0: return False
    if state[move[0]] - move[1] < 0: return False
    return True

def sigmoid(x): return 1 / (1 + np.exp(-x))

def anti_sigmoid(x): return np.log(x) - np.log(1 - x)

class Individual_2:
    def __init__(self, n_nim_rows, copy_from= None, random_init= False):

        self.n_rows = n_nim_rows
        self.n_hidden = 4

        if copy_from is None:

            if not random_init:

                self.input_layer_weights_1 = np.zeros(self.n_hidden * n_nim_rows).reshape(self.n_hidden, -1)
                self.input_layer_biases_1 = np.zeros(self.n_hidden)
                self.hidden_layer_weights_1 = np.zeros(self.n_hidden)
                self.hidden_layer_biases_1 = 0

                self.input_layer_weights_2 = np.zeros(self.n_hidden * (n_nim_rows + 1)).reshape(self.n_hidden, -1)
                self.input_layer_biases_2 = np.zeros(self.n_hidden)
                self.hidden_layer_weights_2 = np.zeros(self.n_hidden)
                self.hidden_layer_biases_2 = 0

            else:

                self.input_layer_weights_1 = np.array([sigmoid(randint(-9, 9)) for _ in range(self.n_hidden * n_nim_rows)]).reshape(self.n_hidden, -1)
                self.input_layer_biases_1 = np.zeros(self.n_hidden)
                self.hidden_layer_weights_1 = np.array([sigmoid(randint(-9, 9)) for _ in range(self.n_hidden)])
                self.hidden_layer_biases_1 = 0

                self.input_layer_weights_2 = np.array([sigmoid(randint(-9, 9)) for _ in range(self.n_hidden * (n_nim_rows + 1))]).reshape(self.n_hidden, -1)
                self.input_layer_biases_2 = np.zeros(self.n_hidden)
                self.hidden_layer_weights_2 = np.array([sigmoid(randint(-9, 9)) for _ in range(self.n_hidden)])
                self.hidden_layer_biases_2 = 0
        else:

            if type(copy_from) == Individual_2:

                self.input_layer_weights_1, self.input_layer_weights_2, self.input_layer_biases_1, self.input_layer_biases_2 = copy_from.get_input_layer()
                self.hidden_layer_weights_1, self.hidden_layer_weights_2, self.hidden_layer_biases_1, self.hidden_layer_biases_2 = copy_from.get_hidden_layer()

            else:

                self.input_layer_weights_1, self.input_layer_weights_2, self.input_layer_biases_1, self.input_layer_biases_2 = copy_from[0]
                self.hidden_layer_weights_1, self.hidden_layer_weights_2, self.hidden_layer_biases_1, self.hidden_layer_biases_2 = copy_from[1]


    def get_input_layer(self): return self.input_layer_weights_1, self.input_layer_weights_2, self.input_layer_biases_1, self.input_layer_biases_2
    def get_hidden_layer(self): return self.hidden_layer_weights_1, self.hidden_layer_weights_2, self.hidden_layer_biases_1, self.hidden_layer_biases_2

    def move_r(self, nim, get_derivates= False):

        state = read_state(nim)

        len_s = len(state)

        hidden_layer_1 = np.zeros(self.n_hidden)
        if get_derivates: derivates_hidden_1 = np.zeros(len_s)

        for j in range(self.n_hidden):

            for i in range(len_s):

                hidden_layer_1[j] += state[i] * self.input_layer_weights_1[j, i]
            
            hidden_layer_1[j] = (sigmoid(hidden_layer_1[j] + self.input_layer_biases_1[j]) * 2) - 1

            if get_derivates: derivates_hidden_1[j] = hidden_layer_1[j] * (1 - hidden_layer_1[j])

        output_layer_1 = 0

        for i in range(self.n_hidden):

            output_layer_1 += hidden_layer_1[i] * self.hidden_layer_weights_1[i]
        
        output_layer_1 = sigmoid(output_layer_1 + self.hidden_layer_biases_1)

        if get_derivates: derivates_output_1 = output_layer_1 * (1 - output_layer_1)

        r = floor(output_layer_1 * self.n_rows)# if output_layer[0] < 1 else self.n_rows - 1

        if get_derivates: return r, derivates_hidden_1, derivates_output_1
        else: return r
    
    def move_c(self, nim, r, get_derivates= False):

        state = read_state(nim)

        len_s = len(state)

        hidden_layer_2 = np.zeros(self.n_hidden)
        if get_derivates: derivates_hidden_2 = np.zeros(len_s + 1)

        for j in range(self.n_hidden):

            for i in range(len_s):

                hidden_layer_2[j] += state[i] * self.input_layer_weights_2[j, i]

###########################################################################################################################

            #hidden_layer_2[j] += r * self.input_layer_weights_2[j, i + 1]
            hidden_layer_2[j] += state[r] * self.input_layer_weights_2[j, i + 1]

###########################################################################################################################
            
            hidden_layer_2[j] = (sigmoid(hidden_layer_2[j] + self.input_layer_biases_2[j]) * 2) - 1

            if get_derivates: derivates_hidden_2[j] = hidden_layer_2[j] * (1 - hidden_layer_2[j])

        output_layer_2 = 0

        for i in range(self.n_hidden):

            output_layer_2 += hidden_layer_2[i] * self.hidden_layer_weights_2[i]
        
        output_layer_2 = sigmoid(output_layer_2 + self.hidden_layer_biases_2)

        if get_derivates: derivates_output_2 = output_layer_2 * (1 - output_layer_2)

        c = floor(output_layer_2 * state[r])# if output_layer[1] < 1 else state[r] - 1

        if get_derivates: return c, derivates_hidden_2, derivates_output_2
        else: return c

    def move(self, nim): return Nimply(self.move_r(nim), self.move_c(nim, r))
    
    def train(self, nim):

        state = read_state(nim)

        best_move = nim_sum_move(nim)

        best_move_pre = best_move[0] / self.n_rows, best_move[1] / state[best_move[0]]

        r, r_hidden_derivates, r_output_derivates = self.move_r(nim, get_derivates= True)
        c, c_hidden_derivates, c_output_derivates = self.move_c(nim, best_move[0], get_derivates= True)
        move = (r, c)

        if state[move[0]] != 0: move_pre = move[0] / self.n_rows, move[1] / state[move[0]]
        else: move_pre = move[0] / self.n_rows, best_move[1]

        dcda = [2 * (move_pre[0] - best_move_pre[0]), 2 * (move_pre[1] - best_move_pre[1])]

        for j in range(self.n_hidden):
            self.hidden_layer_weights_1[j] -= 0.1 * dcda[0] * r_output_derivates
            self.hidden_layer_weights_2[j] -= 0.1 * dcda[1] * c_output_derivates

            for k in range(len(state)):
                self.input_layer_weights_1[j, k] -= 0.1 * dcda[0] * r_output_derivates * r_hidden_derivates[j]
                self.input_layer_weights_2[j, k] -= 0.1 * dcda[1] * c_output_derivates * c_hidden_derivates[j]

            self.input_layer_weights_2[j, k + 1] -= 0.1 * dcda[1] * c_output_derivates * c_hidden_derivates[j]

        return best_move

In [593]:
id = Individual_2(4, random_init= True)

for g in range(100000):

    #print(f'game {g + 1}')

    nim = Nim(4)
    r = randint(0, 3)
    c = randint(0, read_state(nim)[r])
    nim.nimming(Nimply(r, c))

    while nim:
        ply = id.train(nim)
        nim.nimming(ply)

In [595]:
nim = Nim(4)
show_board(nim)

champion_is_first = True

print('-----------------------')
print('starting the game!\n')
to_remove = my_nim_sum(read_state(nim))

if champion_is_first: print('champion is player 0')
else: print('champion is player 1')

print('\nif played correctly')
if to_remove == 0: print('player 1 should win')
else: print('player 0 should win')
print('-----------------------')

player = 0
while nim:
    if champion_is_first:
        if player == 0: ply = id.move(nim)
        else: ply = optimal(nim)
    else:
        if player == 0: ply = optimal(nim)
        else: ply = id.move(nim)
    
    print(f"player {player} removes {ply[1]} from {ply[0]}")
    nim.nimming(ply)
    show_board(nim)
    #print(nim)
    player = 1 - player
    print('------------------------------')
if champion_is_first: print('champion is player 0')
else: print('champion is player 1')
print(f"end of game: Player {player} won!")

_ _ _ O _ _ _ 
_ _ O O O _ _ 
_ O O O O O _ 
O O O O O O O 
-----------------------
starting the game!

champion is player 0

if played correctly
player 1 should win
-----------------------
player 0 removes 2 from 2
_ _ _ O _ _ _ 
_ _ O O O _ _ 
_ O O O _ _ _ 
O O O O O O O 
------------------------------
player 1 removes 3 from 2
_ _ _ O _ _ _ 
_ _ O O O _ _ 
_ _ _ _ _ _ _ 
O O O O O O O 
------------------------------
player 0 removes 0 from 2
_ _ _ O _ _ _ 
_ _ O O O _ _ 
_ _ _ _ _ _ _ 
O O O O O O O 
------------------------------
player 1 removes 7 from 3
_ _ _ O _ _ _ 
_ _ O O O _ _ 
_ _ _ _ _ _ _ 
_ _ _ _ _ _ _ 
------------------------------
player 0 removes 0 from 2
_ _ _ O _ _ _ 
_ _ O O O _ _ 
_ _ _ _ _ _ _ 
_ _ _ _ _ _ _ 
------------------------------
player 1 removes 1 from 1
_ _ _ O _ _ _ 
_ _ O O _ _ _ 
_ _ _ _ _ _ _ 
_ _ _ _ _ _ _ 
------------------------------
player 0 removes 0 from 2
_ _ _ O _ _ _ 
_ _ O O _ _ _ 
_ _ _ _ _ _ _ 
_ _ _ _ _ _ _ 
----------------------

In [599]:
nim = Nim(4)
show_board(nim)

champion_is_first = True

print('-----------------------')
print('starting the game!\n')
to_remove = my_nim_sum(read_state(nim))

if champion_is_first: print('champion is player 0')
else: print('champion is player 1')

print('\nif played correctly')
if to_remove == 0: print('player 1 should win')
else: print('player 0 should win')
print('-----------------------')

player = 0
while nim:
    if champion_is_first:
        if player == 0: ply = id.move(nim)
        else: ply = nim_sum_move(nim)
    else:
        if player == 0: ply = nim_sum_move(nim)
        else: ply = id.move(nim)
    
    print(f"player {player} removes {ply[1]} from {ply[0]}")
    nim.nimming(ply)
    show_board(nim)
    #print(nim)
    player = 1 - player
    print('------------------------------')
if champion_is_first: print('champion is player 0')
else: print('champion is player 1')
print(f"end of game: Player {player} won!")

_ _ _ O _ _ _ 
_ _ O O O _ _ 
_ O O O O O _ 
O O O O O O O 
-----------------------
starting the game!

champion is player 0

if played correctly
player 1 should win
-----------------------
player 0 removes 2 from 2
_ _ _ O _ _ _ 
_ _ O O O _ _ 
_ O O O _ _ _ 
O O O O O O O 
------------------------------
player 1 removes 6 from 3
_ _ _ O _ _ _ 
_ _ O O O _ _ 
_ O O O _ _ _ 
O _ _ _ _ _ _ 
------------------------------
player 0 removes 1 from 2
_ _ _ O _ _ _ 
_ _ O O O _ _ 
_ O O _ _ _ _ 
O _ _ _ _ _ _ 
------------------------------
player 1 removes 1 from 0
_ _ _ _ _ _ _ 
_ _ O O O _ _ 
_ O O _ _ _ _ 
O _ _ _ _ _ _ 
------------------------------
player 0 removes 0 from 2
_ _ _ _ _ _ _ 
_ _ O O O _ _ 
_ O O _ _ _ _ 
O _ _ _ _ _ _ 
------------------------------
player 1 removes 1 from 1
_ _ _ _ _ _ _ 
_ _ O O _ _ _ 
_ O O _ _ _ _ 
O _ _ _ _ _ _ 
------------------------------
player 0 removes 0 from 2
_ _ _ _ _ _ _ 
_ _ O O _ _ _ 
_ O O _ _ _ _ 
O _ _ _ _ _ _ 
----------------------

## due reti evolutionary - not great results

In [113]:
from math import floor
import numpy as np
from itertools import product
from random import randint

def show_board(nim: Nim) -> None:
    state = read_state(nim)
    for r in range(len(state)):
        tmp_str = ''
        for _ in range(len(state) - (r + 1)): tmp_str += '_ '
        for _ in range(state[r]): tmp_str += 'O '
        for _ in range(len(state) + r - state[r]): tmp_str += '_ '
        print(tmp_str)
    print('=============')

def valid_move(state, move):
    if move[0] < 0: return False
    if move[0] >= len(state): return False
    if move[1] <= 0: return False
    if state[move[0]] - move[1] < 0: return False
    return True

def sigmoid(x): return 1 / (1 + np.exp(-x))

def anti_sigmoid(x): return np.log(x) - np.log(1 - x)

class Individual_3:
    def __init__(self, n_nim_rows, copy_from= None, random_init= False):

        self.n_rows = n_nim_rows
        self.n_hidden = 4

        if copy_from is None:

            if not random_init:

                self.input_layer_weights_r = np.zeros(self.n_hidden * n_nim_rows).reshape(self.n_hidden, -1)
                self.input_layer_biases_r = np.zeros(self.n_hidden)
                self.hidden_layer_weights_r = np.zeros(self.n_hidden)
                self.hidden_layer_biases_r = 0

                self.input_layer_weights_c = np.zeros(self.n_hidden * (n_nim_rows + 1)).reshape(self.n_hidden, -1)
                self.input_layer_biases_c = np.zeros(self.n_hidden)
                self.hidden_layer_weights_c = np.zeros(self.n_hidden)
                self.hidden_layer_biases_c = 0

            else:

                self.input_layer_weights_r = np.array([sigmoid(randint(-9, 9)) for _ in range(self.n_hidden * n_nim_rows)]).reshape(self.n_hidden, -1)
                self.input_layer_biases_r = np.zeros(self.n_hidden)
                self.hidden_layer_weights_r = np.array([sigmoid(randint(-9, 9)) for _ in range(self.n_hidden)])
                self.hidden_layer_biases_r = 0

                self.input_layer_weights_c = np.array([sigmoid(randint(-9, 9)) for _ in range(self.n_hidden * (n_nim_rows + 1))]).reshape(self.n_hidden, -1)
                self.input_layer_biases_c = np.zeros(self.n_hidden)
                self.hidden_layer_weights_c = np.array([sigmoid(randint(-9, 9)) for _ in range(self.n_hidden)])
                self.hidden_layer_biases_c = 0
        else:

            if type(copy_from) == Individual_3:

                self.input_layer_weights_r, self.input_layer_biases_r, self.hidden_layer_weights_r, self.hidden_layer_biases_r = copy_from.get_r_dense()
                self.input_layer_weights_c, self.input_layer_biases_c, self.hidden_layer_weights_c, self.hidden_layer_biases_c = copy_from.get_c_dense()

            else:

                self.input_layer_weights_r, self.input_layer_biases_r, self.hidden_layer_weights_r, self.hidden_layer_biases_r = copy_from[0]
                self.input_layer_weights_c, self.input_layer_biases_c, self.hidden_layer_weights_c, self.hidden_layer_biases_c = copy_from[1]

    def get_r_dense(self): return self.input_layer_weights_r, self.input_layer_biases_r, self.hidden_layer_weights_r, self.hidden_layer_biases_r
    def get_c_dense(self): return self.input_layer_weights_c, self.input_layer_biases_c, self.hidden_layer_weights_c, self.hidden_layer_biases_c

    def move_r(self, nim, get_derivates= False):

        state = read_state(nim)

        len_s = len(state)

        hidden_layer_r = np.zeros(self.n_hidden)
        if get_derivates: derivates_hidden_r = np.zeros(len_s)

        for j in range(self.n_hidden):

            for i in range(len_s):

                hidden_layer_r[j] += state[i] * self.input_layer_weights_r[j, i]
            
            hidden_layer_r[j] = (sigmoid(hidden_layer_r[j] + self.input_layer_biases_r[j]) * 2) - 1

            if get_derivates: derivates_hidden_r[j] = hidden_layer_r[j] * (1 - hidden_layer_r[j])

        output_layer_r = 0

        for i in range(self.n_hidden):

            output_layer_r += hidden_layer_r[i] * self.hidden_layer_weights_r[i]
        
        output_layer_r = sigmoid(output_layer_r + self.hidden_layer_biases_r)

        if get_derivates: derivates_output_r = output_layer_r * (1 - output_layer_r)

        r = floor(output_layer_r * self.n_rows)# if output_layer[0] < 1 else self.n_rows - 1

        if get_derivates: return r, derivates_hidden_r, derivates_output_r
        else: return r
    
    def move_c(self, nim, r, get_derivates= False):

        state = read_state(nim)

        len_s = len(state)

        hidden_layer_c = np.zeros(self.n_hidden)
        if get_derivates: derivates_hidden_c = np.zeros(len_s + 1)

        for j in range(self.n_hidden):

            for i in range(len_s):

                hidden_layer_c[j] += state[i] * self.input_layer_weights_c[j, i]

###########################################################################################################################

            #hidden_layer_c[j] += r * self.input_layer_weights_c[j, i + 1]
            hidden_layer_c[j] += state[r] * self.input_layer_weights_c[j, i + 1]

###########################################################################################################################
            
            hidden_layer_c[j] = (sigmoid(hidden_layer_c[j] + self.input_layer_biases_c[j]) * 2) - 1

            if get_derivates: derivates_hidden_c[j] = hidden_layer_c[j] * (1 - hidden_layer_c[j])

        output_layer_c = 0

        for i in range(self.n_hidden):

            output_layer_c += hidden_layer_c[i] * self.hidden_layer_weights_c[i]
        
        output_layer_c = sigmoid(output_layer_c + self.hidden_layer_biases_c)

        if get_derivates: derivates_output_c = output_layer_c * (1 - output_layer_c)

        c = floor(output_layer_c * state[r])# if output_layer[1] < 1 else state[r] - 1

        if get_derivates: return c, derivates_hidden_c, derivates_output_c
        else: return c

    def move(self, nim):
        r = self.move_r(nim)
        c = self.move_c(nim, r)
        if valid_move(read_state(nim), (r, c)): return Nimply(r, c)
        else: return -1

    def mutate(self):

        new_input_weights_r = np.copy(self.input_layer_weights_r)
        new_input_biases_r = np.copy(self.input_layer_biases_r)

        for i in range(new_input_weights_r.shape[0]):
            for j in range(new_input_weights_r.shape[1]):

                new_w = new_input_weights_r[i, j] + np.random.normal(0, 0.1)
                while new_w < 0 or new_w >= 1: new_w = new_input_weights_r[i, j] + np.random.normal(0, 0.1)
                new_input_weights_r[i, j] = new_w

        for i in range(len(new_input_biases_r)):
            new_b = new_input_biases_r[i] + np.random.normal(0, 0.1)
            while new_b < 0 or new_b >= 1: new_b = new_input_biases_r[i] + np.random.normal(0, 0.1)
            new_input_biases_r[i] = new_b

        new_input_weights_c = np.copy(self.input_layer_weights_c)
        new_input_biases_c = np.copy(self.input_layer_biases_c)

        for i in range(new_input_weights_c.shape[0]):
            for j in range(new_input_weights_c.shape[1]):

                new_w = new_input_weights_c[i, j] + np.random.normal(0, 0.1)
                while new_w < 0 or new_w >= 1: new_w = new_input_weights_c[i, j] + np.random.normal(0, 0.1)
                new_input_weights_c[i, j] = new_w

        for i in range(len(new_input_biases_c)):
            new_b = new_input_biases_c[i] + np.random.normal(0, 0.1)
            while new_b < 0 or new_b >= 1: new_b = new_input_biases_c[i] + np.random.normal(0, 0.1)
            new_input_biases_c[i] = new_b

        new_hidden_weights_r = np.copy(self.hidden_layer_weights_r)
        new_hidden_biases_r = np.copy(self.hidden_layer_biases_r)

        for i in range(len(new_hidden_weights_r)):
            new_w = new_hidden_weights_r[i] + np.random.normal(0, 0.1)
            while new_w < 0 or new_w >= 1: new_w = new_hidden_weights_r[i] + np.random.normal(0, 0.1)
            new_hidden_weights_r[i] = new_w

        new_b = new_hidden_biases_r + np.random.normal(0, 0.1)
        while new_b < 0 or new_b >= 1: new_b = new_hidden_biases_r + np.random.normal(0, 0.1)
        new_hidden_biases_r = new_b

        new_hidden_weights_c = np.copy(self.hidden_layer_weights_c)
        new_hidden_biases_c = np.copy(self.hidden_layer_biases_c)

        for i in range(len(new_hidden_weights_c)):
            new_w = new_hidden_weights_c[i] + np.random.normal(0, 0.1)
            while new_w < 0 or new_w >= 1: new_w = new_hidden_weights_c[i] + np.random.normal(0, 0.1)
            new_hidden_weights_c[i] = new_w

        new_b = new_hidden_biases_c + np.random.normal(0, 0.1)
        while new_b < 0 or new_b >= 1: new_b = new_hidden_biases_c + np.random.normal(0, 0.1)
        new_hidden_biases_c = new_b

        return Individual_3(self.n_rows, copy_from= ((new_input_weights_r, new_input_biases_r, new_hidden_weights_r, new_hidden_biases_r), (new_input_weights_c, new_input_biases_c, new_hidden_weights_c, new_hidden_biases_c)))
    
    def mutate_2(self):

        which = randint(1, 8)

        new_input_weights_r = np.copy(self.input_layer_weights_r)
        new_input_biases_r = np.copy(self.input_layer_biases_r)

        if which == 1:

            i = randint(0, new_input_weights_r.shape[0] - 1)
            j = randint(0, new_input_weights_r.shape[1] - 1)

            new_w = new_input_weights_r[i, j] + np.random.normal(0, 0.1)
            while new_w < 0 or new_w >= 1: new_w = new_input_weights_r[i, j] + np.random.normal(0, 0.1)
            new_input_weights_r[i, j] = new_w
        
        elif which == 2:

            i = randint(0, len(new_input_biases_r) - 1)

            new_b = new_input_biases_r[i] + np.random.normal(0, 0.1)
            while new_b < 0 or new_b >= 1: new_b = new_input_biases_r[i] + np.random.normal(0, 0.1)
            new_input_biases_r[i] = new_b

        new_input_weights_c = np.copy(self.input_layer_weights_c)
        new_input_biases_c = np.copy(self.input_layer_biases_c)

        if which == 3:

            i = randint(0, new_input_weights_c.shape[0] - 1)
            j = randint(0, new_input_weights_c.shape[1] - 1)

            new_w = new_input_weights_c[i, j] + np.random.normal(0, 0.1)
            while new_w < 0 or new_w >= 1: new_w = new_input_weights_c[i, j] + np.random.normal(0, 0.1)
            new_input_weights_c[i, j] = new_w

        elif which == 4:

            i = randint(0, len(new_input_biases_c) - 1)

            new_b = new_input_biases_c[i] + np.random.normal(0, 0.1)
            while new_b < 0 or new_b >= 1: new_b = new_input_biases_c[i] + np.random.normal(0, 0.1)
            new_input_biases_c[i] = new_b

        new_hidden_weights_r = np.copy(self.hidden_layer_weights_r)
        new_hidden_biases_r = np.copy(self.hidden_layer_biases_r)

        if which == 5:

            i = randint(0, len(new_hidden_weights_r) - 1)

            new_w = new_hidden_weights_r[i] + np.random.normal(0, 0.1)
            while new_w < 0 or new_w >= 1: new_w = new_hidden_weights_r[i] + np.random.normal(0, 0.1)
            new_hidden_weights_r[i] = new_w

        elif which == 6:

            new_b = new_hidden_biases_r + np.random.normal(0, 0.1)
            while new_b < 0 or new_b >= 1: new_b = new_hidden_biases_r + np.random.normal(0, 0.1)
            new_hidden_biases_r = new_b

        new_hidden_weights_c = np.copy(self.hidden_layer_weights_c)
        new_hidden_biases_c = np.copy(self.hidden_layer_biases_c)

        if which == 7:

            i = randint(0, len(new_hidden_weights_c) - 1)

            new_w = new_hidden_weights_c[i] + np.random.normal(0, 0.1)
            while new_w < 0 or new_w >= 1: new_w = new_hidden_weights_c[i] + np.random.normal(0, 0.1)
            new_hidden_weights_c[i] = new_w

        elif which == 8:

            new_b = new_hidden_biases_c + np.random.normal(0, 0.1)
            while new_b < 0 or new_b >= 1: new_b = new_hidden_biases_c + np.random.normal(0, 0.1)
            new_hidden_biases_c = new_b

        return Individual_3(self.n_rows, copy_from= ((new_input_weights_r, new_input_biases_r, new_hidden_weights_r, new_hidden_biases_r), (new_input_weights_c, new_input_biases_c, new_hidden_weights_c, new_hidden_biases_c)))
    
    def fusion(self, id_2, keep_r):
        if keep_r: return Individual_3(self.n_rows, copy_from= (self.get_r_dense(), id_2.get_c_dense()))
        else: return Individual_3(self.n_rows, copy_from= (id_2.get_r_dense(), self.get_c_dense()))

In [114]:
STARTING_POPULATION = 10
N_ROWS = 4

In [201]:
def match(id_1, id_2, one_first, modified_start= None):

    if one_first: player = 0
    else: player = 1

    if modified_start is None: nim = Nim(N_ROWS)
    else: nim = modified_start

    n_moves_1 = 0
    n_moves_2 = 0

    while nim:
        if player == 0:
            ply = id_1.move(nim)
            if ply == -1: return False, n_moves_1, n_moves_2 * 2
            n_moves_1 += 1
        else:
            ply = id_2.move(nim)
            if ply == -1: return True, n_moves_1 * 2, n_moves_2
            n_moves_1 += 1
        
        nim.nimming(ply)
        player = 1 - player

    if player == 0: return True, n_moves_1 * 100, n_moves_2 * 20
    else: return False, n_moves_1 * 20, n_moves_2 * 100
    #return True if id_1 won else False

def match_mine(id_1, one_first, modified_start= None):

    if one_first: player = 0
    else: player = 1

    if modified_start is None: nim = Nim(N_ROWS)
    else: nim = modified_start

    n_moves = 0

    while nim:
        if player == 0: 
            ply = id_1.move(nim)
            if ply == -1: return player != 0, n_moves * 100
            n_moves += 1
        else: ply = nim_sum_move(nim)
        nim.nimming(ply)
        player = 1 - player

    return player == 0, n_moves
    #return True if id_1 won else False

def match_optimal(id_1, one_first, modified_start= None):

    if one_first: player = 0
    else: player = 1

    if modified_start is None: nim = Nim(N_ROWS)
    else: nim = modified_start

    n_moves = 0

    while nim:
        if player == 0: 
            ply = id_1.move(nim)
            if ply == -1: return player != 0, n_moves
            n_moves += 1
        else: ply = optimal(nim)
        nim.nimming(ply)
        player = 1 - player

    return player == 0, n_moves
    #return True if id_1 won else False

def tournament_dense(population):
    #print('starting tournament')
    scores = np.zeros(len(population))
    scores_against_mine = np.zeros(len(population))
    scores_against_optimal = np.zeros(len(population))

    scores_all = np.array([[0., 0., 0.] for _ in range(len(population))])

    for i_start in range(2):
        
        if i_start == 0: 
            nim = Nim(N_ROWS)
        else:
            nim = Nim(N_ROWS)
            r = randint(0, N_ROWS-1)
            c = randint(1, read_state(nim)[r])
            nim.nimming(Nimply(r, c))
            #show_board(nim)


        for i in range(len(population)):
            
            res, n_moves = match_mine(population[i], one_first= True, modified_start= deepcopy(nim))
            scores[i] += n_moves
            scores_all[i][2] += n_moves
            if res: 
                scores_against_mine[i] += 1
                scores_all[i][0] += 1
            res, n_moves = match_mine(population[i], one_first= False, modified_start= deepcopy(nim))
            scores[i] += n_moves
            scores_all[i][2] += n_moves
            if res: 
                scores_against_mine[i] += 1
                scores_all[i][0] += 1

            for _ in range(5):
                res, n_moves = match_optimal(population[i], one_first= True, modified_start= deepcopy(nim))
                scores[i] += n_moves
                scores_all[i][2] += n_moves
                if res: 
                    scores_against_optimal[i] += 1
                    scores_all[i][1] += 1
                res, n_moves = match_optimal(population[i], one_first= False, modified_start= deepcopy(nim))
                scores[i] += n_moves
                scores_all[i][2] += n_moves
                if res: 
                    scores_against_optimal[i] += 1
                    scores_all[i][1] += 1

            #for j in range(len(population)):
            #    if i != j:
            #        res, n_moves_i, n_moves_j = match(population[i], population[j], one_first= True, modified_start= deepcopy(nim))
            #        scores_all[i][2] += n_moves_i / (len(population) - 1)
            #        scores_all[j][2] += n_moves_j / (len(population) - 1)
            #        res, n_moves_i, n_moves_j = match(population[i], population[j], one_first= False, modified_start= deepcopy(nim))
            #        scores_all[i][2] += n_moves_i / (len(population) - 1)
            #        scores_all[j][2] += n_moves_j / (len(population) - 1)

    #best_idx = np.argsort(scores)[::-1]

    new_scores = np.array([[int(x) for x in y] for y in scores_all])
    scores_all = new_scores
    
    sort_idx = np.append(np.array([x for x in range(len(population))]).reshape(-1, 1), scores_all, axis= 1)

    best_idx = np.array(sorted(sort_idx, key=lambda tup: (tup[1], tup[2], tup[3]), reverse=True))[:, 0]

    new_population = [population[i] for i in best_idx[:STARTING_POPULATION]]
    return new_population, scores[best_idx[:STARTING_POPULATION]], scores_against_mine[best_idx[:STARTING_POPULATION]], scores_against_optimal[best_idx[:STARTING_POPULATION]], scores_all[best_idx[:STARTING_POPULATION]]

def procreation(population):
    #print('procreating')
    # create new individuals through mutation and fusion
    new_population = population.copy()

    not_worthy = STARTING_POPULATION // 2

    for i in range(len(population)):

        new_population.append(population[i])

        if i < (not_worthy):

            for _ in range(2): new_population.append(population[i].mutate())

            for _ in range(10): new_population.append(population[i].mutate_2())

            #for j in range(not_worthy):
            #    if i != j:
            #        new_population.append(population[i].fusion(population[j], True))
            #        new_population.append(population[i].fusion(population[j], False))

#            if i == 0:
#
#                for _ in range(10): new_population.append(population[i].mutate_2())
#
#                for j in range(len(population)):
#                    if i != j:
#                        new_population.append(population[i].fusion(population[j], True))
#                        new_population.append(population[i].fusion(population[j], False))

    return new_population

In [202]:
population = [Individual_3(N_ROWS, random_init= True) for _ in range(STARTING_POPULATION)]

bests = []
scores_bests = []

In [221]:
for i in range(100):

    print(f'=======================\nepoch {i}')

    print(f'population: {len(population)}')

    population = procreation(population)

    for x in bests[:-1]: population.append(x) 

    print(f'population after procreation: {len(population)}')

    population, scores, vs_mine, vs_optimal, all = tournament_dense(population)

    print(f'population after tournament: {len(population)}')
    #print(scores)
    #print(vs_optimal)
    #print(vs_mine)
    print(all)

    if len(bests) < 11: 
        bests.append(population[0])
        scores_bests.append(all[0])
    else:
        worst_best = -1
        worst_best_score = [999, 999, 999]
        for i in range(len(scores_bests)):
            if scores_bests[i][0] < worst_best_score[0]: worst_best = i
            elif scores_bests[i][0] == worst_best_score[0] and scores_bests[i][1] < worst_best_score[1]: worst_best = i
            elif scores_bests[i][0] == worst_best_score[0] and scores_bests[i][1] == worst_best_score[1] and scores_bests[i][2] < worst_best_score[2]: worst_best = i
        bests[i] = population[0]
        scores_bests[i] = all[0]

print(scores)

epoch 0
population: 10
population after procreation: 90
population after tournament: 10
[[   0    5  846]
 [   0    4 1150]
 [   0    4 1150]
 [   0    4  845]
 [   0    3 1156]
 [   0    3 1154]
 [   0    3 1153]
 [   0    3 1149]
 [   0    3 1148]
 [   0    3 1143]]
epoch 1
population: 10
population after procreation: 90
population after tournament: 10
[[   0    2 1247]
 [   0    2 1245]
 [   0    2 1243]
 [   0    2 1243]
 [   0    2  842]
 [   0    2  840]
 [   0    2  839]
 [   0    2  838]
 [   0    2  836]
 [   0    1 1251]]
epoch 2
population: 10
population after procreation: 90
population after tournament: 10
[[   0    4  838]
 [   0    3  842]
 [   0    3  838]
 [   0    2 1253]
 [   0    2 1251]
 [   0    2 1249]
 [   0    2 1247]
 [   0    2 1240]
 [   0    2  842]
 [   0    1 1251]]
epoch 3
population: 10
population after procreation: 90
population after tournament: 10
[[   0    3  838]
 [   0    2 1254]
 [   0    2 1253]
 [   0    2 1250]
 [   0    2 1245]
 [   0    2 124

In [222]:
champion = population[0]

In [236]:
nim = Nim(4)
show_board(nim)

champion_is_first = True

print('-----------------------')
print('starting the game!\n')
to_remove = my_nim_sum(read_state(nim))

if champion_is_first: print('champion is player 0')
else: print('champion is player 1')

print('\nif played correctly')
if to_remove == 0: print('player 1 should win')
else: print('player 0 should win')
print('-----------------------')

player = 0
while nim:
    if champion_is_first:
        if player == 0: ply = champion.move(nim)
        else: ply = optimal(nim)
    else:
        if player == 0: ply = optimal(nim)
        else: ply = champion.move(nim)
    
    assert ply != -1, 'not well trained - end up in invalid move'
    print(f"player {player} removes {ply[1]} from {ply[0]}")
    nim.nimming(ply)
    show_board(nim)
    #print(nim)
    player = 1 - player
    print('------------------------------')
if champion_is_first: print('champion is player 0')
else: print('champion is player 1')
print(f"end of game: Player {player} won!")

_ _ _ O _ _ _ 
_ _ O O O _ _ 
_ O O O O O _ 
O O O O O O O 
-----------------------
starting the game!

champion is player 0

if played correctly
player 1 should win
-----------------------
player 0 removes 4 from 3
_ _ _ O _ _ _ 
_ _ O O O _ _ 
_ O O O O O _ 
O O O _ _ _ _ 
------------------------------
player 1 removes 1 from 1
_ _ _ O _ _ _ 
_ _ O O _ _ _ 
_ O O O O O _ 
O O O _ _ _ _ 
------------------------------
player 0 removes 1 from 3
_ _ _ O _ _ _ 
_ _ O O _ _ _ 
_ O O O O O _ 
O O _ _ _ _ _ 
------------------------------
player 1 removes 2 from 1
_ _ _ O _ _ _ 
_ _ _ _ _ _ _ 
_ O O O O O _ 
O O _ _ _ _ _ 
------------------------------
player 0 removes 1 from 3
_ _ _ O _ _ _ 
_ _ _ _ _ _ _ 
_ O O O O O _ 
O _ _ _ _ _ _ 
------------------------------
player 1 removes 2 from 2
_ _ _ O _ _ _ 
_ _ _ _ _ _ _ 
_ O O O _ _ _ 
O _ _ _ _ _ _ 
------------------------------
player 0 removes 1 from 2
_ _ _ O _ _ _ 
_ _ _ _ _ _ _ 
_ O O _ _ _ _ 
O _ _ _ _ _ _ 
----------------------

AssertionError: not well trained - end up in invalid move