# Code Written by:
**Shweta Tiwari**
*20 Oct 2023*

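## Algorithm: Simple Nim

Two players alternately remove 1, 2 or 3 objects from a single heap; whoever takes the last object wins. The AI player in this notebook learns its move probabilities from the outcomes of repeated games.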

In [1]:
import time

In [2]:
!pip install --upgrade bokeh==2.4.3



In [3]:
import numpy as np
import pandas as pd
from bokeh.plotting import figure, show, output_notebook

# Algorithm

## Player: AI

In [4]:
%%time
class Player:
    """Nim player that learns move weights from the results of past games.

    Attributes:
        history: maps each heap size seen in the current game to the move
            that was actually played at that heap size.
        distribution: integer array of shape ``(heap + 1, 3)``; row ``h``
            holds the weights for taking 1, 2 or 3 objects when ``h``
            objects remain.
        cutoff: row-sum threshold above which a row is shifted back down.
    """

    def __init__(self, heap):
        """Start with uniform weights for every heap size from 0 to ``heap``."""
        self.cutoff = 1000
        self.distribution = np.ones((heap + 1, 3), dtype=int)
        self.history = {}

    def __call__(self, heap):
        """Pick a move for ``heap`` at random, weighted by the learned row."""
        thresholds = self.distribution[heap].cumsum()
        draw = np.random.randint(thresholds[2])

        # locate the weight bucket the draw falls into
        if draw < thresholds[0]:
            wanted = 1
        elif draw < thresholds[1]:
            wanted = 2
        else:
            wanted = 3

        # never take more than what is left; remember what was really played
        taken = min(heap, wanted)
        self.history[heap] = taken
        return taken

    def learn(self, winner):
        """Adjust the weights of this game's moves, then clear the history.

        A win adds one to every played move. A loss leaves the played move
        unchanged and adds one to the two alternatives of that heap size.
        """
        won = winner is self
        for heap, move in self.history.items():
            weights = self.distribution[heap]  # view: edits land in the table
            if won:
                weights[move - 1] += 1
            else:
                weights[move - 1] -= 1
                weights += 1

        # shift oversized rows down so their smallest weight becomes 1
        row_totals = self.distribution.sum(axis=1)
        for heap in np.flatnonzero(row_totals > self.cutoff):
            weights = self.distribution[heap]
            weights -= weights.min() - 1

        # the next game starts with a clean record
        self.history = {}

    def strategy(self):
        """Return move probabilities, shape ``(3, heap)``, for heap sizes 1..heap."""
        weights = self.distribution[1:]
        totals = weights.sum(axis=1)
        return weights.T / totals

CPU times: user 0 ns, sys: 46 µs, total: 46 µs
Wall time: 51.3 µs


# Run

## Opponents

In [5]:
%%time
def expert_opponent(heap):
    """Take ``heap % 4`` objects when that is non-zero, else a random 1-3 (capped at ``heap``).

    Taking ``heap % 4`` leaves the other side a heap that is a multiple of four.
    """
    remainder = heap % 4
    if remainder:
        return remainder
    # no remainder to take: fall back to a random legal move
    return min(heap, np.random.randint(1, 4))

CPU times: user 5 µs, sys: 1 µs, total: 6 µs
Wall time: 10 µs


In [6]:
%%time
def random_opponent(heap):
    """Take a uniformly random 1, 2 or 3 objects, never more than ``heap``."""
    wanted = np.random.randint(1, 4)
    return min(heap, wanted)

CPU times: user 6 µs, sys: 0 ns, total: 6 µs
Wall time: 10 µs


In [7]:
%%time
def take_n_opponent(take):
    """Build an opponent that always takes ``take`` objects, capped at the heap size."""
    def opponent(heap):
        return min(heap, take)

    return opponent

CPU times: user 6 µs, sys: 1e+03 ns, total: 7 µs
Wall time: 11.9 µs


## Training

In [8]:
%%time
def play(heap, player, opponent, games=100000, report_every=10000):
    """Train ``player`` against ``opponent`` and plot the learned strategy.

    Args:
        heap: number of objects every game starts with.
        player: learning player; called as ``player(h)`` for a move and
            notified of each result through ``player.learn(winner)``.
        opponent: callable ``opponent(h)`` returning the opponent's move.
        games: index of the last progress report. ``games + 1`` games are
            played in total, because a report is printed *before* game
            number ``games`` is played.
        report_every: number of games per progress report.

    Raises:
        ValueError: if ``report_every`` is not positive.

    Notes:
        * The printed value is labelled 'W/L ratio' but is the fraction of
          the last ``report_every`` games won by ``player``. The first
          report (game 0) is therefore always 0.0.
        * The turn order is carried over between games: the side that lost
          a game moves first in the next one. ``player`` moves first in the
          very first game.
        * If ``games`` is not a multiple of ``report_every``, the trailing
          partial window is never reported.
    """
    if report_every < 1:
        raise ValueError('report_every must be a positive integer')

    players = player, opponent
    wins = 0

    for game in range(games + 1):
        # print progress periodically and start a new counting window
        if game % report_every == 0:
            print(game, 'games, W/L ratio', wins / report_every)
            wins = 0

        # a single game: players[0] is always the side to move
        h = heap
        while h:
            h -= players[0](h)
            players = players[1], players[0]

        # after the final swap, players[1] is whoever took the last object
        winner = players[1]
        wins += winner is player

        # let player learn
        player.learn(winner)

    # plot distribution
    plot_strategy(heap, player)

CPU times: user 8 µs, sys: 0 ns, total: 8 µs
Wall time: 12.2 µs


In [9]:
%%time
def plot_strategy(heap, player):
    """Show the player's learned move probabilities as stacked bars per heap size.

    Args:
        heap: largest heap size to plot; bars are drawn for 1..heap.
            Assumes ``player.strategy()`` returns ``heap`` columns, i.e. the
            player was created for this same heap size.
        player: object whose ``strategy()`` returns three rows: the
            probabilities of taking 1, 2 and 3 objects.
    """
    output_notebook()

    # data: cumulative tops for the stacked bars (inputs are left untouched)
    take_1, take_2, take_3 = player.strategy()
    top_2 = take_1 + take_2
    top_3 = top_2 + take_3
    kwargs = {'x': list(range(1, heap + 1)), 'width': .8}

    # plot: width/height and legend_label replace the deprecated
    # plot_width/plot_height and legend keywords
    plot = figure(
        width=600,
        height=400,
        title='Learned strategy',
        x_axis_label='heap size',
        y_axis_label='move probability',
    )
    plot.vbar(**kwargs, bottom=0, top=take_1, legend_label='take 1', color='#a44444')
    plot.vbar(**kwargs, bottom=take_1, top=top_2, legend_label='take 2', color='#88a888')
    plot.vbar(**kwargs, bottom=top_2, top=top_3, legend_label='take 3', color='#ccccac')
    show(plot)

CPU times: user 14 µs, sys: 0 ns, total: 14 µs
Wall time: 18.6 µs


## Learning

In [10]:
%%time
# Starting heap size; passed to both play() and Player() in the training runs below.
HEAP = 21

CPU times: user 4 µs, sys: 0 ns, total: 4 µs
Wall time: 8.34 µs


In [11]:
%%time
# Train a fresh Player against expert_opponent (takes heap % 4 whenever that is non-zero).
play(HEAP, Player(HEAP), expert_opponent)

0 games, W/L ratio 0.0
10000 games, W/L ratio 0.0087
20000 games, W/L ratio 0.0153
30000 games, W/L ratio 0.0199
40000 games, W/L ratio 0.0414
50000 games, W/L ratio 0.0673
60000 games, W/L ratio 0.334
70000 games, W/L ratio 0.4972
80000 games, W/L ratio 0.4988
90000 games, W/L ratio 0.4992
100000 games, W/L ratio 0.4995




CPU times: user 20.3 s, sys: 253 ms, total: 20.5 s
Wall time: 22.8 s


In [12]:
%%time
# Train a fresh Player against random_opponent (uniformly random 1-3, capped at the heap).
play(HEAP, Player(HEAP), random_opponent)

0 games, W/L ratio 0.0
10000 games, W/L ratio 0.8692
20000 games, W/L ratio 0.9558
30000 games, W/L ratio 0.9672
40000 games, W/L ratio 0.9691
50000 games, W/L ratio 0.97
60000 games, W/L ratio 0.9677
70000 games, W/L ratio 0.9715
80000 games, W/L ratio 0.9718
90000 games, W/L ratio 0.9717
100000 games, W/L ratio 0.973




CPU times: user 30.8 s, sys: 189 ms, total: 31 s
Wall time: 33.7 s


In [13]:
%%time
# Train a fresh Player against an opponent that always takes one object.
play(HEAP, Player(HEAP), take_n_opponent(1))

0 games, W/L ratio 0.0
10000 games, W/L ratio 0.9976
20000 games, W/L ratio 0.9999
30000 games, W/L ratio 0.9999
40000 games, W/L ratio 0.9999
50000 games, W/L ratio 1.0
60000 games, W/L ratio 1.0
70000 games, W/L ratio 1.0
80000 games, W/L ratio 1.0
90000 games, W/L ratio 1.0
100000 games, W/L ratio 1.0




CPU times: user 25.3 s, sys: 106 ms, total: 25.4 s
Wall time: 25.6 s


In [14]:
%%time
# Train a fresh Player against an opponent that always takes three objects (or what is left).
play(HEAP, Player(HEAP), take_n_opponent(3))

0 games, W/L ratio 0.0
10000 games, W/L ratio 0.977
20000 games, W/L ratio 0.9979
30000 games, W/L ratio 0.9979
40000 games, W/L ratio 0.9976
50000 games, W/L ratio 0.9994
60000 games, W/L ratio 0.9992
70000 games, W/L ratio 0.9998
80000 games, W/L ratio 0.9999
90000 games, W/L ratio 1.0
100000 games, W/L ratio 0.9997




CPU times: user 14.2 s, sys: 73 ms, total: 14.2 s
Wall time: 14.4 s


# The End