# Метод Монте-Карло

In [1]:
# !pip install tqdm

In [2]:
import numpy as np
from tqdm.notebook import tqdm

In [3]:
N = 1000000
dim = 10
is_inside = []
for _ in tqdm(range(N)):
    x = 2 * np.random.rand(dim) - 1
    is_inside.append(sum(x ** 2) <= 1)
print(f'{"Monte Carlo:":15}{2 ** dim * np.mean(is_inside):.3f} +/- {2 ** dim * np.std(is_inside) / np.sqrt(N):.3f}')

from scipy.special import gamma

print(f'{"True:":15}{np.pi ** (dim / 2) / gamma(dim / 2 + 1):.3f}')

print(f'{"Dots inside:":15}{100 * np.mean(is_inside):.4f}%')

HBox(children=(FloatProgress(value=0.0, max=1000000.0), HTML(value='')))


Monte Carlo:   2.604 +/- 0.052
True:          2.550
Dots inside:   0.2543%


In [4]:
N = 1000000
batch = 100000
dim = 10
is_inside = 0
for _ in tqdm(range(N // batch)):
    x = 2 * np.random.rand(dim, batch) - 1
    is_inside += ((x ** 2).sum(axis=0) <= 1).sum()
mean = is_inside / N
print(f'{"Monte Carlo:":15}{2 ** dim * mean:.3f} +/- {2 ** dim * np.sqrt(mean * (1 - mean)) / np.sqrt(N):.3f}')
print(f'{"True:":15}{np.pi ** (dim / 2) / gamma(dim / 2 + 1):.3f}')
print(f'{"Dots inside:":15}{mean:.4f}%')

HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))


Monte Carlo:   2.449 +/- 0.050
True:          2.550
Dots inside:   0.0024%


# 21

In [5]:
class Player:
    def __init__(self):
        self.hand = []
        self.points = [0]
        self.turn = 0
        
    def new_game(self, turn):
        self.hand = []
        self.points = [0]
        self.turn = turn
        
    def wanna_more(self, info):
        return False
    
    def give(self, card):
        self.hand.append(card)
        self.points = [p + card for p in self.points]
        if card == 11:
            self.points.extend([p - 10 for p in self.points])

class DummyPlayer(Player):
    
    def __init__(self, thres):
        super().__init__()
        self.thres = thres
    
    def wanna_more(self, info):
        points = max([p if p <= 21 else 0 for p in self.points])
        return points < self.thres and min(self.points) <= 21


def twenty_one(players):
    deck = [6, 7, 8, 9, 10, 2, 3, 4, 11] * 4
    np.random.shuffle(deck)
    for i, name in enumerate(players):
        players[name].new_game(i)
    hands = {name: [] for name in players}
    takes = {name: True for name in players}
    points = {name: [0] for name in players}
    while deck and any(takes.values()):
        for name in players:
            if takes[name]:
                if players[name].wanna_more(takes):
                    card = deck.pop(0)
                    players[name].give(card)
                    hands[name].append(card)
                    points[name] = [p + card for p in points[name]]
                    if card == 11:
                        points[name].extend([p - 10 for p in points[name]])
                    if not deck:
                        break
                else:
                    takes[name] = False
    for name in players:
        points[name] = max([p if p <= 21 else 0 for p in points[name]])
    return points

In [6]:
players = {'A': DummyPlayer(18), 'B': DummyPlayer(19)}
wins = {'A': 0, 'B': 0}

for i in tqdm(range(100000)):
    points = twenty_one(players)
    if points['A'] > points['B']:
        wins['A'] += 1
    if points['A'] < points['B']:
        wins['B'] += 1
        
print(wins)
N = (wins['A'] + wins['B'])
winrate = wins['A'] / N
std = np.sqrt(winrate * (1 - winrate))
print(f'Winrate A: {100 * (winrate - 2 * std / np.sqrt(N)):.2f}-{100 * (winrate + 2 * std / np.sqrt(N)):.2f}%')

HBox(children=(FloatProgress(value=0.0, max=100000.0), HTML(value='')))


{'A': 38984, 'B': 37732}
Winrate A: 50.46-51.18%


# Решение ДЗ №1.

Для начала нужно создать данные для обучения: 
 * Автомат с вероятностями перехода,
 * Игру для сбора данных против Dummy(18).

In [7]:
states = [(cards, points) for cards in np.arange(13) for points in np.arange(22)] 
#12 карт гарантированный 0: 1 * 4 + 2 * 4 + 3 * 4 = 24 --- минимально возможная сумма
#очков в игре от 0 до 21

machine = {x: {y: 0 for y in states} for x in states} #Автомат
winning = {x: 0 for x in states} #Суммарный выигрыш при конце в этой точке (W: 1, D: 0, L:-1)
winterm = {x: 0 for x in states} #Победы при конце в состоянии (W: 1, D: 0, L:0)
games = {x: 0 for x in states} #Всего игр начатых в данной точке
gamesterm = {x: 0 for x in states} #Всего игр законченных в данной точке

Теперь сделаем парня который берет ровно n карт:

В основном он такой же как и `DummyPlayer`, но чтобоы не реализовывать игру лишний раз заново, хочется записывать данные о состояниях для машины переходов с помощью функции `give()`. Также создадим метод `getpoints()`, который делает понятно что и который давно пора добавить:

In [8]:
class FinalCountdown(Player):
    
    def __init__(self, CountDown):
        super().__init__()
        self.CountDown = CountDown
        
    def getPoints(self):
        return max([t if t <= 21 else 0 for t in self.points])
    
    def wanna_more(self, info):
        return len(self.hand) < self.CountDown and min(self.points) <= 21
    
    def give(self, card):
        curopt = self.getPoints()
        self.points = [p + card for p in self.points]
        if card == 11:
            self.points.extend([p - 10 for p in self.points])
        machine[(len(self.hand), curopt)][(len(self.hand) + 1, self.getPoints())] += 1
        games[(len(self.hand), curopt)] += 1
        self.hand.append(card)


Начинаем обучение:

In [11]:
for i in range(13):
    print(f'{"Iteration number:":20}{i}')
    players = {'Dum': DummyPlayer(18), 'Countdown': FinalCountdown(i)}
    for j in tqdm(range(int(10000000 * (0.9) ** i))):
        points = twenty_one(players)
        term = (len(players['Countdown'].hand), players['Countdown'].getPoints())
        if points['Dum'] > points['Countdown']:
            winning[term] -= 1
        if points['Dum'] < points['Countdown']:
            winning[term] += 1
            winterm[term] += 1
        gamesterm[term] += 1

Iteration number:   0


HBox(children=(FloatProgress(value=0.0, max=10000000.0), HTML(value='')))


Iteration number:   1


HBox(children=(FloatProgress(value=0.0, max=9000000.0), HTML(value='')))


Iteration number:   2


HBox(children=(FloatProgress(value=0.0, max=8100000.0), HTML(value='')))


Iteration number:   3


HBox(children=(FloatProgress(value=0.0, max=7290000.0), HTML(value='')))


Iteration number:   4


HBox(children=(FloatProgress(value=0.0, max=6561000.0), HTML(value='')))


Iteration number:   5


HBox(children=(FloatProgress(value=0.0, max=5904900.0), HTML(value='')))


Iteration number:   6


HBox(children=(FloatProgress(value=0.0, max=5314410.0), HTML(value='')))


Iteration number:   7


HBox(children=(FloatProgress(value=0.0, max=4782969.0), HTML(value='')))


Iteration number:   8


HBox(children=(FloatProgress(value=0.0, max=4304672.0), HTML(value='')))


Iteration number:   9


HBox(children=(FloatProgress(value=0.0, max=3874204.0), HTML(value='')))


Iteration number:   10


HBox(children=(FloatProgress(value=0.0, max=3486784.0), HTML(value='')))


Iteration number:   11


HBox(children=(FloatProgress(value=0.0, max=3138105.0), HTML(value='')))


Iteration number:   12


HBox(children=(FloatProgress(value=0.0, max=2824295.0), HTML(value='')))




In [None]:
for stateIN in states:
    if games[stateIN]:
        for stateOUT in states:
            machine[stateIN][stateOUT] /= games[stateIN]
    if gamesterm[stateIN]:
        winning[stateIN] /= gamesterm[stateIN]
        winterm[stateIN] /= gamesterm[stateIN]



In [None]:
strategy = {state: False for state in states}
meanopt = {state: winning[state] for state in states}
for points in range(21, -1, -1):
    for cards in range(12, -1, -1):
        state = (cards, points)
        ifyes = 0
        for key in states:
            ifyes += meanopt[key] * machine[state][key]
        if points == cards == 0:
            print(winning[state], ifyes)
        strategy[state] = ifyes >= winning[state]
        meanopt[state] = max(ifyes, winning[state])

In [None]:
#some stepik stuff
print(winterm[(4,15)])
print(machine[(3,12)][(4, 18)])
print(meanopt[(5, 16)])

In [None]:
class DummyDestroyer(Player):
    def __init__(self):
        super().__init__()
    
    def wanna_more(self, info):
        return strategy[(len((self.hand)), max([p if p <= 21 else 0 for p in self.points]))] and min(self.points) <= 21

In [None]:
players = {'B': DummyPlayer(18), 'A': DummyDestroyer()}
wins = {'A': 0, 'B': 0}

for i in tqdm(range(100000)):
    points = twenty_one(players)
    if points['A'] > points['B']:
        wins['A'] += 1
    if points['A'] < points['B']:
        wins['B'] += 1
        
print(wins)
N = (wins['A'] + wins['B'])
winrate = wins['A'] / N
std = np.sqrt(winrate * (1 - winrate))
print(f'Winrate A: {100 * (winrate - 2 * std / np.sqrt(N)):.2f}-{100 * (winrate + 2 * std / np.sqrt(N)):.2f}%')

In [None]:
print(strategy)