In [1]:
import numpy as np
from Strategy import Player
import matplotlib.pyplot as plt
import pandas as pd
from tqdm import tqdm
from scipy.stats import norm

experiment_data = pd.DataFrame(columns = ["Hands played", "Percent won", "Bet size", "Hit zero", "Percent per hour"])

try:
    experiment_data = pd.read_csv("experiment_data.csv")
except:
    experiment_data = pd.DataFrame(columns = ["Hands played", "Percent won", "Bet size", "Hit zero", "Percent per hour"])

In [2]:
class Deck():
    def __init__(self,):
        self.cards = ['2','3','4','5','6','7','8','9','10','J','Q','K','A']
        self.face_cards = ['J','Q','K']
    def draw(self,):
        card_drawn = np.random.choice(self.cards)
        return card_drawn

In [3]:
class BlackJack():
    def __init__(self, player):
        self.deck = Deck()
        self.hands_to_play = []
        self.dealer_public = None
        self.dealer_hidden = None
        self.bet = None
        self.hands_to_resolve = []
        self.player = player      
        self.bankrolls = []
    def deal(self, player_cards):
        if player_cards== None:
            player_hand = [self.deck.draw(), self.deck.draw()]
        else:
            player_hand = player_cards
        
        self.dealer_public = self.deck.draw()
        self.dealer_hidden = self.deck.draw()
        self.hands_to_play.append(player_hand)
        
        
    def reset(self, bet, player_cards = None):
        self.bet = bet
        self.bets = [bet]           
        self.player.bankroll -= self.bet
        self.deal(player_cards)
        return (self.hands_to_play, self.hands_to_resolve, self.dealer_public)
    
    def hand_to_value(self, hand):
        value = 0
        aces = 0
        for card in hand:
            if card in self.deck.face_cards:
                value = value + 10
            elif card == 'A':
                value = value + 11
                aces = aces + 1
            else:
                value = value + int(card)
        while (value > 21 and aces > 0):
            value = value - 10
            aces = aces - 1
        return value
    
    
    def dealer_hit_strategy(self, dealer_hand):
        dealer_score = self.hand_to_value(dealer_hand)
        
        if(dealer_score < 17):
            dealer_hand.append(self.deck.draw())
            return self.dealer_hit_strategy(dealer_hand)

        elif(dealer_score in range(17, 22)):
            return dealer_score, self.check_blackjack(dealer_hand), dealer_hand

        elif(dealer_score > 21):
            return 0, self.check_blackjack(dealer_hand), dealer_hand
    
    def check_blackjack(self, hand):
        value = self.hand_to_value(hand)
        if value == 21 and len(hand) == 2:
            return 1
        else:
            return 0
            
    def resolve(self, player_hand, dealer_score, dealer_has_blackjack):
        
        player_score = self.hand_to_value(player_hand)
        player_has_blackjack = self.check_blackjack(player_hand)
        
        if(self.check_player_bust(player_hand)):
            #print("player busts")
            return 0
        
        if(dealer_has_blackjack):
            if(player_has_blackjack):
                #print("push")
                return 1
            else:
                #print("player loses")      
                return 0
        
        if(player_score > dealer_score):
            if player_has_blackjack:
                #print("Blackjack")
                return 2.5
            else:
                #print("Player wins")
                return 2
        
        if(player_score == dealer_score):
            #print("push")
            return 1
        
        if(player_score < dealer_score):
            #print("player loses")
            return 0
           
    
    def check_player_bust(self, hand):
        value = self.hand_to_value(hand)
        if value > 21:
            return True
        else:
            return False

    def step_hand(self, action, player_hand):
        
        player_has_blackjack = self.check_blackjack(player_hand)
        
        if player_has_blackjack:
            return player_hand, 1
        
        else:
            if action == 'stand':
                return player_hand, 1
            
            elif action == 'hit':
                player_hand.append(self.deck.draw())
                is_player_bust = self.check_player_bust(player_hand)
                if is_player_bust:
                    return player_hand, 1
                else:
                    return player_hand, 0
                
            elif action == 'double':
                
                self.player.bankroll -= self.bet
                self.bet=self.bet * 2
                player_hand.append(self.deck.draw())
                
                return player_hand, 1
            
            elif action == 'split':
                
                assert len(player_hand) == 2, 'Player has more than two cards'
                assert player_hand[0] == player_hand[1], 'Player`s cards are different'
                self.player.bankroll -= self.bet
                if player_hand[0] == 'A':
                    self.hands_to_resolve.append([player_hand[0], self.deck.draw()])
                    self.hands_to_resolve.append([player_hand[0], self.deck.draw()])
                    self.bets.append(self.bet)
                    
                    self.hands_to_play.remove(player_hand)
                else:
                    self.hands_to_play.append([player_hand[0], self.deck.draw()])
                    self.hands_to_play.append([player_hand[0], self.deck.draw()])
                    
                
                return player_hand, 1
    def update_player_stats(self, reward):
        if reward == 2.5:
            self.player.blackjacks += 1
        elif reward == 2:
            self.player.wins += 1
        elif reward == 1:
            self.player.draws += 1
        else:
            self.player.loses+= 1
            
    def step(self):
        while(len(self.hands_to_play) > 0):
            for player_hand in self.hands_to_play:
                done = 0
                
                while not done:
                    action = self.player.choose_action(player_hand, self.dealer_public)
                    #print(action, player_hand)
                    player_hand, done = self.step_hand(action, player_hand)
                    
                    
              
                self.bets.append(self.bet)
                try:
                    self.hands_to_play.remove(player_hand)
                    self.hands_to_resolve.append(player_hand)
                except:
                    pass
                #print("final_hand:{}".format(player_hand))
                if action == 'double':
                    self.bet = self.bet/2
        #print([self.dealer_public, self.dealer_hidden])
        
        dealer_score, dealer_has_blackjack, dealer_hand = self.dealer_hit_strategy([self.dealer_public, self.dealer_hidden])
        #print(dealer_hand)
        #print(self.hands_to_resolve)
        while self.hands_to_resolve:
            player_hand=self.hands_to_resolve.pop()
            reward = self.resolve(player_hand, dealer_score, dealer_has_blackjack)
            self.update_player_stats(reward)
            self.player.bankroll += reward * self.bets.pop()
            self.bankrolls.append(self.player.bankroll)
        

In [4]:
def add_to_database():
    global experiment_data
    percent_won = 100 * (env.player.bankroll - initial_money)/initial_money
    percent_per_hour = percent_won/hands_num * 350
    new_experiment_data = [np.log10(hands_num), percent_won, bet_size, hit_min, percent_per_hour]
    experiment_data = experiment_data.append(pd.Series(new_experiment_data, index = experiment_data.columns), ignore_index = 1)

In [5]:
initial_money = 100
hands_num = 31500 # will be saved as a power of ten
bet_size = 0.01 # percent

In [15]:
parameters = {
    'initial_money': 1000,
    #If flat bet is false the actual bet is bet_size * player_bankroll
    'flat_bet':True,
    'bet_size':2,
    'num_hands':31500,
    'sample_size':100
}

In [16]:
def grid_search_experiment(experiment, grid_parameters):
    number_of_keys = {}
    for key in grid_parameters.key()
        
    return NotImplemented

In [17]:
def run_bet_size_experiment(parameters):
    rewards = []
    initial_money = parameters['initial_money']
    for sample_num in tqdm(range(parameters['sample_size'])):
        env = BlackJack(Player(initial_money))
        for i in range(parameters['num_hands']):
            if parameters['flat_bet']:
                bet = min(parameters['bet_size'], env.player.bankroll)
                bet = max(2, bet)
            else:
                bet = max(2, parameters['bet_size'] * env.player.bankroll)
            if env.player.bankroll < 2:
                hit_min = 1
                break
            else:
                env.reset(bet)
                env.step()
        rewards.append(env.player.bankroll - initial_money)
    results = {}
    rewards = np.array(rewards)
    results['bust_percentage'] = len(np.where(rewards < -95)[0])/len(rewards)
    x1 = np.where(rewards >= -95)[0]
    x2 = np.where(rewards <= 0)[0]
    results['loses_percentage'] = len(np.intersect1d(x1,x2))/len(rewards)
    results['wins_percentage'] = len(np.where(rewards > 0)[0])/len(rewards)
    results['EV'] = rewards.mean()
    results['Deviation'] = rewards.std()
    results['rewards'] = rewards
    return results

In [18]:
%%time
run_bet_size_experiment(parameters)

100%|██████████| 100/100 [03:23<00:00,  2.03s/it]

CPU times: user 3min 22s, sys: 144 ms, total: 3min 22s
Wall time: 3min 23s





{'bust_percentage': 0.11,
 'loses_percentage': 0.01,
 'wins_percentage': 0.88,
 'EV': 480.06,
 'Deviation': 426.34893737407157,
 'rewards': array([1514., 1001.,  112.,  475., 1139.,  456.,  376., 1194.,  641.,
        -185., 1007., -244.,  216.,  106., 1443., -261.,  644.,  417.,
         777.,  281.,  252., -238.,  843.,  260.,  649., 1286., 1308.,
         759.,  394.,  706.,  475.,  115.,  359.,  251.,  232.,   63.,
         518., 1175.,  793.,  453.,  517.,  910.,  681., 1117.,  173.,
         148., -120.,  510., -168.,  925.,  354.,  755.,  486., -220.,
         550.,  474.,  110.,  466.,  229.,   38.,   10.,  445.,  632.,
         763.,  796.,  587.,  595.,  491.,  315.,   70.,  395., 1022.,
         713.,  493.,  790.,  655., -349.,  556.,  953., -487., -265.,
         736.,  609.,  473., 1392.,  -41.,  610.,  199.,  158.,  126.,
         862., -355.,  428., 1039.,  171.,  179.,  781.,  407.,  721.,
         634.])}

%%time
rewards = []
for sample_num in range(100):
    hit_min = 0
    env = BlackJack(Player(initial_money))
    for i in tqdm(range(hands_num)):
        this_bet = 2 
        if env.player.bankroll < 2:
            break
        else:        
            env.reset(this_bet)
            env.step()
            if(env.player.bankroll < 0):
                hit_min = 1
    rewards.append(env.player.bankroll - initial_money)
    #add_to_database()


experiment_data.to_csv("experiment_data.csv", index = False)

In [10]:
plt.plot(env.bankrolls)
plt.show()

NameError: name 'env' is not defined

In [None]:
x = experiment_data["Percent per hour"].values

In [None]:
plt.hist(x, bins = 100)

In [None]:
x.mean(), x.std()

In [None]:
cdf_probability = norm.cdf(0, loc=x.mean(), scale=x.std())

In [None]:
1-cdf_probability

Blackjack percentage 0.04
Wins percentage 0.38
draws percentage 0.10
loses percentage 0.48