In [106]:
import numpy as np
import pandas as pd

In [107]:
pd.set_option("max_colwidth", 0)

# Game Deck

In [108]:
class GameDeck():
    """ Create the game deck """
    values = ["A", 2, 3, 4, 5, 6, 7, 8, 9, 10, "J", "Q", "K"]
    def __init__(self, num_decks=1, values=values, suits=4):
        self.num_decks = num_decks
        self.cards = values * num_decks * suits
        
    def __str__(self):
        return "{} cards left\n{}".format(len(self.cards), self.cards)
    
    def shuffle_cards(self):
        np.random.shuffle(self.cards)
        
    def deal_card(self, hand=None):
        if hand == None:
            return self.cards.pop(0)
        else:
            hand.append(self.cards.pop(0))
            
    def return_cards(self, discards):
        for i in discards:
            positions = len(self.cards)
            self.cards.insert(np.random.choice(positions), i)

In [109]:
test = GameDeck()

In [110]:
print (test)
test.shuffle_cards()
print ()
print (test)
test.deal_card()
print ()
print (test)
test.return_cards(["X", "Y", "test"])
print ()
print (test)

52 cards left
['A', 2, 3, 4, 5, 6, 7, 8, 9, 10, 'J', 'Q', 'K', 'A', 2, 3, 4, 5, 6, 7, 8, 9, 10, 'J', 'Q', 'K', 'A', 2, 3, 4, 5, 6, 7, 8, 9, 10, 'J', 'Q', 'K', 'A', 2, 3, 4, 5, 6, 7, 8, 9, 10, 'J', 'Q', 'K']

52 cards left
[7, 10, 'K', 'Q', 2, 4, 5, 'Q', 3, 5, 'K', 3, 2, 6, 9, 'K', 8, 'K', 3, 5, 6, 8, 9, 'J', 6, 'Q', 10, 'A', 'J', 6, 7, 7, 3, 2, 7, 'A', 'A', 10, 4, 4, 10, 'Q', 'J', 4, 'J', 'A', 8, 8, 9, 9, 2, 5]

51 cards left
[10, 'K', 'Q', 2, 4, 5, 'Q', 3, 5, 'K', 3, 2, 6, 9, 'K', 8, 'K', 3, 5, 6, 8, 9, 'J', 6, 'Q', 10, 'A', 'J', 6, 7, 7, 3, 2, 7, 'A', 'A', 10, 4, 4, 10, 'Q', 'J', 4, 'J', 'A', 8, 8, 9, 9, 2, 5]

54 cards left
[10, 'X', 'K', 'Q', 2, 4, 5, 'Q', 3, 5, 'test', 'K', 3, 2, 6, 9, 'K', 8, 'K', 3, 5, 6, 8, 9, 'J', 6, 'Q', 10, 'A', 'J', 6, 7, 7, 3, 2, 7, 'A', 'A', 10, 'Y', 4, 4, 10, 'Q', 'J', 4, 'J', 'A', 8, 8, 9, 9, 2, 5]


# Points calculator

In [111]:
def s_to_l(hand):
    try:
        hand = hand.split(",")
    except:
        pass
    
    working_hand = []
    
    for i in hand:
        try:
            working_hand.append(int(i))
        except:
            working_hand.append(i)
    return working_hand

In [112]:
def calc_ace(hand, count_A):
    # For each Ace, see if adding 11 causes it to bust
    # If no, then take Ace as 11
    # Else, take Ace as 1
    working_hand = hand.copy()
    
    # Remove Aces
    working_hand = [i if type(i) == int else 10 for i in working_hand if i != "A"]
    for k in range(count_A):
        if sum(working_hand) + 11 <= 21:
            working_hand.append(11)
        else:
            if 11 in working_hand and sum(working_hand) + 1 > 21:
                working_hand.remove(11)
                working_hand.append(1)
            working_hand.append(1)
    return working_hand

In [113]:
def calc_points(hand):
    working_hand = s_to_l(hand)
    
    if "A" in working_hand:
        
        # Count number of Aces
        count_A = working_hand.count("A")
        working_hand = calc_ace(working_hand, count_A)
        
    else:
        # Replace J, Q, and K with 10
        working_hand = [10 if i in ["J", "Q", "K"] else int(i) for i in working_hand]
    
    return sum(working_hand)

In [114]:
for i in [["A", "K"], [10,10], ["A", 5], ["A", "A", 8], [3,3], ["A", "A", 10], ["A", "A"]]:
    print (i, calc_points(i))

['A', 'K'] 21
[10, 10] 20
['A', 5] 16
['A', 'A', 8] 20
[3, 3] 6
['A', 'A', 10] 12
['A', 'A'] 12


# Soft 17

- Some casinos require the dealer to hit on a soft 17
- We need to know when it is a soft 17

In [115]:
def soft_17(hand):
    working_hand = s_to_l(hand)
    
    Ace_11 = False
    if calc_points(working_hand) == 17 and "A" in working_hand:
        count_A = working_hand.count("A")
        working_hand = calc_ace(working_hand, count_A)
        if 11 in working_hand[-count_A:]:
            return True
    return False

In [116]:
for i in [["K", 7], ["A", 3, 3], ["A", "A", 5], ["A", "A", "A", 4]]:
    print (i, soft_17(i))

['K', 7] False
['A', 3, 3] True
['A', 'A', 5] True
['A', 'A', 'A', 4] True


# Play game

- strategies
    - 0: random
    - 1: recommended
    - 2: statistical analysis
    - 3: machine learning

In [117]:
def player_turn(game_deck, d_open, p_hand, strategy=0):
    new_hand = p_hand.copy()
    while calc_points(new_hand) <= 11:
        game_deck.deal_card(new_hand)
        
    if strategy == 0:
        while calc_points(new_hand) <= 18:
            if np.random.random() <= 0.5:
                game_deck.deal_card(new_hand)
            else:
                break
    elif strategy == 1:
        while calc_points(new_hand) < 17:
            if d_open > 6:
                game_deck.deal_card(new_hand)
            else:
                break
                
    return game_deck, p_hand, new_hand

In [118]:
def dealer_turn(game_deck, hand, soft=1):
    new_hand = hand.copy()
    if soft == 1:
        while calc_points(new_hand) < 17 or soft_17(new_hand):
            game_deck.deal_card(new_hand)
    else:
        while calc_points(new_hand) < 17:
            game_deck.deal_card(new_hand)
    return game_deck, hand, new_hand

In [119]:
def l_to_s(l):
    l = [str(i) for i in l]
    return ",".join(l)

In [120]:
def play_split(game_deck, p_hand, d_hand, d_open, num_decks, strategy=0, soft=1, game_type=0):
    to_split = np.random.random() <= 0.5
    if (strategy == 0 and to_split) or (strategy == 1 and p_hand[0] == 8):
        # Splits
        split = 1
        p_hand_1 = [p_hand[0]]
        p_hand_2 = [p_hand[1]]
        game_deck.deal_card(p_hand_1)
        game_deck.deal_card(p_hand_2)
        game_deck, p_hand_1, p_new_hand_1 = player_turn(game_deck, d_open, p_hand_1, strategy=strategy)
        game_deck, p_hand_2, p_new_hand_2 = player_turn(game_deck, d_open, p_hand_2, strategy=strategy)

        # If either is below 21, dealer gets to play
        if calc_points(p_new_hand_1) <= 21 or calc_points(p_new_hand_2) <= 21:
            game_deck, d_hand, d_new_hand = dealer_turn(game_deck, d_hand)
            # If hand 1 is above 21
            if calc_points(p_new_hand_1) > 21:
                # Dealer doesn't get to play for this hand
                d_new_hand_1 = d_hand.copy()
                out_1 = np.array([l_to_s(d_hand), l_to_s(d_new_hand_1), l_to_s(p_hand_1), l_to_s(p_new_hand_1), strategy, split, num_decks, soft])       
            else:
                out_1 = np.array([l_to_s(d_hand), l_to_s(d_new_hand), l_to_s(p_hand_1), l_to_s(p_new_hand_1), strategy, split, num_decks, soft])       
            # If hand 2 is above 21
            if calc_points(p_new_hand_2) > 21:
                # Dealer doesn't get to play for this hand
                d_new_hand_2 = d_hand.copy()
                out_2 = np.array([l_to_s(d_hand), l_to_s(d_new_hand_2), l_to_s(p_hand_2), l_to_s(p_new_hand_2), strategy, split, num_decks, soft])
            else:
                out_2 = np.array([l_to_s(d_hand), l_to_s(d_new_hand), l_to_s(p_hand_2), l_to_s(p_new_hand_2), strategy, split, num_decks, soft])       
        # They are both above 21, dealer doesn't play at all
        else:
            d_new_hand = d_hand.copy()
            out_2 = np.array([l_to_s(d_hand), l_to_s(d_new_hand), l_to_s(p_hand_2), l_to_s(p_new_hand_2), strategy, split, num_decks, soft])       
            out_1 = np.array([l_to_s(d_hand), l_to_s(d_new_hand), l_to_s(p_hand_1), l_to_s(p_new_hand_1), strategy, split, num_decks, soft])       
            
        if game_type == 0:
            try:
                game_deck.return_cards(p_new_hand_1)
            except:
                game_deck.return_cards(p_hand_1)
            try:
                game_deck.return_cards(d_new_hand)
            except:
                game_deck.return_cards(d_hand)
            try:
                game_deck.return_cards(p_new_hand_2)
            except:
                game_deck.return_cards(p_hand_2)
        return np.array([out_1, out_2]), game_deck
    else:
        return "No split", game_deck

In [121]:
def card_value(card):
    try:
        return int(card)
    except:
        if card == "A":
            return 1
        else:
            return 10

In [122]:
def play_game(game_deck, num_decks=4, strategy=0, soft=1, game_type=0):
    d_hand = []
    p_hand = []
    
    for _ in range(2):
        game_deck.deal_card(p_hand)
        game_deck.deal_card(d_hand)
    
    d_open = card_value(d_hand[0])
    split = 0
    if calc_points(p_hand) != 21 and calc_points(d_hand) != 21:
        if (p_hand[0] == p_hand[1]):
            out, deck = play_split(game_deck, p_hand, d_hand, d_open, num_decks, strategy=strategy, soft=soft, game_type=game_type)
        else:
            out = "No split"
        if type(out) == str:
            game_deck, p_hand, p_new_hand = player_turn(game_deck, d_open, p_hand, strategy=strategy)
            if calc_points(p_new_hand) <= 21:
                game_deck, d_hand, d_new_hand = dealer_turn(game_deck, d_hand, soft=soft)
            else:
                d_new_hand = d_hand.copy()
            out = np.array([l_to_s(d_hand), l_to_s(d_new_hand), l_to_s(p_hand), l_to_s(p_new_hand), strategy, split, num_decks, soft])
            if game_type == 0:
                game_deck.return_cards(p_new_hand)
                game_deck.return_cards(d_new_hand)
    else:
        p_new_hand = p_hand.copy()
        d_new_hand = d_hand.copy()
        out = np.array([l_to_s(d_hand), l_to_s(d_new_hand), l_to_s(p_hand), l_to_s(p_new_hand), strategy, split, num_decks, soft])
        if game_type == 0:
            game_deck.return_cards(p_new_hand)
            game_deck.return_cards(d_new_hand)
    
    return out, game_deck

In [123]:
play_game(GameDeck(), strategy=1)

(array(['2,4', '2,4,5,6', 'A,3', 'A,3', '1', '0', '4', '1'], dtype='<U7'),
 <__main__.GameDeck at 0x250540f5088>)

In [124]:
a = play_game(GameDeck())[0]
a

array(['2,4', '2,4,5,6', 'A,3', 'A,3', '0', '0', '4', '1'], dtype='<U7')

In [125]:
deck = GameDeck()

In [126]:
deck.shuffle_cards()

In [127]:
a = np.array([1])
while a.ndim < 2:
    a, deck = play_game(deck)
print (a)

[['2,9' '2,9,A,5' '5,8' '5,8,5,2' '0' '1' '4' '1']
 ['2,9' '2,9,A,5' '5,Q' '5,Q,A' '0' '1' '4' '1']]


In [128]:
a.ndim

2

# Generate cards

- game_type
    - 0: continuous shuffling
    - 1: play full deck

In [129]:
def gen_df(num_games=5000, game_type=0):
    data = []
    deck = GameDeck()
    deck.shuffle_cards()
    if game_type == 0:
        for _ in range(num_games):
            try:
                a, deck = play_game(deck, strategy=np.random.randint(2), soft=np.random.randint(2), game_type=game_type)
                if a.ndim == 2:
                    data.extend(a)
                else:
                    data.append(a)
#                 print (deck)
#                 print (a.ndim)
            except:
                print ("Con error")
                print (a)
                print (deck)
                print ()
    else:
        while len(data) < num_games:
            deck = GameDeck()
            deck.shuffle_cards()
            while len(deck.cards) > 10:
                try:
                    a, deck = play_game(deck, strategy=np.random.randint(2), soft=np.random.randint(2), game_type=game_type)
                    if a.ndim == 2:
                        data.extend(a)
                    else:
                        data.append(a)
                except:
                    print ("One deck error")
                    print (a)
                    print (deck)
                    print ()

    df = pd.DataFrame(data, columns=["d_hand", "d_final_hand", "p_hand", "p_final_hand", "strategy", "split", "num_decks", "soft_17"])
    df["game_type"] = game_type
    return df

In [130]:
out= gen_df(num_games=200, game_type=0)

In [131]:
out

Unnamed: 0,d_hand,d_final_hand,p_hand,p_final_hand,strategy,split,num_decks,soft_17,game_type
0,"6,Q","6,Q,10","J,Q","J,Q",1,0,4,0,0
1,"Q,4","Q,4",810,"8,10,J",0,0,4,0,0
2,"J,6","J,6",78,"7,8,K",1,0,4,1,0
3,"6,Q","6,Q,A","3,J","3,J",1,0,4,1,0
4,"10,Q","10,Q","K,5","K,5,6",0,0,4,1,0
...,...,...,...,...,...,...,...,...,...
200,86,"8,6,K",109,109,1,0,4,0,0
201,"6,J","6,J,6","5,A","5,A",0,0,4,0,0
202,"A,K","A,K",79,79,0,0,4,0,0
203,25,25,"J,8","J,8,7",0,0,4,1,0


# Add columns

In [132]:
def add_columns(original):
    df = original.copy()
    df["strategy"] = df["strategy"].astype(int)
    df["split"] = df["split"].astype(int)
    df["num_decks"] = df["num_decks"].astype(int)
    df["soft_17"] = df["soft_17"].astype(int)
    
    df["d_open"] = df["d_hand"].apply(lambda x: card_value(x.split(",")[0]))
    df["d_initial"] = df["d_hand"].apply(calc_points)
    df["d_final"] = df["d_final_hand"].apply(calc_points)
    df["d_hit"] = df["d_final"]-df["d_initial"] > 0
    df["d_hit"] = df["d_hit"].astype(int)
    df["d_bust"] = df["d_final"] > 21
    df["d_bust"] = df["d_bust"].astype(int)
    
    df["p_initial"] = df["p_hand"].apply(calc_points)
    df["p_final"] = df["p_final_hand"].apply(calc_points)
    df["p_hit"] = df["p_final"]-df["p_initial"] > 0
    df["p_hit"] = df["p_hit"].astype(int)
    df["p_bust"] = df["p_final"] > 21
    df["p_bust"] = df["p_bust"].astype(int)
    
    df["p_win"] = (df["p_final"] > df["d_final"]) & (df["p_bust"] == 0) | (df["d_bust"] == 1)
    df["p_win"] = df["p_win"].astype(int)

    return df

# Generate CSVs

In [133]:
con = gen_df(game_type=0)

con.shape

one = gen_df(game_type=1)

one.shape

df = pd.concat([con,one])

In [134]:
df.shape

(10075, 9)

In [135]:
try:
    old_df = pd.read_csv("cards.csv")
    df = pd.concat([df, old_df])
except:
    pass
df.to_csv("cards.csv", index=False)

In [136]:
df = pd.read_csv("cards.csv")

In [137]:
df.shape

(100819, 9)

In [138]:
df2 = add_columns(df)

In [139]:
df2.to_csv("blackjack_data.csv", index=False)

# Test data

In [140]:
df2.describe()

Unnamed: 0,strategy,split,num_decks,soft_17,game_type,d_open,d_initial,d_final,d_hit,d_bust,p_initial,p_final,p_hit,p_bust,p_win
count,100819.0,100819.0,100819.0,100819.0,100819.0,100819.0,100819.0,100819.0,100819.0,100819.0,100819.0,100819.0,100819.0,100819.0,100819.0
mean,0.494718,0.031403,4.0,0.49938,0.496236,6.530049,14.55801,19.193644,0.524445,0.22594,14.596743,18.420427,0.444152,0.167409,0.417044
std,0.499975,0.174405,0.0,0.500002,0.499988,3.143105,4.049339,3.62754,0.499405,0.418202,4.033026,3.779121,0.496874,0.373342,0.493073
min,0.0,0.0,4.0,0.0,0.0,1.0,4.0,4.0,0.0,0.0,4.0,4.0,0.0,0.0,0.0
25%,0.0,0.0,4.0,0.0,0.0,4.0,12.0,17.0,0.0,0.0,12.0,16.0,0.0,0.0,0.0
50%,0.0,0.0,4.0,0.0,0.0,7.0,15.0,19.0,1.0,0.0,15.0,19.0,0.0,0.0,0.0
75%,1.0,0.0,4.0,1.0,1.0,10.0,18.0,21.0,1.0,0.0,18.0,21.0,1.0,0.0,1.0
max,1.0,1.0,4.0,1.0,1.0,10.0,21.0,26.0,1.0,1.0,21.0,28.0,1.0,1.0,1.0


## Check hands and points

In [141]:
print (df2[["d_hand", "d_initial"]].as_matrix()[np.random.randint(len(df2), size=5)])

[['J,4' 14]
 ['J,A' 21]
 ['4,8' 12]
 ['K,J' 20]
 ['9,5' 14]]


  """Entry point for launching an IPython kernel.


In [142]:
print (df2[["d_final_hand", "d_final"]].as_matrix()[np.random.randint(len(df2), size=5)])

[['J,6,Q' 26]
 ['7,Q' 17]
 ['2,9,8' 19]
 ['J,J' 20]
 ['K,7' 17]]


  """Entry point for launching an IPython kernel.


In [143]:
print (df2[["p_hand", "p_initial"]].as_matrix()[np.random.randint(len(df2), size=5)])

[['10,9' 19]
 ['2,Q' 12]
 ['2,6' 8]
 ['2,4' 6]
 ['7,8' 15]]


  """Entry point for launching an IPython kernel.


In [144]:
print (df2[["p_final_hand", "p_final"]].as_matrix()[np.random.randint(len(df2), size=5)])

[['6,J' 16]
 ['7,K,9' 26]
 ['10,Q' 20]
 ['3,Q,9' 22]
 ['6,3,J' 19]]


  """Entry point for launching an IPython kernel.


## If p_bust = 1, p_win = 0

In [145]:
df2.p_win[df2["p_bust"] == 1].value_counts()

0    16878
Name: p_win, dtype: int64

## If d_bust = 1, p_win = 1

In [146]:
df2.p_win[df2["d_bust"] == 1].value_counts()

1    22779
Name: p_win, dtype: int64

## If soft_17 = 1, then when d_final == 17, it's not a soft 17

- If dealer is required to hit on soft 17
    - If player didn't get blackjack
    - If player didn't bust
    - Dealer should not have a soft 17 final hand

In [147]:
for i in df2.d_final_hand[(df2.soft_17 == 1) & (df2.d_final == 17)].index:
    if soft_17(df2.ix[i, "d_final_hand"]) and df2.ix[i,"p_bust"] == 0 and df2.ix[i,"p_initial"] != 21:
        print (df2.ix[i])

.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#ix-indexer-is-deprecated
  


## If no one busted, the higher point wins

In [148]:
sub = df2[(df2.p_bust == 0) & (df2.d_bust == 0) & (df2.p_win == 1)]

In [149]:
(sub.p_final > sub.d_final).value_counts()

True    19267
dtype: int64

In [150]:
sub = df2[(df2.p_bust == 0) & (df2.d_bust == 0) & (df2.p_win == 0)]

In [151]:
(sub.p_final <= sub.d_final).value_counts()

True    41895
dtype: int64

## If playing the recommended strategy, player should not hit on 17 and above

In [152]:
df2.p_hit[(df2.p_initial >= 17) & (df2.strategy==1)].value_counts()

0    17548
Name: p_hit, dtype: int64

## If playing the recommended strategy, player should not hit on 12-16 if the dealer is showing 6 and below

In [153]:
df2.p_hit[(df2.p_initial<17) & (df2.p_initial>11) & (df2.d_open<=6) & (df2.strategy==1)].value_counts()

0    10045
Name: p_hit, dtype: int64

# Data dictionary

In [154]:
df2.columns

Index(['d_hand', 'd_final_hand', 'p_hand', 'p_final_hand', 'strategy', 'split',
       'num_decks', 'soft_17', 'game_type', 'd_open', 'd_initial', 'd_final',
       'd_hit', 'd_bust', 'p_initial', 'p_final', 'p_hit', 'p_bust', 'p_win'],
      dtype='object')

In [156]:
df2.head()

Unnamed: 0,d_hand,d_final_hand,p_hand,p_final_hand,strategy,split,num_decks,soft_17,game_type,d_open,d_initial,d_final,d_hit,d_bust,p_initial,p_final,p_hit,p_bust,p_win
0,29,298,"4,Q","4,Q",1,0,4,0,0,2,11,19,1,0,14,14,0,0,0
1,"K,3","K,3,3,Q",76,766,0,0,4,0,0,10,13,26,1,1,13,19,1,0,1
2,910,910,45,"4,5,Q",0,0,4,0,0,9,19,19,0,0,9,19,1,0,0
3,"Q,9","Q,9","K,A","K,A",0,0,4,0,0,10,19,19,0,0,21,21,0,0,1
4,"4,Q","4,Q,2,Q",79,794,0,0,4,0,0,4,14,26,1,1,16,20,1,0,1


In [157]:
data_dict = pd.read_csv("data_dictionary.csv")

In [158]:
data_dict

Unnamed: 0,feature,description
0,d_hand,Dealer's starting hand
1,d_final_hand,Dealer's ending hand
2,p_hand,Player's starting hand
3,p_final_hand,Player's ending hand
4,strategy,0=random strategy 1=recommended strategy
5,split,0=no split 1=split
6,num_decks,Number of decks
7,soft_17,0=dealer to stand on all 17 1=dealer to hit on soft 17
8,game_type,0=continuous shuffling 1=no reshuffling until end of deck
9,d_open,Dealer's open card
