In [1]:
from itertools import combinations
import numpy as np
import pandas as pd
import pickle as pkl
import time


# Raise the pandas display limit so frames of up to 100 rows render without truncation.
pd.options.display.max_rows = 100

In [40]:
# Load the per-leave summary table; the unnamed first CSV column holds the leave
# string and becomes the index.
summary_df = (
    pd.read_csv('leave_summary_20200508.csv')
    .rename(columns={'Unnamed: 0': 'leave'})
    .set_index('leave')
)

In [41]:
# Snapshot three columns as leave -> value dicts (the frame is indexed by leave
# here) so the smoothing code can do plain dictionary lookups.
count_dict, equity_dict, mean_equity_dict = (
    summary_df[column].to_dict() for column in ('count', 'equity', 'mean_equity')
)
# Turn 'leave' back into an ordinary column for the filtering cells that follow.
summary_df = summary_df.reset_index()

In [42]:
# Number of tiles in each leave.
summary_df['leave_len'] = summary_df['leave'].apply(len)

In [5]:
# NOTE(review): pickle.load can execute arbitrary code embedded in the file; only
# load 'all_leaves.p' when it comes from a trusted source.
# The context manager guarantees the file handle is closed after loading.
with open('all_leaves.p', 'rb') as leaves_file:
    # Presumably indexable by leave length (1..5 are read below), each entry an
    # iterable of leave strings -- verify against the code that wrote the pickle.
    leaves = pkl.load(leaves_file)

# Canonical tile order used when normalising leaves: blank ('?') first, then A-Z.
alphabetical_key = '?ABCDEFGHIJKLMNOPQRSTUVWXYZ'


def sort_func(x):
    """Sort key: position of tile ``x`` in ``alphabetical_key``.

    Raises ValueError if ``x`` is not one of the 27 known tiles.
    """
    return alphabetical_key.index(x)

In [6]:
def _extend_with_every_tile(leave):
    """Return the 27 leaves made by adding each tile to ``leave``, each re-sorted into canonical order."""
    return [''.join(sorted(leave + tile, key=sort_func)) for tile in alphabetical_key]


# Map every leave of length 1-5 to its one-tile-longer children.
child_leaves = {leave: _extend_with_every_tile(leave)
                for length in range(1, 6) for leave in leaves[length]}
# The empty leave's children are simply the individual tiles.
child_leaves[''] = list(alphabetical_key)

In [7]:
# Demo: combinations() picks by position, so a leave with repeated tiles yields
# repeated subleaves (see the duplicated tuples in the output below).
list(combinations('AABBJ',4))

[('A', 'A', 'B', 'B'),
 ('A', 'A', 'B', 'J'),
 ('A', 'A', 'B', 'J'),
 ('A', 'B', 'B', 'J'),
 ('A', 'B', 'B', 'J')]

`get_neighboring_leaves` below does not filter out impossible racks. They are handled in `calculate_smoothed_superleave` instead (leaves missing from the lookup dictionaries contribute zero), which keeps this code faster.

In [8]:
def get_neighboring_leaves(original_leave):
    """Return the leaves that differ from ``original_leave`` by exactly one tile swap.

    Every subleave obtained by removing one tile from ``original_leave`` is
    looked up in the module-level ``child_leaves`` mapping, which lists the
    leaves produced by adding one tile back. Only candidates containing the
    same number of blanks ('?') as ``original_leave`` are kept.

    Duplicates are NOT removed: repeated tiles make ``combinations`` yield the
    same subleave more than once, and ``original_leave`` itself is returned once
    per subleave. Callers that sum over the result therefore weight those
    leaves accordingly.

    Parameters
    ----------
    original_leave : str
        Leave in canonical tile order; must be non-empty.

    Returns
    -------
    list of str
        Neighboring leaves, in subleave order and then ``child_leaves`` order.

    Raises
    ------
    KeyError
        If a subleave is missing from ``child_leaves``.
    """
    subleaves = (''.join(tiles)
                 for tiles in combinations(original_leave, len(original_leave) - 1))

    # Neighbors must carry the same number of blanks as the original leave.
    blank_count = original_leave.count('?')

    return [neighbor
            for subleave in subleaves
            for neighbor in child_leaves[subleave]
            if neighbor.count('?') == blank_count]

In [9]:
def calculate_smoothed_superleave(superleave):
    """Return the count-weighted mean equity over the neighbors of ``superleave``.

    Total equity and total observation count are summed over every leave
    returned by ``get_neighboring_leaves`` (duplicates included), using the
    module-level ``equity_dict`` and ``count_dict``. Neighbors absent from those
    dictionaries (e.g. impossible racks) contribute zero to both sums.

    Parameters
    ----------
    superleave : str
        Leave whose value should be smoothed.

    Returns
    -------
    float
        Summed neighbor equity divided by summed neighbor count, or NaN when no
        neighbor has been observed (total count of zero), instead of raising
        ZeroDivisionError.
    """
    neighboring_equity = 0
    neighboring_count = 0

    for neighbor_leave in get_neighboring_leaves(superleave):
        neighboring_equity += equity_dict.get(neighbor_leave, 0)
        neighboring_count += count_dict.get(neighbor_leave, 0)

    # No observations anywhere in the neighborhood: there is nothing to average.
    if neighboring_count == 0:
        return float('nan')

    return neighboring_equity / neighboring_count


3,820 superleaves of length 6 were never observed.

In [10]:
# For length-6 leaves, count (1) how many have a non-null 'ev' and (2) how many
# have a non-null 'ev' but fewer than 10 observations. Vectorised boolean masks
# replace the row-by-row apply; rename(None) keeps the printed Series unnamed.
six_tile_leaves = summary_df.loc[summary_df['leave_len'] == 6]
has_ev = six_tile_leaves['ev'].notnull().rename(None)
print(has_ev.value_counts())
print((has_ev & (six_tile_leaves['count'] < 10)).rename(None).value_counts())

True     733491
False      3820
dtype: int64
False    676958
True      60353
dtype: int64


In [11]:
# The 50 highest-'ev' leaves of length 5.
(
    summary_df.loc[summary_df['leave_len'] == 5]
    .sort_values('ev', ascending=False)
    .head(50)
)

Unnamed: 0,leave,points,equity,count,bingo_count,mean_score,mean_equity,bingo pct,pct,adjusted_mean_score,ev,leave_len
167808,??LQX,2917,15182.514,103,2,28.320388,147.403049,1.941748,0.011261,-10.751291,106.315359,5
109471,??DQX,3218,15198.186,110,0,29.254545,138.165327,0.0,0.012027,-9.817134,97.077638,5
108048,??DMZ,12534,44523.936,382,61,32.811518,116.554806,15.968586,0.041766,-6.260161,75.467117,5
169820,??MOZ,28258,64857.103,585,184,48.304274,110.866843,31.452991,0.063961,9.232594,69.779154,5
78274,??CCV,3659,17857.948,167,21,21.91018,106.93382,12.57485,0.018259,-17.1615,65.846131,5
87025,??CHK,9801,29445.381,277,70,35.382671,106.301014,25.270758,0.030286,-3.689008,65.213325,5
123214,??ESZ,94252,101743.515,985,765,95.68731,103.292909,77.664975,0.107695,56.61563,62.205219,5
108972,??DOZ,48383,84760.445,827,318,58.504232,102.491469,38.452237,0.09042,19.432553,61.40378,5
118409,??EIZ,250647,270260.787,2662,2148,94.1574,101.525465,80.69121,0.291049,55.085721,60.437776,5
107424,??DLX,18761,35242.078,353,116,53.147309,99.835915,32.86119,0.038595,14.075629,58.748226,5


In [43]:
# Start the smoothed value as a copy of the raw 'ev'.
summary_df['smoothed_ev'] = summary_df['ev']
# Per-observation gap between total points and total equity.
summary_df['point_equity_diff'] = (
    summary_df['points'].sub(summary_df['equity']).div(summary_df['count'])
)

In [13]:
# The 50 leaves with the most negative points-minus-equity gap.
summary_df.sort_values('point_equity_diff').head(50)

Unnamed: 0,leave,points,equity,count,bingo_count,mean_score,mean_equity,bingo pct,pct,adjusted_mean_score,ev,leave_len,smoothed_ev,point_equity_diff
618171,??DLQX,303,12408.544,46,0,6.586957,269.750957,0.0,0.005029,-32.484723,228.663267,6,228.663267,-263.164
559834,??DDQX,26,815.492,3,0,8.666667,271.830667,0.0,0.000328,-30.405013,230.742978,6,230.742978,-263.164
879005,??LLQX,107,1215.027,7,0,15.285714,173.575286,0.0,0.000765,-23.785965,132.487597,6,132.487597,-158.289571
886010,??LPQX,48,976.316,6,0,8.0,162.719333,0.0,0.000656,-31.071679,121.631644,6,121.631644,-154.719333
887144,??LQXY,74,351.173,2,0,37.0,175.5865,0.0,0.000219,-2.071679,134.498811,6,134.498811,-138.5865
886915,??LQSX,56,324.656,2,0,28.0,162.328,0.0,0.000219,-11.071679,121.240311,6,121.240311,-134.328
620183,??DMOZ,2583,32787.245,233,3,11.085837,140.71779,1.287554,0.025475,-27.985842,99.630101,6,99.630101,-129.631953
457258,??CCJV,10,138.164,1,0,10.0,138.164,0.0,0.000109,-29.071679,97.076311,6,97.076311,-128.164
459910,??CCNV,987,14439.933,107,0,9.224299,134.952645,0.0,0.011699,-29.84738,93.864956,6,93.864956,-125.728346
616124,??DLLX,281,1382.806,9,1,31.222222,153.645111,11.111111,0.000984,-7.849457,112.557422,6,112.557422,-122.422889


In [14]:
# Select the sparsely observed leaves to smooth: length-5 leaves seen fewer than
# 828 times, followed by length-6 leaves seen fewer than 234 times.
sparse_five = (summary_df['leave_len'] == 5) & (summary_df['count'] < 828)
leaves_to_smooth = summary_df.loc[sparse_five, 'leave'].tolist()
print(len(leaves_to_smooth))

sparse_six = (summary_df['leave_len'] == 6) & (summary_df['count'] < 234)
leaves_to_smooth.extend(summary_df.loc[sparse_six, 'leave'].tolist())
print(len(leaves_to_smooth))

37022
472533


"ev" is defined as the average equity of a superleave, minus the average equity over all plays in a run of simulated games. It's about 41 points.

In [15]:
# Recover the overall average equity from the '??' row: because
# ev = mean_equity - overall average, the difference gives the average back.
double_blank_mean_equity, double_blank_ev = summary_df.loc[
    summary_df['leave'] == '??', ['mean_equity', 'ev']
].values[0]
mean_equity = double_blank_mean_equity - double_blank_ev

In [44]:
# Index the frame by leave again so the 'ev' column converts to a leave -> ev dict.
summary_df = summary_df.set_index('leave')
# Starts as a copy of the raw 'ev' values; entries for the leaves being smoothed
# are overwritten by the smoothing loop further down.
smooth_ev_dict = summary_df['ev'].to_dict()

In [17]:
# Separate leave -> raw 'ev' dict, built from the same column as smooth_ev_dict.
ev_dict = summary_df['ev'].to_dict()

In [18]:
# Spot-check the stored value for a single leave.
smooth_ev_dict['AABBJ']

-5.292460051313853

In [26]:
# Replace the EV of each sparsely observed leave with its neighbor-smoothed value.
t0 = time.time()

for i,leave in enumerate(leaves_to_smooth):
    # Progress report every 1000 leaves: zero-based index and elapsed seconds.
    if (i+1)%1000==0:
        print(i, time.time()-t0)
    
    # Subtract the overall mean equity so the result is on the same scale as 'ev'.
    smooth_ev_dict[leave] = calculate_smoothed_superleave(leave) - mean_equity

999 0.24371099472045898
1999 0.4743340015411377
2999 0.7030730247497559
3999 0.9337389469146729
4999 1.1726887226104736
5999 1.406951904296875
6999 1.641942024230957
7999 1.878746747970581
8999 2.1052029132843018
9999 2.326411008834839
10999 2.5659329891204834
11999 2.823171854019165
12999 3.063843011856079
13999 3.3091189861297607
14999 3.549504041671753
15999 3.7925429344177246
16999 4.060200929641724
17999 4.315937042236328
18999 4.573595762252808
19999 4.811480760574341
20999 5.068622827529907
21999 5.3416218757629395
22999 5.602749824523926
23999 5.869962930679321
24999 6.131170034408569
25999 6.394488096237183
26999 6.647558927536011
27999 6.913771867752075
28999 7.185763835906982
29999 7.466888904571533
30999 7.744753837585449
31999 8.032512903213501
32999 8.311309814453125
33999 8.57605504989624
34999 8.826551914215088
35999 9.069002866744995
36999 9.304077863693237
37999 9.595224857330322
38999 9.929030895233154
39999 10.277236938476562
40999 10.589678764343262
41999 10.920156

In [27]:
# Persist the leave -> smoothed EV mapping.
# NOTE(review): the warning fragment in this cell's output looks like the pandas
# FutureWarning about Series.to_csv's `header` default changing between versions;
# consider passing `header=` explicitly so the file layout is stable -- confirm
# which layout downstream readers expect.
pd.Series(smooth_ev_dict).to_csv('leave_values_20200512_smoothed.csv')

  """Entry point for launching an IPython kernel.


In [37]:
# Wrap the dict as a named Series (indexed by leave) so it can be attached as a column.
smoothed_ev = pd.Series(smooth_ev_dict,name='smoothed_ev')

In [47]:
# Swap the placeholder 'smoothed_ev' column for the computed values, aligning on
# the leave index.
summary_df = pd.concat(
    [summary_df.drop(columns='smoothed_ev'), smoothed_ev],
    axis=1,
)

In [48]:
# Display the frame to confirm 'smoothed_ev' is now the last column.
summary_df

Unnamed: 0_level_0,points,equity,count,bingo_count,mean_score,mean_equity,bingo pct,pct,adjusted_mean_score,ev,leave_len,point_equity_diff,smoothed_ev
leave,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
?,782820960,9.019760e+08,13841229,7322671,56.557186,65.165892,52.904775,1513.324492,17.485506,24.078203,1,-8.608707,24.078203
A,2005089326,2.117354e+09,49868572,10037371,40.207474,42.458690,20.127649,5452.357690,1.135795,1.371000,1,-2.251215,1.371000
B,461839888,4.765642e+08,12390310,1601088,37.274280,38.462657,12.922098,1354.688921,-1.797399,-2.625032,1,-1.188377,-2.625032
C,576503825,6.071711e+08,14491423,2644552,39.782416,41.898656,18.249084,1584.413158,0.710737,0.810967,1,-2.116239,0.810967
D,1012212785,1.059361e+09,25452930,4829723,39.768026,41.620414,18.975116,2782.884551,0.696347,0.532724,1,-1.852387,0.532724
...,...,...,...,...,...,...,...,...,...,...,...,...,...
??WXYY,0,0.000000e+00,0,0,,,,0.000000,,,6,,27.054749
?WXYYZ,59,6.361000e+01,1,0,59.000000,63.610000,0.000000,0.000109,19.928321,22.522311,6,-4.610000,13.542543
??WXYZ,54,5.319400e+01,1,0,54.000000,53.194000,0.000000,0.000109,14.928321,12.106311,6,0.806000,29.007211
??WYYZ,0,0.000000e+00,0,0,,,,0.000000,,,6,,32.317652


In [49]:
# How far smoothing moved each leave's value; NaN wherever the raw 'ev' is missing.
summary_df['ev_delta'] = summary_df['smoothed_ev']-summary_df['ev']

In [52]:
# Leaves whose value rose the most under smoothing come first; rows with a NaN
# delta sort to the end.
summary_df.sort_values('ev_delta', ascending=False)

Unnamed: 0_level_0,points,equity,count,bingo_count,mean_score,mean_equity,bingo pct,pct,adjusted_mean_score,ev,leave_len,point_equity_diff,smoothed_ev,ev_delta
leave,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
??CCVV,0,40.306,1,0,0.0,40.306,0.0,0.000109,-39.071679,-0.781689,6,-40.306,56.351787,57.133476
??CCVX,44,38.329,1,0,44.0,38.329,0.0,0.000109,4.928321,-2.758689,6,5.671,43.757802,46.516492
??FSWW,36,31.358,1,0,36.0,31.358,0.0,0.000109,-3.071679,-9.729689,6,4.642,35.682576,45.412265
??DJQX,48,49.051,1,0,48.0,49.051,0.0,0.000109,8.928321,7.963311,6,-1.051,50.432336,42.469025
??DQXZ,46,55.091,1,0,46.0,55.091,0.0,0.000109,6.928321,14.003311,6,-9.091,56.397570,42.394259
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
?WWXYZ,0,0.000,0,0,,,,0.000000,,,6,,10.266142,
??WWYY,0,0.000,0,0,,,,0.000000,,,6,,27.634908,
??WXYY,0,0.000,0,0,,,,0.000000,,,6,,27.054749,
??WYYZ,0,0.000,0,0,,,,0.000000,,,6,,32.317652,
