In [None]:
# In this notebook I take the game data and group it by opening, and create a scoring system to rank openings based on 
# certain opening attributes. 

In [1]:
import pandas as pd
import numpy as np

In [2]:
pd.set_option('display.max_colwidth', None)
pd.set_option('display.max_columns', 50)
pd.set_option('display.max_rows', 1000)

In [3]:
chess = pd.read_csv('../data/caissabase_df.csv', index_col = 0)

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
  mask |= (ar1 == a)


In [4]:
# Calculating white elo performance based on result and black elo.

chess.loc[chess['result'] == '1-0', 'white_perf'] = chess['black_elo'] + 400 
chess.loc[chess['result'] == '1/2-1/2', 'white_perf'] = chess['black_elo']
chess.loc[chess['result'] == '0-1', 'white_perf'] = chess['black_elo'] - 400

In [5]:
# Calculating black elo performance based on result and white elo.

chess.loc[chess['result'] == '1-0', 'black_perf'] = chess['white_elo'] - 400 
chess.loc[chess['result'] == '1/2-1/2', 'black_perf'] = chess['white_elo']
chess.loc[chess['result'] == '0-1', 'black_perf'] = chess['white_elo'] + 400

In [6]:
# Calculating elo difference between a player's elo and their performance.

chess['white_perf_diff'] = chess['white_perf']-chess['white_elo']
chess['black_perf_diff'] = chess['black_perf']-chess['black_elo']

In [8]:
# White has a slight edge in the opening- this becomes apparent when we find the average across all games of white's elo 
# performance compared to white's elo. White performs 32 elo stronger than its rating would indicate, and black performs 
# 32 elo weaker than its rating indicates.

white_first_move_adv = chess['white_perf_diff'].mean()

In [9]:
# I subract the white opening advantage from white and add it to black- now when you find the average white performance, 
# it's 0 elo instead of +32 elo, and the average black performance is 0 elo instead of -32 elo. This is used so that when 
# comparing openings, first move advantage doesn't confuse things. 

chess['white_perf_diff_adjusted'] = chess['white_perf_diff']-white_first_move_adv
chess['black_perf_diff_adjusted'] = chess['black_perf_diff']+white_first_move_adv

In [10]:
chess = chess.astype({'white_perf':'int', 'black_perf':'int', 'white_perf_diff':'int', 'black_perf_diff':'int', 'white_perf_diff_adjusted':'int', 'black_perf_diff_adjusted':'int'})

In [11]:
# I group games by white's first move. I find the mean for each opening's adjusted elo performance and the count of games in
# a given opening. I ignore any openings with less than 25 games to limit the number of openings and cut down on issues of
# small sample size. I calculate the 95% confidence interval for the adjusted elo performance to get an idea of how accurate
# elo performance measures are. I also calculate the percent of the time an opening is played from the previous position- on
# move 1 it's pretty easy, but I have to use a groupby for everything after this. 
#
# In an attempt to create a scoring system, I create a 'countscore' which is 50x^-0.5 where x is the count of the opening.
# 'eloscore' is 0.08x where x is the mean adjusted elo performance for the opening (Note: the formula becomes -0.08x when it
# is black to move). 'rarityscore' is (x^-1-2)/2 where x is the percent of the time an opening is played from the previous
# position, with the percentage as a decimal.
# 
# Some of the code here is repetitive in that I drop counts below 25 and then later am still checking for counts below 25. 
# With additional time this code can be streamlined.
#
# There are some additional calculations- any score above +10 is rounded down to +10. Also, any score below 0 is rounded 
# down to 0, with the exception of the 'eloscore', which goes to -10. Any eloscore below -10 rounds to a score of -10. Total
# score is calculated by summing these thee scores. 
#
# Openings are sorted by highest scores. 

w1 = chess.groupby(['1w']).agg(mean = ('white_perf_diff_adjusted', 'mean'), count = ('white_perf_diff_adjusted', 'count'), standard_deviation = ('white_perf_diff_adjusted', 'std'))
w1 = w1.loc[w1['count'] >= 25]
w1['95%_confidence_interval_+'] = w1['mean'] + 1.96*(w1['standard_deviation']/(np.sqrt(w1['count'])))
w1['95%_confidence_interval_-'] = w1['mean'] - 1.96*(w1['standard_deviation']/(np.sqrt(w1['count'])))
w1['+/-range'] = (w1['95%_confidence_interval_+']-w1['95%_confidence_interval_-'])/2
w1 = w1.reset_index()
w1_count = w1['count'].sum()
w1['%_of_previous_position'] = w1['count']/w1_count
w1.loc[w1['count'] >= 25, 'countscore'] = 50*(w1['count'])**-0.5
w1.loc[w1['count'] >= 25, 'eloscore'] = (0.08*w1['mean'])
w1.loc[w1['eloscore'] > 10, 'eloscore'] = 10
w1.loc[w1['count'] >= 25, 'rarityscore'] = ((w1['%_of_previous_position']**-1)-2)/2
w1.loc[w1['rarityscore'] < 0, 'rarityscore'] = 0
w1.loc[w1['rarityscore'] > 10, 'rarityscore'] = 10
w1['score'] = w1['countscore']+w1['eloscore']+w1['rarityscore']
w1.sort_values(by = 'score', ascending = False).reset_index(drop = True)

Unnamed: 0,1w,mean,count,standard_deviation,95%_confidence_interval_+,95%_confidence_interval_-,+/-range,%_of_previous_position,countscore,eloscore,rarityscore,score
0,h3,-7.839286,56,339.659229,81.122907,-96.801478,88.962192,3e-05,6.681531,-0.627143,10.0,16.054388
1,h4,-78.051282,39,317.639563,21.64027,-177.742834,99.691552,2.1e-05,8.006408,-6.244103,10.0,11.762305
2,Nh3,-86.897436,39,251.558384,-7.94554,-165.849332,78.951896,2.1e-05,8.006408,-6.951795,10.0,11.054613
3,c3,-32.503247,308,296.761604,0.639494,-65.645988,33.142741,0.000166,2.849014,-2.60026,10.0,10.248755
4,a3,-27.474085,656,306.116741,-4.048458,-50.899713,23.425627,0.000355,1.952172,-2.197927,10.0,9.754245
5,d3,-30.037152,646,297.939157,-7.061523,-53.01278,22.975628,0.000349,1.967224,-2.402972,10.0,9.564252
6,Nc3,-19.921842,2866,292.389007,-9.217031,-30.626654,10.704811,0.001549,0.933968,-1.593747,10.0,9.34022
7,g3,-18.765801,13433,280.334615,-14.025059,-23.506542,4.740742,0.007261,0.431403,-1.501264,10.0,8.930139
8,b3,-22.657389,8946,288.830136,-16.672114,-28.642664,5.985275,0.004836,0.528635,-1.812591,10.0,8.716043
9,b4,-34.859571,1353,305.866689,-18.561382,-51.157761,16.298189,0.000731,1.359318,-2.788766,10.0,8.570552


In [79]:
# This cell and the next 18 cells beneath it are very similar. I could have probably made a loop but I didn't want to waste
# massive amounts of time for minimal benefit. 
#
# Almost everything here is the same as the cell above, here we go a move deeper to black's first move. Getting the 
# percent of the time an opening was played from the previous move, we have to group by all the previous moves (in this
# case, just one move), find the count for the previous move, and merge it on all previous moves.
#
# In each cell, I go deeper and deeper into different openings with this scoring system. I stop at move 10 (10 moves for
# white, 10 moves for black) because I deem it a reasonable stopping point.

b1 = chess.groupby(['1w','1b']).agg(mean = ('white_perf_diff_adjusted', 'mean'), count = ('white_perf_diff_adjusted', 'count'), standard_deviation = ('white_perf_diff_adjusted', 'std'))
b1 = b1.loc[b1['count'] >= 25]
b1['95%_confidence_interval_+'] = b1['mean'] + 1.96*(b1['standard_deviation']/(np.sqrt(b1['count'])))
b1['95%_confidence_interval_-'] = b1['mean'] - 1.96*(b1['standard_deviation']/(np.sqrt(b1['count'])))
b1['+/-range'] = (b1['95%_confidence_interval_+']-b1['95%_confidence_interval_-'])/2
b1 = b1.reset_index()
b1p = b1.groupby(['1w']).agg(previous_count = ('count','sum'))
b1 = b1.merge(b1p, how = 'outer', on = '1w')
b1['%_of_previous_position'] = b1['count'] / b1['previous_count']
b1.loc[b1['count'] >= 25, 'countscore'] = 50*(b1['count'])**-0.5
b1.loc[b1['count'] >= 25, 'eloscore'] = (-0.08*b1['mean'])
b1.loc[b1['eloscore'] > 10, 'eloscore'] = 10
b1.loc[b1['count'] >= 25, 'rarityscore'] = ((b1['%_of_previous_position']**-1)-2)/2
b1.loc[b1['rarityscore'] < 0, 'rarityscore'] = 0
b1.loc[b1['rarityscore'] > 10, 'rarityscore'] = 10
b1['score'] = b1['countscore']+b1['eloscore']+b1['rarityscore']
b1.sort_values(by = 'score', ascending = False).reset_index(drop = True)

Unnamed: 0,1w,1b,mean,count,standard_deviation,95%_confidence_interval_+,95%_confidence_interval_-,+/-range,previous_count,%_of_previous_position,countscore,eloscore,rarityscore,score
0,f4,f5,-102.142857,49,277.489264,-24.445863,-179.839851,77.696994,5901,0.008304,7.142857,8.171429,10.0,25.314286
1,f4,Nh6,-94.386364,44,234.097298,-25.215032,-163.557695,69.171331,5901,0.007456,7.537784,7.550909,10.0,25.088693
2,b4,a5,-78.939394,33,321.9998,30.924428,-188.803216,109.863822,1298,0.025424,8.703883,6.315152,10.0,25.019034
3,Nc3,c6,-27.961538,26,274.019632,77.368217,-133.291294,105.329755,2821,0.009217,9.805807,2.236923,10.0,22.04273
4,g3,Nc6,-27.766667,30,315.328584,85.072218,-140.605551,112.838885,13382,0.002242,9.128709,2.221333,10.0,21.350043
5,c4,g5,-12.5,30,296.396839,93.564247,-118.564247,106.064247,122863,0.000244,9.128709,1.0,10.0,20.128709
6,b3,e6,-42.557143,70,305.774978,29.075174,-114.189459,71.632316,8897,0.007868,5.976143,3.404571,10.0,19.380714
7,f4,b6,-44.857143,77,265.309041,14.403009,-104.117295,59.260152,5901,0.013049,5.698029,3.588571,10.0,19.2866
8,b3,d6,-68.983516,182,283.670622,-27.770468,-110.196565,41.213048,8897,0.020456,3.706247,5.518681,10.0,19.224928
9,a3,c5,-3.448276,29,281.855914,99.136809,-106.033361,102.585085,614,0.047231,9.284767,0.275862,9.586207,19.146836


In [13]:
w2 = chess.groupby(['1w','1b','2w']).agg(mean = ('white_perf_diff_adjusted', 'mean'), count = ('white_perf_diff_adjusted', 'count'), standard_deviation = ('white_perf_diff_adjusted', 'std'))
w2 = w2.loc[w2['count'] >= 25]
w2['95%_confidence_interval_+'] = w2['mean'] + 1.96*(w2['standard_deviation']/(np.sqrt(w2['count'])))
w2['95%_confidence_interval_-'] = w2['mean'] - 1.96*(w2['standard_deviation']/(np.sqrt(w2['count'])))
w2['+/-range'] = (w2['95%_confidence_interval_+']-w2['95%_confidence_interval_-'])/2
w2 = w2.reset_index()
w2p = w2.groupby(['1w','1b']).agg(previous_count = ('count','sum'))
w2 = w2.merge(w2p, how = 'outer', on = ['1w','1b'])
w2['%_of_previous_position'] = w2['count'] / w2['previous_count']
w2.loc[w2['count'] >= 25, 'countscore'] = 50*(w2['count'])**-0.5
w2.loc[w2['count'] >= 25, 'eloscore'] = (0.08*w2['mean'])
w2.loc[w2['eloscore'] > 10, 'eloscore'] = 10
w2.loc[w2['count'] >= 25, 'rarityscore'] = ((w2['%_of_previous_position']**-1)-2)/2
w2.loc[w2['rarityscore'] < 0, 'rarityscore'] = 0
w2.loc[w2['rarityscore'] > 10, 'rarityscore'] = 10
w2['score'] = w2['countscore']+w2['eloscore']+w2['rarityscore']
w2.sort_values(by = 'score', ascending = False).reset_index(drop = True)

Unnamed: 0,1w,1b,2w,mean,count,standard_deviation,95%_confidence_interval_+,95%_confidence_interval_-,+/-range,previous_count,%_of_previous_position,countscore,eloscore,rarityscore,score
0,e4,b6,c4,203.666667,27,245.147365,296.136786,111.196547,92.47012,1841,0.014666,9.622504,10.0,10.0,29.622504
1,Nf3,f5,b4,113.184211,38,267.178537,198.134691,28.23373,84.950481,4836,0.007858,8.111071,9.054737,10.0,27.165808
2,g3,d5,d3,61.6,35,219.786019,134.415211,-11.215211,72.815211,4839,0.007233,8.451543,4.928,10.0,23.379543
3,e4,Nc6,Bb5,37.5,32,283.332929,135.669851,-60.669851,98.169851,4653,0.006877,8.838835,3.0,10.0,21.838835
4,g3,g6,c4,38.318182,44,262.978801,116.023452,-39.387088,77.70527,1845,0.023848,7.537784,3.065455,10.0,20.603238
5,c4,c5,e3,10.3125,32,273.903378,105.215179,-84.590179,94.902679,14787,0.002164,8.838835,0.825,10.0,19.663835
6,Nf3,e6,e4,43.941176,68,269.036861,107.887179,-20.004826,63.946003,3162,0.021505,6.063391,3.515294,10.0,19.578685
7,g3,Nf6,Nf3,18.289474,38,308.711088,116.445394,-79.866447,98.155921,2656,0.014307,8.111071,1.463158,10.0,19.574229
8,e4,d6,d3,28.2,50,290.128191,108.619432,-52.219432,80.419432,36233,0.00138,7.071068,2.256,10.0,19.327068
9,e4,b6,Nf3,2.516129,31,295.500014,106.539933,-101.507675,104.023804,1841,0.016839,8.980265,0.20129,10.0,19.181555


In [14]:
b2 = chess.groupby(['1w','1b','2w','2b']).agg(mean = ('white_perf_diff_adjusted', 'mean'), count = ('white_perf_diff_adjusted', 'count'), standard_deviation = ('white_perf_diff_adjusted', 'std'))
b2 = b2.loc[b2['count'] >= 25]
b2['95%_confidence_interval_+'] = b2['mean'] + 1.96*(b2['standard_deviation']/(np.sqrt(b2['count'])))
b2['95%_confidence_interval_-'] = b2['mean'] - 1.96*(b2['standard_deviation']/(np.sqrt(b2['count'])))
b2['+/-range'] = (b2['95%_confidence_interval_+']-b2['95%_confidence_interval_-'])/2
b2 = b2.reset_index()
b2p = b2.groupby(['1w','1b','2w']).agg(previous_count = ('count','sum'))
b2 = b2.merge(b2p, how = 'outer', on = ['1w','1b','2w'])
b2['%_of_previous_position'] = b2['count'] / b2['previous_count']
b2.loc[b2['count'] >= 25, 'countscore'] = 50*(b2['count'])**-0.5
b2.loc[b2['count'] >= 25, 'eloscore'] = (-0.08*b2['mean'])
b2.loc[b2['eloscore'] > 10, 'eloscore'] = 10
b2.loc[b2['count'] >= 25, 'rarityscore'] = ((b2['%_of_previous_position']**-1)-2)/2
b2.loc[b2['rarityscore'] < 0, 'rarityscore'] = 0
b2.loc[b2['rarityscore'] > 10, 'rarityscore'] = 10
b2['score'] = b2['countscore']+b2['eloscore']+b2['rarityscore']
b2.sort_values(by = 'score', ascending = False).reset_index(drop = True)

Unnamed: 0,1w,1b,2w,2b,mean,count,standard_deviation,95%_confidence_interval_+,95%_confidence_interval_-,+/-range,previous_count,%_of_previous_position,countscore,eloscore,rarityscore,score
0,g3,c5,Bg2,d5,-117.485714,35,242.834532,-37.034520,-197.936908,80.451194,1147,0.030514,8.451543,9.398857,10.000000,27.850400
1,e4,c5,g3,d6,-119.025000,40,325.955439,-18.010363,-220.039637,101.014637,953,0.041973,7.905694,9.522000,10.000000,27.427694
2,c4,g6,Nf3,Nf6,-114.172414,29,288.324726,-9.232921,-219.111907,104.939493,554,0.052347,9.284767,9.133793,8.551724,26.970284
3,g3,c5,Bg2,Nf6,-130.362069,58,311.883171,-50.095626,-210.628512,80.266443,1147,0.050567,6.565322,10.000000,8.887931,25.453253
4,e4,e6,d3,b5,-95.818182,55,329.432120,-8.753770,-182.882593,87.064411,7426,0.007406,6.741999,7.665455,10.000000,24.407453
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
999,e4,Nc6,Nf3,d6,33.301565,1406,286.402308,48.272189,18.330940,14.970625,2725,0.515963,1.333452,-2.664125,0.000000,-1.330673
1000,e4,b6,d4,Bb7,41.968712,1630,289.329166,56.014771,27.922653,14.046059,1712,0.952103,1.238444,-3.357497,0.000000,-2.119053
1001,d4,b5,e4,Bb7,63.875817,306,299.979777,97.487274,30.264360,33.611457,403,0.759305,2.858310,-5.110065,0.000000,-2.251756
1002,d4,d6,Nc3,g6,113.515625,64,315.720038,190.867034,36.164216,77.351409,125,0.512000,6.250000,-9.081250,0.000000,-2.831250


In [15]:
w3 = chess.groupby(['1w','1b','2w','2b','3w']).agg(mean = ('white_perf_diff_adjusted', 'mean'), count = ('white_perf_diff_adjusted', 'count'), standard_deviation = ('white_perf_diff_adjusted', 'std'))
w3 = w3.loc[w3['count'] >= 25]
w3['95%_confidence_interval_+'] = w3['mean'] + 1.96*(w3['standard_deviation']/(np.sqrt(w3['count'])))
w3['95%_confidence_interval_-'] = w3['mean'] - 1.96*(w3['standard_deviation']/(np.sqrt(w3['count'])))
w3['+/-range'] = (w3['95%_confidence_interval_+']-w3['95%_confidence_interval_-'])/2
w3 = w3.reset_index()
w3p = w3.groupby(['1w','1b','2w','2b']).agg(previous_count = ('count','sum'))
w3 = w3.merge(w3p, how = 'outer', on = ['1w','1b','2w','2b'])
w3['%_of_previous_position'] = w3['count'] / w3['previous_count']
w3.loc[w3['count'] >= 25, 'countscore'] = 50*(w3['count'])**-0.5
w3.loc[w3['count'] >= 25, 'eloscore'] = (0.08*w3['mean'])
w3.loc[w3['eloscore'] > 10, 'eloscore'] = 10
w3.loc[w3['count'] >= 25, 'rarityscore'] = ((w3['%_of_previous_position']**-1)-2)/2
w3.loc[w3['rarityscore'] < 0, 'rarityscore'] = 0
w3.loc[w3['rarityscore'] > 10, 'rarityscore'] = 10
w3['score'] = w3['countscore']+w3['eloscore']+w3['rarityscore']
w3.sort_values(by = 'score', ascending = False).reset_index(drop = True)

Unnamed: 0,1w,1b,2w,2b,3w,mean,count,standard_deviation,95%_confidence_interval_+,95%_confidence_interval_-,+/-range,previous_count,%_of_previous_position,countscore,eloscore,rarityscore,score
0,e4,e6,d3,c5,Nc3,118.925926,27,320.905201,239.972066,-2.120214,121.046140,1381,0.019551,9.622504,9.514074,10.000000,29.136579
1,d4,d5,Bg5,h6,Bf4,94.269231,26,281.643826,202.529632,-13.991170,108.260401,743,0.034993,9.805807,7.541538,10.000000,27.347345
2,e4,Nc6,Nf3,d6,Bb5,90.653846,26,291.967114,202.882391,-21.574699,112.228545,1392,0.018678,9.805807,7.252308,10.000000,27.058114
3,e4,g6,d4,d6,c4,124.888889,54,271.864538,197.401201,52.376577,72.512312,1222,0.044190,6.804138,9.991111,10.000000,26.795249
4,d4,e6,e4,d5,Bd3,113.446809,47,302.235857,199.854645,27.038972,86.407836,2163,0.021729,7.293250,9.075745,10.000000,26.368994
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2001,e4,c5,b3,g6,Bb2,-96.755556,225,277.474641,-60.498869,-133.012242,36.256686,225,1.000000,3.333333,-7.740444,0.000000,-4.407111
2002,g4,d5,Bg2,c6,h3,-161.166667,36,257.490360,-77.053149,-245.280184,84.113518,36,1.000000,8.333333,-12.893333,0.000000,-4.560000
2003,b3,g6,Bb2,Nf6,e4,-185.700000,30,278.204979,-86.145627,-285.254373,99.554373,90,0.333333,9.128709,-14.856000,0.500000,-5.227291
2004,d4,Nf6,e3,c5,c3,-196.785714,28,240.979077,-107.525815,-286.045613,89.259899,28,1.000000,9.449112,-15.742857,0.000000,-6.293745


In [16]:
b3 = chess.groupby(['1w','1b','2w','2b','3w','3b']).agg(mean = ('white_perf_diff_adjusted', 'mean'), count = ('white_perf_diff_adjusted', 'count'), standard_deviation = ('white_perf_diff_adjusted', 'std'))
b3 = b3.loc[b3['count'] >= 25]
b3['95%_confidence_interval_+'] = b3['mean'] + 1.96*(b3['standard_deviation']/(np.sqrt(b3['count'])))
b3['95%_confidence_interval_-'] = b3['mean'] - 1.96*(b3['standard_deviation']/(np.sqrt(b3['count'])))
b3['+/-range'] = (b3['95%_confidence_interval_+']-b3['95%_confidence_interval_-'])/2
b3 = b3.reset_index()
b3p = b3.groupby(['1w','1b','2w','2b','3w']).agg(previous_count = ('count','sum'))
b3 = b3.merge(b3p, how = 'outer', on = ['1w','1b','2w','2b','3w'])
b3['%_of_previous_position'] = b3['count'] / b3['previous_count']
b3.loc[b3['count'] >= 25, 'countscore'] = 50*(b3['count'])**-0.5
b3.loc[b3['count'] >= 25, 'eloscore'] = (-0.08*b3['mean'])
b3.loc[b3['eloscore'] > 10, 'eloscore'] = 10
b3.loc[b3['count'] >= 25, 'rarityscore'] = ((b3['%_of_previous_position']**-1)-2)/2
b3.loc[b3['rarityscore'] < 0, 'rarityscore'] = 0
b3.loc[b3['rarityscore'] > 10, 'rarityscore'] = 10
b3['score'] = b3['countscore']+b3['eloscore']+b3['rarityscore']
b3.sort_values(by = 'score', ascending = False).reset_index(drop = True)

Unnamed: 0,1w,1b,2w,2b,3w,3b,mean,count,standard_deviation,95%_confidence_interval_+,95%_confidence_interval_-,+/-range,previous_count,%_of_previous_position,countscore,eloscore,rarityscore,score
0,d4,Nf6,Bg5,e6,e4,d6,-111.884615,26,349.747032,22.553815,-246.323046,134.438431,2602,0.009992,9.805807,8.950769,10.000000,28.756576
1,d4,Nf6,Nf3,e6,e3,g6,-124.157895,38,238.487750,-48.329764,-199.986025,75.828131,5285,0.007190,8.111071,9.932632,10.000000,28.043703
2,d4,d5,c4,dxc4,e3,b5,-117.676471,34,319.882338,-10.152108,-225.200833,107.524362,3099,0.010971,8.574929,9.414118,10.000000,27.989047
3,c4,Nf6,g3,e5,Bg2,h6,-138.054054,37,310.297165,-38.069476,-238.038632,99.984578,723,0.051176,8.219949,10.000000,8.770270,26.990220
4,c4,e6,Nf3,d5,d4,a6,-91.724138,29,248.988510,-1.101567,-182.346709,90.622571,1109,0.026150,9.284767,7.337931,10.000000,26.622698
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3387,d4,e6,c4,c5,d5,exd5,52.877551,588,297.921331,76.958246,28.796857,24.080695,588,1.000000,2.061965,-4.230204,0.000000,-2.168239
3388,d4,c5,d5,e5,e4,d6,46.621762,1158,281.954330,62.861557,30.381966,16.239796,1158,1.000000,1.469318,-3.729741,0.000000,-2.260423
3389,d4,c5,d5,f5,e4,fxe4,133.421053,38,247.275274,212.043212,54.798894,78.622159,38,1.000000,8.111071,-10.673684,0.000000,-2.562613
3390,e4,b6,d4,Bb7,Bd3,e6,65.251386,541,280.738114,88.908340,41.594433,23.656954,1003,0.539382,2.149668,-5.220111,0.000000,-3.070443


In [17]:
w4 = chess.groupby(['1w','1b','2w','2b','3w','3b','4w']).agg(mean = ('white_perf_diff_adjusted', 'mean'), count = ('white_perf_diff_adjusted', 'count'), standard_deviation = ('white_perf_diff_adjusted', 'std'))
w4 = w4.loc[w4['count'] >= 25]
w4['95%_confidence_interval_+'] = w4['mean'] + 1.96*(w4['standard_deviation']/(np.sqrt(w4['count'])))
w4['95%_confidence_interval_-'] = w4['mean'] - 1.96*(w4['standard_deviation']/(np.sqrt(w4['count'])))
w4['+/-range'] = (w4['95%_confidence_interval_+']-w4['95%_confidence_interval_-'])/2
w4 = w4.reset_index()
w4p = w4.groupby(['1w','1b','2w','2b','3w','3b']).agg(previous_count = ('count','sum'))
w4 = w4.merge(w4p, how = 'outer', on = ['1w','1b','2w','2b','3w','3b'])
w4['%_of_previous_position'] = w4['count'] / w4['previous_count']
w4.loc[w4['count'] >= 25, 'countscore'] = 50*(w4['count'])**-0.5
w4.loc[w4['count'] >= 25, 'eloscore'] = (0.08*w4['mean'])
w4.loc[w4['eloscore'] > 10, 'eloscore'] = 10
w4.loc[w4['count'] >= 25, 'rarityscore'] = ((w4['%_of_previous_position']**-1)-2)/2
w4.loc[w4['rarityscore'] < 0, 'rarityscore'] = 0
w4.loc[w4['rarityscore'] > 10, 'rarityscore'] = 10
w4['score'] = w4['countscore']+w4['eloscore']+w4['rarityscore']
w4.sort_values(by = 'score', ascending = False).reset_index(drop = True)

Unnamed: 0,1w,1b,2w,2b,3w,3b,4w,mean,count,standard_deviation,95%_confidence_interval_+,95%_confidence_interval_-,+/-range,previous_count,%_of_previous_position,countscore,eloscore,rarityscore,score
0,e4,c5,Nc3,d6,f4,Nc6,Bc4,144.960000,25,329.741121,274.218519,15.701481,129.258519,922,0.027115,10.000000,10.000000,10.000000,30.000000
1,d4,Nf6,c4,d6,Nc3,g6,g3,125.620690,29,192.088970,195.533941,55.707438,69.913251,614,0.047231,9.284767,10.000000,9.586207,28.870974
2,d4,d6,e4,Nf6,Nc3,g6,Be2,106.920000,25,250.193079,204.995687,8.844313,98.075687,1062,0.023540,10.000000,8.553600,10.000000,28.553600
3,d4,e6,c4,b6,a3,Bb7,d5,143.346154,26,262.642562,244.302704,42.389603,100.956551,501,0.051896,9.805807,10.000000,8.634615,28.440422
4,d4,d6,e4,Nf6,Nc3,g6,h3,102.677419,31,257.064893,193.171046,12.183793,90.493627,1062,0.029190,8.980265,8.214194,10.000000,27.194459
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4764,Nf3,d5,c4,d4,d3,Nc6,g3,-145.461538,39,231.570479,-72.782870,-218.140207,72.678668,39,1.000000,8.006408,-11.636923,0.000000,-3.630515
4765,b3,d5,Bb2,c5,e3,Nc6,f4,-181.000000,30,320.589113,-66.278659,-295.721341,114.721341,134,0.223881,9.128709,-14.480000,1.233333,-4.117957
4766,d4,f5,g3,g6,Bg2,Nf6,c4,-169.378378,37,249.370246,-89.025795,-249.730962,80.352583,116,0.318966,8.219949,-13.550270,0.567568,-4.762753
4767,e4,e6,Qe2,Be7,g3,d5,d3,-169.777778,36,301.283645,-71.358454,-268.197102,98.419324,36,1.000000,8.333333,-13.582222,0.000000,-5.248889


In [18]:
b4 = chess.groupby(['1w','1b','2w','2b','3w','3b','4w','4b']).agg(mean = ('white_perf_diff_adjusted', 'mean'), count = ('white_perf_diff_adjusted', 'count'), standard_deviation = ('white_perf_diff_adjusted', 'std'))
b4 = b4.loc[b4['count'] >= 25]
b4['95%_confidence_interval_+'] = b4['mean'] + 1.96*(b4['standard_deviation']/(np.sqrt(b4['count'])))
b4['95%_confidence_interval_-'] = b4['mean'] - 1.96*(b4['standard_deviation']/(np.sqrt(b4['count'])))
b4['+/-range'] = (b4['95%_confidence_interval_+']-b4['95%_confidence_interval_-'])/2
b4 = b4.reset_index()
b4p = b4.groupby(['1w','1b','2w','2b','3w','3b','4w']).agg(previous_count = ('count','sum'))
b4 = b4.merge(b4p, how = 'outer', on = ['1w','1b','2w','2b','3w','3b','4w'])
b4['%_of_previous_position'] = b4['count'] / b4['previous_count']
b4.loc[b4['count'] >= 25, 'countscore'] = 50*(b4['count'])**-0.5
b4.loc[b4['count'] >= 25, 'eloscore'] = (-0.08*b4['mean'])
b4.loc[b4['eloscore'] > 10, 'eloscore'] = 10
b4.loc[b4['count'] >= 25, 'rarityscore'] = ((b4['%_of_previous_position']**-1)-2)/2
b4.loc[b4['rarityscore'] < 0, 'rarityscore'] = 0
b4.loc[b4['rarityscore'] > 10, 'rarityscore'] = 10
b4['score'] = b4['countscore']+b4['eloscore']+b4['rarityscore']
b4.sort_values(by = 'score', ascending = False).reset_index(drop = True)

Unnamed: 0,1w,1b,2w,2b,3w,3b,4w,4b,mean,count,standard_deviation,95%_confidence_interval_+,95%_confidence_interval_-,+/-range,previous_count,%_of_previous_position,countscore,eloscore,rarityscore,score
0,e4,e6,d4,d5,exd5,exd5,c4,Nc6,-111.160000,25,294.837198,4.416182,-226.736182,115.576182,796,0.031407,10.000000,8.892800,10.000000,28.892800
1,e4,e5,Bc4,Nf6,d3,Nc6,Nc3,Be7,-100.629630,27,298.371239,11.916651,-213.175910,112.546280,613,0.044046,9.622504,8.050370,10.000000,27.672875
2,e4,c5,Nf3,e6,g3,Nc6,Bg2,e5,-94.080000,25,298.505851,22.934294,-211.094294,117.014294,1781,0.014037,10.000000,7.526400,10.000000,27.526400
3,d4,Nf6,Nf3,d5,Bf4,c5,e3,e6,-86.464286,28,238.851338,2.007488,-174.936059,88.471774,663,0.042232,9.449112,6.917143,10.000000,26.366255
4,Nf3,Nf6,c4,e6,g3,d5,Bg2,Bd6,-75.851852,27,356.524538,58.629980,-210.333683,134.481832,3334,0.008098,9.622504,6.068148,10.000000,25.690653
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6089,e4,c5,Nc3,a6,Nge2,e6,g3,b5,161.037037,27,280.541438,266.857878,55.216196,105.820841,27,1.000000,9.622504,-12.882963,0.000000,-3.260458
6090,d4,Nf6,c4,e5,dxe5,Ne4,a3,b6,168.517241,29,217.997317,247.860169,89.174314,79.342927,102,0.284314,9.284767,-13.481379,0.758621,-3.437992
6091,c4,e6,d4,d5,Nf3,Nf6,g3,Be7,170.200000,25,233.425934,261.702966,78.697034,91.502966,58,0.431034,10.000000,-13.616000,0.160000,-3.456000
6092,d4,d5,c4,Nc6,Nc3,dxc4,Nf3,Bg4,160.555556,54,211.559053,216.983055,104.128056,56.427499,338,0.159763,6.804138,-12.844444,2.129630,-3.910677


In [19]:
w5 = chess.groupby(['1w','1b','2w','2b','3w','3b','4w','4b','5w']).agg(mean = ('white_perf_diff_adjusted', 'mean'), count = ('white_perf_diff_adjusted', 'count'), standard_deviation = ('white_perf_diff_adjusted', 'std'))
w5 = w5.loc[w5['count'] >= 25]
w5['95%_confidence_interval_+'] = w5['mean'] + 1.96*(w5['standard_deviation']/(np.sqrt(w5['count'])))
w5['95%_confidence_interval_-'] = w5['mean'] - 1.96*(w5['standard_deviation']/(np.sqrt(w5['count'])))
w5['+/-range'] = (w5['95%_confidence_interval_+']-w5['95%_confidence_interval_-'])/2
w5 = w5.reset_index()
w5p = w5.groupby(['1w','1b','2w','2b','3w','3b','4w','4b']).agg(previous_count = ('count','sum'))
w5 = w5.merge(w5p, how = 'outer', on = ['1w','1b','2w','2b','3w','3b','4w','4b'])
w5['%_of_previous_position'] = w5['count'] / w5['previous_count']
w5.loc[w5['count'] >= 25, 'countscore'] = 50*(w5['count'])**-0.5
w5.loc[w5['count'] >= 25, 'eloscore'] = (-0.08*w5['mean'])
w5.loc[w5['eloscore'] > 10, 'eloscore'] = 10
w5.loc[w5['count'] >= 25, 'rarityscore'] = ((w5['%_of_previous_position']**-1)-2)/2
w5.loc[w5['rarityscore'] < 0, 'rarityscore'] = 0
w5.loc[w5['rarityscore'] > 10, 'rarityscore'] = 10
w5['score'] = w5['countscore']+w5['eloscore']+w5['rarityscore']
w5.sort_values(by = 'score', ascending = False).reset_index(drop = True)

Unnamed: 0,1w,1b,2w,2b,3w,3b,4w,4b,5w,mean,count,standard_deviation,95%_confidence_interval_+,95%_confidence_interval_-,+/-range,previous_count,%_of_previous_position,countscore,eloscore,rarityscore,score
0,d4,Nf6,c4,g6,Nc3,d5,Bg5,Ne4,cxd5,-150.689655,29,214.920102,-72.466720,-228.912591,78.222936,1543,0.018795,9.284767,10.000000,10.00,29.284767
1,e4,c5,Nf3,d6,d4,cxd4,Qxd4,Nf6,c4,-123.520000,25,248.819232,-25.982861,-221.057139,97.537139,494,0.050607,10.000000,9.881600,8.88,28.761600
2,e4,e5,Nf3,Nc6,Bb5,Nf6,O-O,Bc5,Bxc6,-108.600000,25,306.832066,11.678170,-228.878170,120.278170,574,0.043554,10.000000,8.688000,10.00,28.688000
3,d4,Nf6,c4,e6,Nc3,Bb4,Qc2,d6,e4,-112.148148,27,216.779346,-30.378507,-193.917789,81.769641,605,0.044628,9.622504,8.971852,10.00,28.594356
4,e4,g6,d4,Bg7,Nc3,c6,Nf3,d5,exd5,-104.038462,26,235.004422,-13.705670,-194.371253,90.332792,746,0.034853,9.805807,8.323077,10.00,28.128884
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7056,e4,g6,d4,Bg7,Nc3,c5,dxc5,Qa5,Bd2,133.156250,64,284.643761,202.893971,63.418529,69.737721,64,1.000000,6.250000,-10.652500,0.00,-4.402500
7057,d4,f5,Bg5,Nf6,Bxf6,exf6,e3,d5,c4,141.245283,53,229.817980,203.118421,79.372145,61.873138,94,0.563830,6.868028,-11.299623,0.00,-4.431594
7058,e4,e5,Nf3,Nc6,d4,exd4,Bc4,Be7,c3,185.500000,28,230.348126,270.822140,100.177860,85.322140,28,1.000000,9.449112,-14.840000,0.00,-5.390888
7059,d4,Nf6,Bg5,Ne4,Bf4,e6,f3,Bd6,Bxd6,196.066667,30,238.022157,281.241802,110.891532,85.175135,30,1.000000,9.128709,-15.685333,0.00,-6.556624


In [20]:
b5 = chess.groupby(['1w','1b','2w','2b','3w','3b','4w','4b','5w','5b']).agg(mean = ('white_perf_diff_adjusted', 'mean'), count = ('white_perf_diff_adjusted', 'count'), standard_deviation = ('white_perf_diff_adjusted', 'std'))
b5 = b5.loc[b5['count'] >= 25]
b5['95%_confidence_interval_+'] = b5['mean'] + 1.96*(b5['standard_deviation']/(np.sqrt(b5['count'])))
b5['95%_confidence_interval_-'] = b5['mean'] - 1.96*(b5['standard_deviation']/(np.sqrt(b5['count'])))
b5['+/-range'] = (b5['95%_confidence_interval_+']-b5['95%_confidence_interval_-'])/2
b5 = b5.reset_index()
b5p = b5.groupby(['1w','1b','2w','2b','3w','3b','4w','4b','5w']).agg(previous_count = ('count','sum'))
b5 = b5.merge(b5p, how = 'outer', on = ['1w','1b','2w','2b','3w','3b','4w','4b','5w'])
b5['%_of_previous_position'] = b5['count'] / b5['previous_count']
b5.loc[b5['count'] >= 25, 'countscore'] = 50*(b5['count'])**-0.5
b5.loc[b5['count'] >= 25, 'eloscore'] = (-0.08*b5['mean'])
b5.loc[b5['eloscore'] > 10, 'eloscore'] = 10
b5.loc[b5['count'] >= 25, 'rarityscore'] = ((b5['%_of_previous_position']**-1)-2)/2
b5.loc[b5['rarityscore'] < 0, 'rarityscore'] = 0
b5.loc[b5['rarityscore'] > 10, 'rarityscore'] = 10
b5['score'] = b5['countscore']+b5['eloscore']+b5['rarityscore']
b5.sort_values(by = 'score', ascending = False).reset_index(drop = True)

Unnamed: 0,1w,1b,2w,2b,3w,3b,4w,4b,5w,5b,mean,count,standard_deviation,95%_confidence_interval_+,95%_confidence_interval_-,+/-range,previous_count,%_of_previous_position,countscore,eloscore,rarityscore,score
0,e4,c5,Nf3,Nc6,d4,cxd4,Nxd4,Nf6,Nc3,h5,-120.640000,25,287.323117,-8.009338,-233.270662,112.630662,23413,0.001068,10.000000,9.651200,10.000000,29.651200
1,d4,Nf6,c4,g6,Nc3,Bg7,Nf3,O-O,g3,c6,-111.529412,34,282.208201,-16.668730,-206.390094,94.860682,750,0.045333,8.574929,8.922353,10.000000,27.497282
2,c4,e5,Nc3,Nf6,g3,d5,cxd5,Nxd5,Bg2,Ne7,-105.166667,36,292.453415,-9.631884,-200.701449,95.534782,1165,0.030901,8.333333,8.413333,10.000000,26.746667
3,d4,d5,Nf3,Nf6,c4,c6,Nc3,dxc4,a4,Bg4,-98.125000,32,249.707022,-11.605931,-184.644069,86.519069,1161,0.027562,8.838835,7.850000,10.000000,26.688835
4,d4,Nf6,c4,c5,d5,b5,cxb5,a6,e3,d6,-107.842105,38,292.088891,-14.971278,-200.712932,92.870827,803,0.047323,8.111071,8.627368,9.565789,26.304229
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7579,e4,g6,d4,Bg7,Nf3,d6,Bc4,c6,O-O,d5,174.080000,25,237.066960,267.010248,81.149752,92.930248,25,1.000000,10.000000,-13.926400,0.000000,-3.926400
7580,e4,g6,d4,Bg7,Nc3,c5,dxc5,Qa5,Bd2,Qxc5,130.698413,63,286.244686,201.382756,60.014069,70.684344,63,1.000000,6.299408,-10.455873,0.000000,-4.156465
7581,e4,d6,d4,g6,Nc3,Bg7,Be3,a6,f4,b5,141.785714,56,271.929918,213.008515,70.562913,71.222801,56,1.000000,6.681531,-11.342857,0.000000,-4.661326
7582,d4,Nf6,Bg5,Ne4,Bf4,e6,f3,Bd6,Bxd6,Nxd6,196.066667,30,238.022157,281.241802,110.891532,85.175135,30,1.000000,9.128709,-15.685333,0.000000,-6.556624


In [21]:
w6 = chess.groupby(['1w','1b','2w','2b','3w','3b','4w','4b','5w','5b','6w']).agg(mean = ('white_perf_diff_adjusted', 'mean'), count = ('white_perf_diff_adjusted', 'count'), standard_deviation = ('white_perf_diff_adjusted', 'std'))
w6 = w6.loc[w6['count'] >= 25]
w6['95%_confidence_interval_+'] = w6['mean'] + 1.96*(w6['standard_deviation']/(np.sqrt(w6['count'])))
w6['95%_confidence_interval_-'] = w6['mean'] - 1.96*(w6['standard_deviation']/(np.sqrt(w6['count'])))
w6['+/-range'] = (w6['95%_confidence_interval_+']-w6['95%_confidence_interval_-'])/2
w6 = w6.reset_index()
w6p = w6.groupby(['1w','1b','2w','2b','3w','3b','4w','4b','5w','5b']).agg(previous_count = ('count','sum'))
w6 = w6.merge(w6p, how = 'outer', on = ['1w','1b','2w','2b','3w','3b','4w','4b','5w','5b'])
w6['%_of_previous_position'] = w6['count'] / w6['previous_count']
w6.loc[w6['count'] >= 25, 'countscore'] = 50*(w6['count'])**-0.5
w6.loc[w6['count'] >= 25, 'eloscore'] = (-0.08*w6['mean'])
w6.loc[w6['eloscore'] > 10, 'eloscore'] = 10
w6.loc[w6['count'] >= 25, 'rarityscore'] = ((w6['%_of_previous_position']**-1)-2)/2
w6.loc[w6['rarityscore'] < 0, 'rarityscore'] = 0
w6.loc[w6['rarityscore'] > 10, 'rarityscore'] = 10
w6['score'] = w6['countscore']+w6['eloscore']+w6['rarityscore']
w6.sort_values(by = 'score', ascending = False).reset_index(drop = True)

Unnamed: 0,1w,1b,2w,2b,3w,3b,4w,4b,5w,5b,6w,mean,count,standard_deviation,95%_confidence_interval_+,95%_confidence_interval_-,+/-range,previous_count,%_of_previous_position,countscore,eloscore,rarityscore,score
0,e4,e6,Nf3,d5,Nc3,Nf6,e5,Nfd7,d4,c5,Bb5,-134.296296,27,310.521375,-17.166958,-251.425635,117.129338,779,0.034660,9.622504,10.000000,10.0,29.622504
1,e4,e6,d4,d5,Nc3,dxe4,Nxe4,Nd7,Nf3,Ngf6,Ng3,-131.857143,28,366.180963,3.778184,-267.492470,135.635327,1896,0.014768,9.449112,10.000000,10.0,29.449112
2,d4,d5,c4,c6,cxd5,cxd5,Nc3,Nf6,Nf3,Nc6,e3,-115.800000,25,260.147875,-13.822033,-217.777967,101.977967,1029,0.024295,10.000000,9.264000,10.0,29.264000
3,e4,c5,Nc3,Nc6,g3,g6,Bg2,Bg7,d3,d6,Nf3,-129.117647,34,295.681213,-29.728184,-228.507110,99.389463,3516,0.009670,8.574929,10.000000,10.0,28.574929
4,e4,c5,Nf3,Nc6,d4,cxd4,Nxd4,Nf6,Nc3,e5,Nf3,-120.228571,35,375.071216,4.032698,-244.489841,124.261269,12803,0.002734,8.451543,9.618286,10.0,28.069828
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7834,e4,c5,c3,d6,d4,cxd4,cxd4,Nf6,Nc3,g6,Nf3,179.640000,25,239.467270,273.511170,85.768830,93.871170,25,1.000000,10.000000,-14.371200,0.0,-4.371200
7835,e4,e5,Nf3,Nc6,Bb5,a6,Ba4,g6,d4,exd4,Nxd4,147.297872,47,311.470350,236.345807,58.249937,89.047935,93,0.505376,7.293250,-11.783830,0.0,-4.490580
7836,e4,g6,d4,Bg7,Nc3,c5,dxc5,Qa5,Bd2,Qxc5,Nd5,153.360000,50,286.753735,232.844080,73.875920,79.484080,50,1.000000,7.071068,-12.268800,0.0,-5.197732
7837,d4,Nf6,Bg5,Ne4,Bf4,e6,f3,Bd6,Bxd6,Nxd6,e4,192.120000,25,253.297144,291.412481,92.827519,99.292481,25,1.000000,10.000000,-15.369600,0.0,-5.369600


In [22]:
b6 = chess.groupby(['1w','1b','2w','2b','3w','3b','4w','4b','5w','5b','6w','6b']).agg(mean = ('white_perf_diff_adjusted', 'mean'), count = ('white_perf_diff_adjusted', 'count'), standard_deviation = ('white_perf_diff_adjusted', 'std'))
b6 = b6.loc[b6['count'] >= 25]
b6['95%_confidence_interval_+'] = b6['mean'] + 1.96*(b6['standard_deviation']/(np.sqrt(b6['count'])))
b6['95%_confidence_interval_-'] = b6['mean'] - 1.96*(b6['standard_deviation']/(np.sqrt(b6['count'])))
b6['+/-range'] = (b6['95%_confidence_interval_+']-b6['95%_confidence_interval_-'])/2
b6 = b6.reset_index()
b6p = b6.groupby(['1w','1b','2w','2b','3w','3b','4w','4b','5w','5b','6w']).agg(previous_count = ('count','sum'))
b6 = b6.merge(b6p, how = 'outer', on = ['1w','1b','2w','2b','3w','3b','4w','4b','5w','5b','6w'])
b6['%_of_previous_position'] = b6['count'] / b6['previous_count']
b6.loc[b6['count'] >= 25, 'countscore'] = 50*(b6['count'])**-0.5
b6.loc[b6['count'] >= 25, 'eloscore'] = (-0.08*b6['mean'])
b6.loc[b6['eloscore'] > 10, 'eloscore'] = 10
b6.loc[b6['count'] >= 25, 'rarityscore'] = ((b6['%_of_previous_position']**-1)-2)/2
b6.loc[b6['rarityscore'] < 0, 'rarityscore'] = 0
b6.loc[b6['rarityscore'] > 10, 'rarityscore'] = 10
b6['score'] = b6['countscore']+b6['eloscore']+b6['rarityscore']
b6.sort_values(by = 'score', ascending = False).reset_index(drop = True)

Unnamed: 0,1w,1b,2w,2b,3w,3b,4w,4b,5w,5b,6w,6b,mean,count,standard_deviation,95%_confidence_interval_+,95%_confidence_interval_-,+/-range,previous_count,%_of_previous_position,countscore,eloscore,rarityscore,score
0,Nf3,Nf6,c4,e6,g3,d5,Bg2,Be7,O-O,O-O,b3,c6,-167.360000,25,279.255988,-57.891653,-276.828347,109.468347,516,0.048450,10.000000,10.000000,9.320000,29.320000
1,e4,e5,Nf3,Nc6,d4,exd4,Nxd4,Bc5,Be3,Qf6,c3,d6,-96.481481,27,289.223678,12.614319,-205.577282,109.095801,1607,0.016801,9.622504,7.718519,10.000000,27.341023
2,d4,d5,c4,e6,Nc3,Be7,Nf3,Nf6,Bf4,O-O,e3,c6,-93.133333,30,325.259232,23.259188,-209.525855,116.392521,768,0.039062,9.128709,7.450667,10.000000,26.579376
3,e4,c5,Nf3,d6,d4,cxd4,Nxd4,Nf6,Nc3,a6,f3,b5,-88.314286,35,281.435164,4.925316,-181.553887,93.239601,2945,0.011885,8.451543,7.065143,10.000000,25.516685
4,d4,Nf6,c4,g6,Nc3,Bg7,e4,d6,Be2,O-O,Bg5,Nc6,-71.357143,28,336.646118,53.338324,-196.052610,124.695467,2566,0.010912,9.449112,5.708571,10.000000,25.157683
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7852,e4,g6,d4,Bg7,Nc3,c6,h3,d5,Nf3,dxe4,Nxe4,Nd7,170.935484,31,280.889769,269.816096,72.054872,98.880612,31,1.000000,8.980265,-13.674839,0.000000,-4.694574
7853,d4,Nf6,c4,g6,Nc3,Bg7,Nf3,O-O,Bg5,d6,e3,c5,130.141414,99,297.542726,188.753586,71.529243,58.612171,318,0.311321,5.025189,-10.411313,0.606061,-4.780063
7854,e4,e5,Nf3,Nc6,Bb5,a6,Ba4,g6,d4,exd4,Nxd4,Bg7,168.600000,40,306.435236,263.565263,73.634737,94.965263,40,1.000000,7.905694,-13.488000,0.000000,-5.582306
7855,e4,e6,d3,d5,Nd2,Nf6,Ngf3,Be7,g3,O-O,Bg2,c5,173.434783,46,255.773828,247.349898,99.519667,73.915115,46,1.000000,7.372098,-13.874783,0.000000,-6.502685


In [23]:
w7 = chess.groupby(['1w','1b','2w','2b','3w','3b','4w','4b','5w','5b','6w','6b','7w']).agg(mean = ('white_perf_diff_adjusted', 'mean'), count = ('white_perf_diff_adjusted', 'count'), standard_deviation = ('white_perf_diff_adjusted', 'std'))
w7 = w7.loc[w7['count'] >= 25]
w7['95%_confidence_interval_+'] = w7['mean'] + 1.96*(w7['standard_deviation']/(np.sqrt(w7['count'])))
w7['95%_confidence_interval_-'] = w7['mean'] - 1.96*(w7['standard_deviation']/(np.sqrt(w7['count'])))
w7['+/-range'] = (w7['95%_confidence_interval_+']-w7['95%_confidence_interval_-'])/2
w7 = w7.reset_index()
w7p = w7.groupby(['1w','1b','2w','2b','3w','3b','4w','4b','5w','5b','6w','6b']).agg(previous_count = ('count','sum'))
w7 = w7.merge(w7p, how = 'outer', on = ['1w','1b','2w','2b','3w','3b','4w','4b','5w','5b','6w','6b'])
w7['%_of_previous_position'] = w7['count'] / w7['previous_count']
w7.loc[w7['count'] >= 25, 'countscore'] = 50*(w7['count'])**-0.5
w7.loc[w7['count'] >= 25, 'eloscore'] = (-0.08*w7['mean'])
w7.loc[w7['eloscore'] > 10, 'eloscore'] = 10
w7.loc[w7['count'] >= 25, 'rarityscore'] = ((w7['%_of_previous_position']**-1)-2)/2
w7.loc[w7['rarityscore'] < 0, 'rarityscore'] = 0
w7.loc[w7['rarityscore'] > 10, 'rarityscore'] = 10
w7['score'] = w7['countscore']+w7['eloscore']+w7['rarityscore']
w7.sort_values(by = 'score', ascending = False).reset_index(drop = True)

Unnamed: 0,1w,1b,2w,2b,3w,3b,4w,4b,5w,5b,6w,6b,7w,mean,count,standard_deviation,95%_confidence_interval_+,95%_confidence_interval_-,+/-range,previous_count,%_of_previous_position,countscore,eloscore,rarityscore,score
0,e4,c5,Nf3,e6,d4,cxd4,Nxd4,Nc6,Nc3,Qc7,Be3,Nf6,Ndb5,-154.722222,36,298.141827,-57.329225,-252.115219,97.392997,782,0.046036,8.333333,10.000000,9.861111,28.194444
1,e4,e6,d4,d5,Nc3,Bb4,e5,c5,a3,Bxc3+,bxc3,Qa5,Qd2,-118.826087,46,270.815419,-40.564159,-197.088015,78.261928,1695,0.027139,7.372098,9.506087,10.000000,26.878185
2,e4,e6,d4,d5,Nc3,Nf6,Bg5,Bb4,e5,h6,Be3,Ne4,a3,-105.680000,25,254.729059,-5.826209,-205.533791,99.853791,462,0.054113,10.000000,8.454400,8.240000,26.694400
3,d4,Nf6,c4,e6,Nc3,Bb4,e3,O-O,Bd3,d5,Nf3,c5,a3,-97.718750,32,264.208402,-6.175209,-189.262291,91.543541,1848,0.017316,8.838835,7.817500,10.000000,26.656335
4,e4,e6,d4,d5,Nc3,dxe4,Nxe4,Nd7,Nf3,Ngf6,Nxf6+,Nxf6,Ne5,-86.172414,29,282.529527,16.657842,-189.002669,102.830255,997,0.029087,9.284767,6.893793,10.000000,26.178560
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7562,e4,e6,d4,d5,Nc3,Bb4,e5,c5,a3,Ba5,Bd2,cxd4,Nb5,184.240000,25,321.432507,310.241543,58.238457,126.001543,25,1.000000,10.000000,-14.739200,0.000000,-4.739200
7563,e4,e5,Nf3,Nc6,Bb5,a6,Ba4,g6,d4,exd4,Nxd4,Bg7,Nxc6,167.918919,37,310.633526,268.011880,67.825958,100.092961,37,1.000000,8.219949,-13.433514,0.000000,-5.213564
7564,d4,d5,Bf4,c5,e3,Nc6,c3,Qb6,Qb3,c4,Qc2,Bf5,Qxf5,192.393939,33,243.791527,275.573708,109.214171,83.179769,33,1.000000,8.703883,-15.391515,0.000000,-6.687632
7565,e4,e6,d3,d5,Nd2,Nf6,Ngf3,Be7,g3,O-O,Bg2,c5,O-O,184.272727,44,256.304358,260.005825,108.539629,75.733098,44,1.000000,7.537784,-14.741818,0.000000,-7.204035


In [24]:
b7 = chess.groupby(['1w','1b','2w','2b','3w','3b','4w','4b','5w','5b','6w','6b','7w','7b']).agg(mean = ('white_perf_diff_adjusted', 'mean'), count = ('white_perf_diff_adjusted', 'count'), standard_deviation = ('white_perf_diff_adjusted', 'std'))
b7 = b7.loc[b7['count'] >= 25]
b7['95%_confidence_interval_+'] = b7['mean'] + 1.96*(b7['standard_deviation']/(np.sqrt(b7['count'])))
b7['95%_confidence_interval_-'] = b7['mean'] - 1.96*(b7['standard_deviation']/(np.sqrt(b7['count'])))
b7['+/-range'] = (b7['95%_confidence_interval_+']-b7['95%_confidence_interval_-'])/2
b7 = b7.reset_index()
b7p = b7.groupby(['1w','1b','2w','2b','3w','3b','4w','4b','5w','5b','6w','6b','7w']).agg(previous_count = ('count','sum'))
b7 = b7.merge(b7p, how = 'outer', on = ['1w','1b','2w','2b','3w','3b','4w','4b','5w','5b','6w','6b','7w'])
b7['%_of_previous_position'] = b7['count'] / b7['previous_count']
b7.loc[b7['count'] >= 25, 'countscore'] = 50*(b7['count'])**-0.5
b7.loc[b7['count'] >= 25, 'eloscore'] = (-0.08*b7['mean'])
b7.loc[b7['eloscore'] > 10, 'eloscore'] = 10
b7.loc[b7['count'] >= 25, 'rarityscore'] = ((b7['%_of_previous_position']**-1)-2)/2
b7.loc[b7['rarityscore'] < 0, 'rarityscore'] = 0
b7.loc[b7['rarityscore'] > 10, 'rarityscore'] = 10
b7['score'] = b7['countscore']+b7['eloscore']+b7['rarityscore']
b7.sort_values(by = 'score', ascending = False).reset_index(drop = True)

Unnamed: 0,1w,1b,2w,2b,3w,3b,4w,4b,5w,5b,6w,6b,7w,7b,mean,count,standard_deviation,95%_confidence_interval_+,95%_confidence_interval_-,+/-range,previous_count,%_of_previous_position,countscore,eloscore,rarityscore,score
0,d4,Nf6,c4,g6,Nc3,Bg7,e4,d6,f3,O-O,Be3,Nc6,Nge2,e5,-142.866667,30,261.845273,-49.166539,-236.566794,93.700128,685,0.043796,9.128709,10.000000,10.000000,29.128709
1,e4,e6,d4,d5,Nd2,Nf6,e5,Nfd7,Bd3,c5,c3,Nc6,Ngf3,f6,-123.424242,33,280.152083,-27.838535,-219.009949,95.585707,797,0.041405,8.703883,9.873939,10.000000,28.577822
2,e4,e6,d4,d5,Nc3,Bb4,e5,c5,a3,Bxc3+,bxc3,Ne7,Nf3,h6,-120.857143,35,268.917234,-31.764737,-209.949548,89.092406,787,0.044473,8.451543,9.668571,10.000000,28.120114
3,e4,e6,d4,d5,Nc3,Bb4,e5,c5,a3,Bxc3+,bxc3,Ne7,h4,Bd7,-113.640000,25,318.433782,11.186042,-238.466042,124.826042,486,0.051440,10.000000,9.091200,8.720000,27.811200
4,e4,e6,d4,d5,Nd2,Nf6,e5,Nfd7,f4,c5,c3,Nc6,Ndf3,Qa5,-99.300000,30,301.488165,8.586154,-207.186154,107.886154,869,0.034522,9.128709,7.944000,10.000000,27.072709
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6981,e4,e5,Nf3,Nc6,Bb5,a6,Ba4,g6,d4,exd4,Nxd4,Bg7,Nxc6,bxc6,190.333333,27,339.630569,318.442723,62.223944,128.109389,27,1.000000,9.622504,-15.226667,0.000000,-5.604162
6982,d4,Nf6,c4,c5,d5,e6,Nc3,exd5,cxd5,Bd6,Nf3,O-O,Bg5,Re8,175.833333,42,282.338399,261.222224,90.444442,85.388891,42,1.000000,7.715167,-14.066667,0.000000,-6.351499
6983,d4,d5,Bf4,c5,e3,Nc6,c3,Qb6,Qb3,c4,Qc2,Bf5,Qxf5,Qxb2,192.393939,33,243.791527,275.573708,109.214171,83.179769,33,1.000000,8.703883,-15.391515,0.000000,-6.687632
6984,e4,e6,d3,d5,Nd2,Nf6,Ngf3,Be7,g3,O-O,Bg2,c5,O-O,Nc6,185.071429,42,255.511862,262.347048,107.795809,77.275619,42,1.000000,7.715167,-14.805714,0.000000,-7.090547


In [25]:
w8 = chess.groupby(['1w','1b','2w','2b','3w','3b','4w','4b','5w','5b','6w','6b','7w','7b','8w']).agg(mean = ('white_perf_diff_adjusted', 'mean'), count = ('white_perf_diff_adjusted', 'count'), standard_deviation = ('white_perf_diff_adjusted', 'std'))
w8 = w8.loc[w8['count'] >= 25]
w8['95%_confidence_interval_+'] = w8['mean'] + 1.96*(w8['standard_deviation']/(np.sqrt(w8['count'])))
w8['95%_confidence_interval_-'] = w8['mean'] - 1.96*(w8['standard_deviation']/(np.sqrt(w8['count'])))
w8['+/-range'] = (w8['95%_confidence_interval_+']-w8['95%_confidence_interval_-'])/2
w8 = w8.reset_index()
w8p = w8.groupby(['1w','1b','2w','2b','3w','3b','4w','4b','5w','5b','6w','6b','7w','7b']).agg(previous_count = ('count','sum'))
w8 = w8.merge(w8p, how = 'outer', on = ['1w','1b','2w','2b','3w','3b','4w','4b','5w','5b','6w','6b','7w','7b'])
w8['%_of_previous_position'] = w8['count'] / w8['previous_count']
w8.loc[w8['count'] >= 25, 'countscore'] = 50*(w8['count'])**-0.5
w8.loc[w8['count'] >= 25, 'eloscore'] = (-0.08*w8['mean'])
w8.loc[w8['eloscore'] > 10, 'eloscore'] = 10
w8.loc[w8['count'] >= 25, 'rarityscore'] = ((w8['%_of_previous_position']**-1)-2)/2
w8.loc[w8['rarityscore'] < 0, 'rarityscore'] = 0
w8.loc[w8['rarityscore'] > 10, 'rarityscore'] = 10
w8['score'] = w8['countscore']+w8['eloscore']+w8['rarityscore']
w8.sort_values(by = 'score', ascending = False).reset_index(drop = True)

Unnamed: 0,1w,1b,2w,2b,3w,3b,4w,4b,5w,5b,6w,6b,7w,7b,8w,mean,count,standard_deviation,95%_confidence_interval_+,95%_confidence_interval_-,+/-range,previous_count,%_of_previous_position,countscore,eloscore,rarityscore,score
0,e4,c5,Nf3,Nc6,d4,cxd4,Nxd4,g6,Nc3,Bg7,Be3,Nf6,Bc4,Qa5,Nb3,-167.720000,25,255.437618,-67.588454,-267.851546,100.131546,651,0.038402,10.000000,10.000000,10.000000,30.000000
1,e4,c5,Nf3,e6,d4,cxd4,Nxd4,Nc6,Nc3,Qc7,Be2,a6,O-O,Nf6,a3,-91.933333,30,247.026026,-3.536205,-180.330461,88.397128,1933,0.015520,9.128709,7.354667,10.000000,26.483376
2,d4,Nf6,c4,g6,Nc3,Bg7,e4,d6,Nf3,O-O,Be2,Na6,O-O,e5,dxe5,-75.629630,27,270.049162,26.233502,-177.492761,101.863131,765,0.035294,9.622504,6.050370,10.000000,25.672875
3,e4,c6,d4,d5,exd5,cxd5,c4,Nf6,Nc3,Nc6,Nf3,Bg4,cxd5,Nxd5,Be2,-93.466667,30,264.370917,1.137251,-188.070584,94.603918,587,0.051107,9.128709,7.477333,8.783333,25.389376
4,d4,Nf6,c4,g6,Nc3,Bg7,e4,d6,Nf3,O-O,Be2,Nbd7,O-O,e5,dxe5,-65.600000,25,277.038505,42.999094,-174.199094,108.599094,640,0.039062,10.000000,5.248000,10.000000,25.248000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6305,e4,c5,Nc3,Nc6,Bb5,Nd4,Bc4,g6,Nf3,Bg7,Nxd4,cxd4,Qf3,e6,Nb5,127.731707,123,269.793913,175.411643,80.051772,47.679936,123,1.000000,4.508348,-10.218537,0.000000,-5.710188
6306,d4,Nf6,c4,e6,Nc3,Bb4,e3,c5,Ne2,b6,a3,Ba5,Rb1,Na6,g3,150.000000,67,306.578726,223.410923,76.589077,73.410923,129,0.519380,6.108472,-12.000000,0.000000,-5.891528
6307,d4,d5,Bf4,c5,e3,Nc6,c3,Qb6,Qb3,c4,Qc2,Bf5,Qxf5,Qxb2,Qxd5,192.393939,33,243.791527,275.573708,109.214171,83.179769,33,1.000000,8.703883,-15.391515,0.000000,-6.687632
6308,d4,Nf6,c4,c5,d5,e6,Nc3,exd5,cxd5,Bd6,Nf3,O-O,Bg5,Re8,e3,183.097561,41,281.844177,269.370225,96.824897,86.272664,41,1.000000,7.808688,-14.647805,0.000000,-6.839117


In [26]:
b8 = chess.groupby(['1w','1b','2w','2b','3w','3b','4w','4b','5w','5b','6w','6b','7w','7b','8w','8b']).agg(mean = ('white_perf_diff_adjusted', 'mean'), count = ('white_perf_diff_adjusted', 'count'), standard_deviation = ('white_perf_diff_adjusted', 'std'))
b8 = b8.loc[b8['count'] >= 25]
b8['95%_confidence_interval_+'] = b8['mean'] + 1.96*(b8['standard_deviation']/(np.sqrt(b8['count'])))
b8['95%_confidence_interval_-'] = b8['mean'] - 1.96*(b8['standard_deviation']/(np.sqrt(b8['count'])))
b8['+/-range'] = (b8['95%_confidence_interval_+']-b8['95%_confidence_interval_-'])/2
b8 = b8.reset_index()
b8p = b8.groupby(['1w','1b','2w','2b','3w','3b','4w','4b','5w','5b','6w','6b','7w','7b','8w']).agg(previous_count = ('count','sum'))
b8 = b8.merge(b8p, how = 'outer', on = ['1w','1b','2w','2b','3w','3b','4w','4b','5w','5b','6w','6b','7w','7b','8w'])
b8['%_of_previous_position'] = b8['count'] / b8['previous_count']
b8.loc[b8['count'] >= 25, 'countscore'] = 50*(b8['count'])**-0.5
b8.loc[b8['count'] >= 25, 'eloscore'] = (-0.08*b8['mean'])
b8.loc[b8['eloscore'] > 10, 'eloscore'] = 10
b8.loc[b8['count'] >= 25, 'rarityscore'] = ((b8['%_of_previous_position']**-1)-2)/2
b8.loc[b8['rarityscore'] < 0, 'rarityscore'] = 0
b8.loc[b8['rarityscore'] > 10, 'rarityscore'] = 10
b8['score'] = b8['countscore']+b8['eloscore']+b8['rarityscore']
b8.sort_values(by = 'score', ascending = False).reset_index(drop = True)

Unnamed: 0,1w,1b,2w,2b,3w,3b,4w,4b,5w,5b,6w,6b,7w,7b,8w,8b,mean,count,standard_deviation,95%_confidence_interval_+,95%_confidence_interval_-,+/-range,previous_count,%_of_previous_position,countscore,eloscore,rarityscore,score
0,e4,e5,Nf3,Nc6,Bb5,a6,Ba4,Nf6,O-O,b5,Bb3,Be7,Re1,d6,c3,Bg4,-80.677419,31,327.832314,34.728207,-196.083046,115.405627,652,0.047546,8.980265,6.454194,9.516129,24.950588
1,d4,Nf6,c4,g6,Nc3,Bg7,e4,d6,f3,O-O,Be3,Nc6,Nge2,a6,Qd2,Re8,-69.000000,30,288.629580,34.284769,-172.284769,103.284769,595,0.050420,9.128709,5.520000,8.916667,23.565376
2,d4,Nf6,Nf3,g6,c4,Bg7,Nc3,d5,cxd5,Nxd5,e4,Nxc3,bxc3,c5,Be3,Bg4,-155.714286,28,305.632175,-42.506544,-268.922028,113.207742,272,0.102941,9.449112,10.000000,3.857143,23.306255
3,e4,c5,Nf3,d6,d4,cxd4,Nxd4,Nf6,Nc3,Nc6,Bg5,e6,Qd2,a6,O-O-O,Qb6,-71.480769,52,321.944246,16.024674,-158.986212,87.505443,3492,0.014891,6.933752,5.718462,10.000000,22.652214
4,e4,c5,Nf3,e6,d4,cxd4,Nxd4,a6,Nc3,b5,Bd3,Qb6,Nb3,Qc7,O-O,d6,-87.080000,25,263.601960,16.251968,-190.411968,103.331968,332,0.075301,10.000000,6.966400,5.640000,22.606400
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5421,d4,Nf6,c4,e6,Nc3,Bb4,e3,c5,Ne2,b6,a3,Ba5,Rb1,Na6,g3,Bb7,146.338462,65,306.798706,220.923706,71.753217,74.585244,65,1.000000,6.201737,-11.707077,0.000000,-5.505340
5422,e4,c5,Nc3,Nc6,Bb5,Nd4,Bc4,g6,Nf3,Bg7,Nxd4,cxd4,Qf3,e6,Nb5,d6,148.687500,80,267.901909,207.394094,89.980906,58.706594,111,0.720721,5.590170,-11.895000,0.000000,-6.304830
5423,d4,Nf6,c4,g6,Nc3,Bg7,e4,d6,Nf3,O-O,Be2,Nbd7,O-O,e5,Re1,c6,159.616438,73,304.872179,229.554314,89.678563,69.937875,115,0.634783,5.852057,-12.769315,0.000000,-6.917258
5424,d4,Nf6,Nf3,e6,e3,b6,Bd3,Bb7,O-O,c5,c4,Be7,Nc3,O-O,d5,exd5,200.428571,35,214.397142,271.458445,129.398698,71.029873,35,1.000000,8.451543,-16.034286,0.000000,-7.582743


In [27]:
w9 = chess.groupby(['1w','1b','2w','2b','3w','3b','4w','4b','5w','5b','6w','6b','7w','7b','8w','8b','9w']).agg(mean = ('white_perf_diff_adjusted', 'mean'), count = ('white_perf_diff_adjusted', 'count'), standard_deviation = ('white_perf_diff_adjusted', 'std'))
w9 = w9.loc[w9['count'] >= 25]
w9['95%_confidence_interval_+'] = w9['mean'] + 1.96*(w9['standard_deviation']/(np.sqrt(w9['count'])))
w9['95%_confidence_interval_-'] = w9['mean'] - 1.96*(w9['standard_deviation']/(np.sqrt(w9['count'])))
w9['+/-range'] = (w9['95%_confidence_interval_+']-w9['95%_confidence_interval_-'])/2
w9 = w9.reset_index()
w9p = w9.groupby(['1w','1b','2w','2b','3w','3b','4w','4b','5w','5b','6w','6b','7w','7b','8w','8b']).agg(previous_count = ('count','sum'))
w9 = w9.merge(w9p, how = 'outer', on = ['1w','1b','2w','2b','3w','3b','4w','4b','5w','5b','6w','6b','7w','7b','8w','8b'])
w9['%_of_previous_position'] = w9['count'] / w9['previous_count']
w9.loc[w9['count'] >= 25, 'countscore'] = 50*(w9['count'])**-0.5
w9.loc[w9['count'] >= 25, 'eloscore'] = (-0.08*w9['mean'])
w9.loc[w9['eloscore'] > 10, 'eloscore'] = 10
w9.loc[w9['count'] >= 25, 'rarityscore'] = ((w9['%_of_previous_position']**-1)-2)/2
w9.loc[w9['rarityscore'] < 0, 'rarityscore'] = 0
w9.loc[w9['rarityscore'] > 10, 'rarityscore'] = 10
w9['score'] = w9['countscore']+w9['eloscore']+w9['rarityscore']
w9.sort_values(by = 'score', ascending = False).reset_index(drop = True)

Unnamed: 0,1w,1b,2w,2b,3w,3b,4w,4b,5w,5b,6w,6b,7w,7b,8w,8b,9w,mean,count,standard_deviation,95%_confidence_interval_+,95%_confidence_interval_-,+/-range,previous_count,%_of_previous_position,countscore,eloscore,rarityscore,score
0,e4,c5,Nf3,Nc6,d4,cxd4,Nxd4,Nf6,Nc3,e5,Ndb5,d6,Nd5,Nxd5,exd5,Nb8,c3,-158.120000,25,338.363626,-25.481459,-290.758541,132.638541,989,0.025278,10.000000,10.000000,10.000000,30.000000
1,e4,c5,Nf3,e6,d4,cxd4,Nxd4,a6,Bd3,Nf6,O-O,Qc7,Qe2,d6,c4,g6,b3,-166.769231,26,301.912213,-50.717912,-282.820549,116.051319,485,0.053608,9.805807,10.000000,8.326923,28.132730
2,e4,e6,d4,d5,Nd2,c5,exd5,Qxd5,Ngf3,cxd4,Bc4,Qd6,O-O,Nf6,Nb3,Nc6,Nfxd4,-87.437500,32,258.009244,1.958142,-176.833142,89.395642,1723,0.018572,8.838835,6.995000,10.000000,25.833835
3,e4,Nf6,e5,Nd5,d4,d6,c4,Nb6,exd6,cxd6,Nc3,g6,Be3,Bg7,Rc1,O-O,Nf3,-49.703704,27,289.904071,59.648743,-159.056150,109.352446,687,0.039301,9.622504,3.976296,10.000000,23.598801
4,e4,e5,Nf3,Nc6,Bb5,a6,Ba4,Nf6,O-O,Be7,Re1,b5,Bb3,d6,c3,O-O,a3,-65.805556,36,325.892078,40.652523,-172.263635,106.458079,14796,0.002433,8.333333,5.264444,10.000000,23.597778
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4636,e4,e5,Nf3,Nc6,Bb5,a6,Bxc6,dxc6,O-O,Qd6,Na3,Be6,Qe2,f6,Rd1,O-O-O,d4,198.968750,32,267.094702,291.512343,106.425157,92.543593,32,1.000000,8.838835,-15.917500,0.000000,-7.078665
4637,e4,c5,Nc3,Nc6,Bb5,Nd4,Bc4,g6,Nf3,Bg7,Nxd4,cxd4,Qf3,e6,Nb5,d6,Qa3,164.323944,71,246.877053,221.749819,106.898069,57.425875,71,1.000000,5.933908,-13.145915,0.000000,-7.212007
4638,e4,c5,Nf3,d6,d4,cxd4,Nxd4,Nf6,Nc3,a6,Bg5,e6,f4,Nbd7,Qf3,Be7,O-O-O,184.500000,44,245.548034,257.054808,111.945192,72.554808,44,1.000000,7.537784,-14.760000,0.000000,-7.222216
4639,d4,Nf6,c4,e6,Nf3,b6,g3,Ba6,Qc2,Bb7,Bg2,c5,d5,exd5,cxd5,Bxd5,Nc3,180.196078,51,235.406034,244.804417,115.587740,64.608339,51,1.000000,7.001400,-14.415686,0.000000,-7.414286


In [28]:
b9 = chess.groupby(['1w','1b','2w','2b','3w','3b','4w','4b','5w','5b','6w','6b','7w','7b','8w','8b','9w','9b']).agg(mean = ('white_perf_diff_adjusted', 'mean'), count = ('white_perf_diff_adjusted', 'count'), standard_deviation = ('white_perf_diff_adjusted', 'std'))
b9 = b9.loc[b9['count'] >= 25]
b9['95%_confidence_interval_+'] = b9['mean'] + 1.96*(b9['standard_deviation']/(np.sqrt(b9['count'])))
b9['95%_confidence_interval_-'] = b9['mean'] - 1.96*(b9['standard_deviation']/(np.sqrt(b9['count'])))
b9['+/-range'] = (b9['95%_confidence_interval_+']-b9['95%_confidence_interval_-'])/2
b9 = b9.reset_index()
b9p = b9.groupby(['1w','1b','2w','2b','3w','3b','4w','4b','5w','5b','6w','6b','7w','7b','8w','8b','9w']).agg(previous_count = ('count','sum'))
b9 = b9.merge(b9p, how = 'outer', on = ['1w','1b','2w','2b','3w','3b','4w','4b','5w','5b','6w','6b','7w','7b','8w','8b','9w'])
b9['%_of_previous_position'] = b9['count'] / b9['previous_count']
b9.loc[b9['count'] >= 25, 'countscore'] = 50*(b9['count'])**-0.5
b9.loc[b9['count'] >= 25, 'eloscore'] = (-0.08*b9['mean'])
b9.loc[b9['eloscore'] > 10, 'eloscore'] = 10
b9.loc[b9['count'] >= 25, 'rarityscore'] = ((b9['%_of_previous_position']**-1)-2)/2
b9.loc[b9['rarityscore'] < 0, 'rarityscore'] = 0
b9.loc[b9['rarityscore'] > 10, 'rarityscore'] = 10
b9['score'] = b9['countscore']+b9['eloscore']+b9['rarityscore']
b9.sort_values(by = 'score', ascending = False).reset_index(drop = True)

Unnamed: 0,1w,1b,2w,2b,3w,3b,4w,4b,5w,5b,6w,6b,7w,7b,8w,8b,9w,9b,mean,count,standard_deviation,95%_confidence_interval_+,95%_confidence_interval_-,+/-range,previous_count,%_of_previous_position,countscore,eloscore,rarityscore,score
0,e4,e5,Nf3,Nc6,Bb5,a6,Ba4,Nf6,O-O,b5,Bb3,Be7,Re1,d6,c3,O-O,h3,Re8,-90.851852,27,247.243527,2.408945,-184.112648,93.260796,436,0.061927,9.622504,7.268148,7.074074,23.964727
1,e4,c5,Nf3,d6,d4,cxd4,Nxd4,Nf6,Nc3,g6,Be3,Bg7,f3,O-O,Qd2,Nc6,g4,Bd7,-88.851852,27,352.028376,43.934017,-221.637721,132.785869,434,0.062212,9.622504,7.108148,7.037037,23.767690
2,e4,e5,Nf3,Nc6,Bb5,a6,Bxc6,dxc6,O-O,f6,d4,exd4,Nxd4,c5,Nb3,Qxd1,Rxd1,Be6,-85.074074,27,224.431406,-0.418060,-169.730089,84.656015,434,0.062212,9.622504,6.805926,7.037037,23.465467
3,e4,c5,Nf3,d6,d4,cxd4,Nxd4,Nf6,Nc3,g6,Be3,Bg7,f3,Nc6,Qd2,O-O,Bc4,Nxd4,-57.666667,36,257.426161,26.425879,-141.759213,84.092546,1040,0.034615,8.333333,4.613333,10.000000,22.946667
4,e4,e6,d4,d5,e5,c5,c3,Nc6,Nf3,Bd7,Be2,Nge7,Na3,cxd4,cxd4,Nf5,Nc2,Be7,-129.322581,31,301.268472,-23.268127,-235.377034,106.054453,306,0.101307,8.980265,10.000000,3.935484,22.915749
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3967,e4,c5,Nf3,Nc6,d4,cxd4,Nxd4,Nf6,Nc3,g6,Nxc6,bxc6,e5,Ng8,Bc4,Bg7,Qf3,f5,175.526316,38,290.038277,267.745142,83.307490,92.218826,38,1.000000,8.111071,-14.042105,0.000000,-5.931034
3968,e4,e5,Nf3,Nc6,Bb5,a6,Bxc6,dxc6,O-O,Qd6,Na3,Be6,Qe2,f6,Rd1,O-O-O,d4,Bg4,197.642857,28,282.196571,302.169930,93.115784,104.527073,28,1.000000,9.449112,-15.811429,0.000000,-6.362317
3969,e4,d5,exd5,Qxd5,Nc3,Qd6,d4,Nf6,Nf3,a6,g3,Bg4,Bg2,Nc6,O-O,O-O-O,d5,Nb4,175.120000,50,235.331259,240.350497,109.889503,65.230497,111,0.450450,7.071068,-14.009600,0.110000,-6.828532
3970,d4,Nf6,c4,e6,Nf3,b6,g3,Ba6,Qc2,Bb7,Bg2,c5,d5,exd5,cxd5,Bxd5,Nc3,Bc6,189.916667,36,218.767767,261.380804,118.452530,71.464137,36,1.000000,8.333333,-15.193333,0.000000,-6.860000


In [29]:
w10 = chess.groupby(['1w','1b','2w','2b','3w','3b','4w','4b','5w','5b','6w','6b','7w','7b','8w','8b','9w','9b','10w']).agg(mean = ('white_perf_diff_adjusted', 'mean'), count = ('white_perf_diff_adjusted', 'count'), standard_deviation = ('white_perf_diff_adjusted', 'std'))
w10 = w10.loc[w10['count'] >= 25]
w10['95%_confidence_interval_+'] = w10['mean'] + 1.96*(w10['standard_deviation']/(np.sqrt(w10['count'])))
w10['95%_confidence_interval_-'] = w10['mean'] - 1.96*(w10['standard_deviation']/(np.sqrt(w10['count'])))
w10['+/-range'] = (w10['95%_confidence_interval_+']-w10['95%_confidence_interval_-'])/2
w10 = w10.reset_index()
w10p = w10.groupby(['1w','1b','2w','2b','3w','3b','4w','4b','5w','5b','6w','6b','7w','7b','8w','8b','9w','9b']).agg(previous_count = ('count','sum'))
w10 = w10.merge(w10p, how = 'outer', on = ['1w','1b','2w','2b','3w','3b','4w','4b','5w','5b','6w','6b','7w','7b','8w','8b','9w','9b'])
w10['%_of_previous_position'] = w10['count'] / w10['previous_count']
w10.loc[w10['count'] >= 25, 'countscore'] = 50*(w10['count'])**-0.5
w10.loc[w10['count'] >= 25, 'eloscore'] = (-0.08*w10['mean'])
w10.loc[w10['eloscore'] > 10, 'eloscore'] = 10
w10.loc[w10['count'] >= 25, 'rarityscore'] = ((w10['%_of_previous_position']**-1)-2)/2
w10.loc[w10['rarityscore'] < 0, 'rarityscore'] = 0
w10.loc[w10['rarityscore'] > 10, 'rarityscore'] = 10
w10['score'] = w10['countscore']+w10['eloscore']+w10['rarityscore']
w10.sort_values(by = 'score', ascending = False).reset_index(drop = True)

Unnamed: 0,1w,1b,2w,2b,3w,3b,4w,4b,5w,5b,6w,6b,7w,7b,8w,8b,9w,9b,10w,mean,count,standard_deviation,95%_confidence_interval_+,95%_confidence_interval_-,+/-range,previous_count,%_of_previous_position,countscore,eloscore,rarityscore,score
0,d4,Nf6,c4,g6,Nc3,Bg7,e4,d6,f3,O-O,Be3,c5,dxc5,dxc5,Qxd8,Rxd8,Bxc5,Nc6,Rd1,-172.103448,29,263.359660,-76.250313,-267.956584,95.853136,345,0.084058,9.284767,10.000000,4.948276,24.233043
1,e4,c5,Nf3,d6,d4,cxd4,Nxd4,Nf6,Nc3,a6,Bg5,e6,f4,Qb6,Qd2,Qxb2,Rb1,Qa3,Bxf6,-98.454545,33,241.775217,-15.962726,-180.946365,82.491820,548,0.060219,8.703883,7.876364,7.303030,23.883277
2,e4,e5,Nf3,Nc6,Bb5,a6,Ba4,Nf6,O-O,Be7,Re1,b5,Bb3,d6,c3,O-O,h3,Nb8,a4,-43.074074,27,223.290760,41.151686,-127.299835,84.225761,3637,0.007424,9.622504,3.445926,10.000000,23.068430
3,e4,c5,Nf3,d6,d4,cxd4,Nxd4,Nf6,Nc3,a6,Be3,e5,Nb3,Be6,f3,h5,Qd2,Nbd7,a4,-94.030303,33,276.016472,0.144366,-188.204972,94.174669,461,0.071584,8.703883,7.522424,5.984848,22.211156
4,e4,e5,Nf3,Nc6,d4,exd4,Nxd4,Nf6,Nxc6,bxc6,e5,Qe7,Qe2,Nd5,c4,Nb6,Nc3,Qe6,Bd2,-68.000000,31,303.024311,38.672555,-174.672555,106.672555,490,0.063265,8.980265,5.440000,6.903226,21.323491
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3286,e4,c5,Nf3,Nc6,d4,cxd4,Nxd4,Nf6,Nc3,g6,Nxc6,bxc6,e5,Ng8,Bc4,Bg7,Qf3,f5,Bf4,175.526316,38,290.038277,267.745142,83.307490,92.218826,38,1.000000,8.111071,-14.042105,0.000000,-5.931034
3287,e4,Nf6,e5,Nd5,d4,d6,c4,Nb6,exd6,cxd6,Nc3,g6,Be3,Bg7,Rc1,O-O,b3,Nc6,d5,131.528455,123,277.040783,180.489111,82.567800,48.960655,123,1.000000,4.508348,-10.522276,0.000000,-6.013928
3288,d4,Nf6,Nf3,e6,e3,b6,Bd3,Bb7,O-O,c5,c4,Be7,Nc3,O-O,d5,exd5,cxd5,d6,e4,188.000000,32,214.520170,262.327447,113.672553,74.327447,32,1.000000,8.838835,-15.040000,0.000000,-6.201165
3289,e4,c5,Nf3,g6,d4,cxd4,Qxd4,Nf6,e5,Nc6,Qa4,Nd5,Qe4,Ndb4,Bb5,Qa5,Nc3,d5,exd6,193.187500,32,260.265551,283.364913,103.010087,90.177413,32,1.000000,8.838835,-15.455000,0.000000,-6.616165


In [30]:
b10 = chess.groupby(['1w','1b','2w','2b','3w','3b','4w','4b','5w','5b','6w','6b','7w','7b','8w','8b','9w','9b','10w','10b']).agg(mean = ('white_perf_diff_adjusted', 'mean'), count = ('white_perf_diff_adjusted', 'count'), standard_deviation = ('white_perf_diff_adjusted', 'std'))
b10 = b10.loc[b10['count'] >= 25]
b10['95%_confidence_interval_+'] = b10['mean'] + 1.96*(b10['standard_deviation']/(np.sqrt(b10['count'])))
b10['95%_confidence_interval_-'] = b10['mean'] - 1.96*(b10['standard_deviation']/(np.sqrt(b10['count'])))
b10['+/-range'] = (b10['95%_confidence_interval_+']-b10['95%_confidence_interval_-'])/2
b10 = b10.reset_index()
b10p = b10.groupby(['1w','1b','2w','2b','3w','3b','4w','4b','5w','5b','6w','6b','7w','7b','8w','8b','9w','9b','10w']).agg(previous_count = ('count','sum'))
b10 = b10.merge(b10p, how = 'outer', on = ['1w','1b','2w','2b','3w','3b','4w','4b','5w','5b','6w','6b','7w','7b','8w','8b','9w','9b','10w'])
b10['%_of_previous_position'] = b10['count'] / b10['previous_count']
b10.loc[b10['count'] >= 25, 'countscore'] = 50*(b10['count'])**-0.5
b10.loc[b10['count'] >= 25, 'eloscore'] = (-0.08*b10['mean'])
b10.loc[b10['eloscore'] > 10, 'eloscore'] = 10
b10.loc[b10['count'] >= 25, 'rarityscore'] = ((b10['%_of_previous_position']**-1)-2)/2
b10.loc[b10['rarityscore'] < 0, 'rarityscore'] = 0
b10.loc[b10['rarityscore'] > 10, 'rarityscore'] = 10
b10['score'] = b10['countscore']+b10['eloscore']+b10['rarityscore']
b10.sort_values(by = 'score', ascending = False).reset_index(drop = True)

Unnamed: 0,1w,1b,2w,2b,3w,3b,4w,4b,5w,5b,6w,6b,7w,7b,8w,8b,9w,9b,10w,10b,mean,count,standard_deviation,95%_confidence_interval_+,95%_confidence_interval_-,+/-range,previous_count,%_of_previous_position,countscore,eloscore,rarityscore,score
0,e4,e5,Nf3,Nc6,Bb5,a6,Ba4,Nf6,O-O,Nxe4,d4,b5,Bb3,d5,dxe5,Be6,Nbd2,Nc5,c3,g6,-97.312500,32,324.431059,15.097127,-209.722127,112.409627,825,0.038788,8.838835,7.785000,10.000000,26.623835
1,d4,Nf6,c4,g6,Nc3,d5,cxd5,Nxd5,e4,Nxc3,bxc3,Bg7,Bc4,c5,Ne2,Nc6,Be3,O-O,O-O,e6,-58.222222,27,258.765164,39.384561,-155.829005,97.606783,1035,0.026087,9.622504,4.657778,10.000000,24.280282
2,d4,Nf6,c4,e6,g3,c5,d5,exd5,cxd5,d6,Nc3,g6,Bg2,Bg7,Nf3,O-O,O-O,Re8,Bf4,h6,-122.142857,28,228.129871,-37.642370,-206.643344,84.500487,307,0.091205,9.449112,9.771429,4.482143,23.702683
3,d4,Nf6,c4,g6,Nc3,d5,cxd5,Nxd5,e4,Nxc3,bxc3,Bg7,Bc4,c5,Ne2,Nc6,Be3,O-O,O-O,cxd4,-38.111111,27,279.272314,67.231014,-143.453236,105.342125,1035,0.026087,9.622504,3.048889,10.000000,22.671393
4,e4,c5,Nf3,d6,d4,cxd4,Nxd4,Nf6,Nc3,g6,Be3,Bg7,f3,O-O,Qd2,Nc6,Bc4,Bd7,O-O-O,Qb8,-8.960000,25,346.447144,126.847280,-144.767280,135.807280,1193,0.020956,10.000000,0.716800,10.000000,20.716800
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2675,e4,c5,Nf3,Nc6,d4,cxd4,Nxd4,Nf6,Nc3,g6,Nxc6,bxc6,e5,Ng8,Bc4,Bg7,Qf3,f5,Bf4,e6,175.448276,29,311.678931,288.887832,62.008720,113.439556,29,1.000000,9.284767,-14.035862,0.000000,-4.751095
2676,e4,e6,d4,d5,Nc3,Bb4,e5,Ne7,a3,Bxc3+,bxc3,b6,Qg4,Ng6,h4,h5,Qd1,Ba6,Bxa6,Nxa6,137.600000,70,242.397948,194.385309,80.814691,56.785309,70,1.000000,5.976143,-11.008000,0.000000,-5.031857
2677,e4,Nf6,e5,Nd5,d4,d6,c4,Nb6,exd6,cxd6,Nc3,g6,Be3,Bg7,Rc1,O-O,b3,Nc6,d5,Ne5,124.318584,113,278.749992,175.714867,72.922301,51.396283,113,1.000000,4.703604,-9.945487,0.000000,-5.241882
2678,e4,c5,Nf3,g6,d4,cxd4,Qxd4,Nf6,e5,Nc6,Qa4,Nd5,Qe4,Ndb4,Bb5,Qa5,Nc3,d5,exd6,Bf5,193.187500,32,260.265551,283.364913,103.010087,90.177413,32,1.000000,8.838835,-15.455000,0.000000,-6.616165


In [31]:
# I create a score database by concatenating all individual moves I have scored. This database is sorted by opening.

db = pd.concat([w1,b1,w2,b2,w3,b3,w4,b4,w5,b5,w6,b6,w7,b7,w8,b8,w9,b9,w10,b10], ignore_index = True)
db = db[['score','1w','1b','2w','2b','3w','3b','4w','4b','5w','5b','6w','6b','7w','7b','8w','8b','9w','9b','10w','10b','count','mean','+/-range','%_of_previous_position','previous_count','countscore','eloscore','rarityscore']]
db = db.sort_values(by = ['1w','1b','2w','2b','3w','3b','4w','4b','5w','5b','6w','6b','7w','7b','8w','8b','9w','9b','10w','10b']).reset_index(drop = True)

In [34]:
# Database with scores at or above 10.

db_high_scores = db.loc[db['score'] >= 10].reset_index(drop = True)

In [35]:
# Database with scores at or above 20.

db_very_high_scores = db.loc[db['score'] >= 20].reset_index(drop = True)

In [67]:
# For my Capstone presentation, I provided an example of looking for openings to target the London System. Here I filtered
# for high scoring openings as black at different depths of the London as white. 

# This definitely shows the potential of this scoring system but also that it is difficult for someone unfamiliar with 
# Python to use- I could not find a way to integrate this data into a dashboarding tool such as Tableau. One other weakness 
# I noticed is this system treats chess transpositions as different positions. For example, this system treats
#
#        1. d4 d5 2.c4
#   and  1. c4 d5 2.d4
# 
# as completely different openings- which they are- BUT they 'transpose' to reach the same position- this system doesn't 
# acknowledge this.

l1 = b1.loc[b1['1w'] == 'd4'].sort_values(by=('score'), ascending = False)
l2 = b2.loc[(b2['1w'] == 'd4')&(b2['2w'] == 'Bf4')].sort_values(by=('score'), ascending = False)
l3 = b3.loc[(b3['1w'] == 'd4')&(b3['2w'] == 'Bf4')&(b3['3w'] == 'Nf3')].sort_values(by=('score'), ascending = False)
l4 = b4.loc[(b4['1w'] == 'd4')&(b4['2w'] == 'Bf4')&(b4['3w'] == 'Nf3')&(b4['4w'] == 'e3')].sort_values(by=('score'), ascending = False)]
l5 = b5.loc[(b5['1w'] == 'd4')&(b5['2w'] == 'Bf4')&(b5['3w'] == 'Nf3')&(b5['4w'] == 'e3')&(b5['5w'] == 'Nbd2')].sort_values(by=('score'), ascending = False)