In [1]:
# takes 4h for 4m pair summaries for all clubs, 12m for a few clubs (25,000 pair summaries)
# perform experiments on standardized hand records.

# next steps:
# pair_summaries_trained_data_experiments.ipynb (not created) experiments with trained pair summary data

# previous steps:
# acbl_pair_summaries.ipynb created pair_summaries_cleaned.pkl

# todo:
# merge bpr_pair_numbers in acbl_pair_summaries.ipynb
# remove warnings caused by performing operations on sliced dataframe. also remove warnings about re-indexing.
# explore use of strat*
# explore use of correlations -- cor()
# explore results from merging pair summary data with pair's board results.
# rename variables
# rename filenames

In [2]:
import pandas as pd
import pathlib
import pickle
import re
from collections import defaultdict
from IPython.display import display # needed to define display() method in vscode
import mlBridgeLib

In [3]:
# override pandas display options
mlBridgeLib.pd_options_display()

In [4]:
rootPath = pathlib.Path('e:/bridge/data')
acblPath = rootPath.joinpath('acbl')
savedModelsPath = acblPath.joinpath('SavedModels')
# create parent directories in case they don't already exist.
savedModelsPath.mkdir(parents=True, exist_ok=True)

In [5]:
# takes 15s
# read file containing boards which were reported by both acbl and tcg.
pair_summaries_file = acblPath.joinpath('pair_summaries_cleaned.pkl')
df_pair_summaries = pd.read_pickle(pair_summaries_file)
display(len(df_pair_summaries),df_pair_summaries.info(),df_pair_summaries.head())

<class 'pandas.core.frame.DataFrame'>
Int64Index: 4333555 entries, 0 to 4333554
Data columns (total 42 columns):
 #   Column                Dtype  
---  ------                -----  
 0   pair_summary_id       int64  
 1   section_id            int64  
 2   pair_number           int64  
 3   direction             string 
 4   strat                 Int8   
 5   percentage            float32
 6   is_eligible           boolean
 7   player_id             int64  
 8   player_number         string 
 9   player_name           string 
 10  mp_total              float32
 11  type                  string 
 12  rank                  string 
 13  session_id            int64  
 14  section_name          string 
 15  event_id              int64  
 16  hand_record_id        string 
 17  game_date             string 
 18  event_name            string 
 19  club_name             string 
 20  club_id_number        int64  
 21  event_type            string 
 22  rating                int8   
 23  board_s

4333555

None

Unnamed: 0,pair_summary_id,section_id,pair_number,direction,strat,percentage,is_eligible,player_id,player_number,player_name,mp_total,type,rank,session_id,section_name,event_id,hand_record_id,game_date,event_name,club_name,club_id_number,event_type,rating,board_scoring_method,stratification_type,winner_type,club_class,strat_id,strats_number,strats_label,strats_limit,strat_place_id,strat_place_number,strat_place_rank,strat_place_type,stratValues_id,stratValues_label,stratValues_limit,bpr_rank,bpr_pr,bpr_games,bpr_unit
0,1848542,119248,4,,1,62.5,True,3695653,8052557,"Robson, Dave",693.89,section,1,100961,M,100961,77908,2020-02-17 00:00:00,Monday Evening Pairs,Cariboo Bridge Club 1,100040,PAIRS,1,MATCH_POINTS,HIGHEST,1,4,[274629],[1],[A],[0],[1997656],[1],[1],[Section],[100961-1],[A],[0],,,,
1,1848542,119248,4,,1,62.5,True,3695652,tmp:1de5c0e8-a5a2-4433-a232-caa798c84939,"Gamache, Len(swap names)",19.0,section,1,100961,M,100961,77908,2020-02-17 00:00:00,Monday Evening Pairs,Cariboo Bridge Club 1,100040,PAIRS,1,MATCH_POINTS,HIGHEST,1,4,[274629],[1],[A],[0],[1997656],[1],[1],[Section],[100961-1],[A],[0],,,,
2,1848539,119248,1,,1,57.29,True,3695646,tmp:1238a3ab-2652-4f75-921b-767aae646b2a,"Steele, Bruce",16.0,section,2,100961,M,100961,77908,2020-02-17 00:00:00,Monday Evening Pairs,Cariboo Bridge Club 1,100040,PAIRS,1,MATCH_POINTS,HIGHEST,1,4,[274629],[1],[A],[0],[1997654],[1],[2],[Section],[100961-1],[A],[0],,,,
3,1848539,119248,1,,1,57.29,True,3695647,tmp:9f01395a-c179-4283-adf3-886ff6aed8aa,"Hlady, Henny",,section,2,100961,M,100961,77908,2020-02-17 00:00:00,Monday Evening Pairs,Cariboo Bridge Club 1,100040,PAIRS,1,MATCH_POINTS,HIGHEST,1,4,[274629],[1],[A],[0],[1997654],[1],[2],[Section],[100961-1],[A],[0],,,,
4,1848536,119248,3,,1,51.04,True,3695641,5637562,"Williams, Peter",139.21,section,3/4,100961,M,100961,77908,2020-02-17 00:00:00,Monday Evening Pairs,Cariboo Bridge Club 1,100040,PAIRS,1,MATCH_POINTS,HIGHEST,1,4,[274629],[1],[A],[0],[1997653],[1],[3/4],[Section],[100961-1],[A],[0],,,,


In [6]:
# takes 1s
augmented_df = df_pair_summaries.copy() #.reset_index(drop=True) # must reset index so list appends sync properly
#del df
display(len(augmented_df),df_pair_summaries.head())

4333555

Unnamed: 0,pair_summary_id,section_id,pair_number,direction,strat,percentage,is_eligible,player_id,player_number,player_name,mp_total,type,rank,session_id,section_name,event_id,hand_record_id,game_date,event_name,club_name,club_id_number,event_type,rating,board_scoring_method,stratification_type,winner_type,club_class,strat_id,strats_number,strats_label,strats_limit,strat_place_id,strat_place_number,strat_place_rank,strat_place_type,stratValues_id,stratValues_label,stratValues_limit,bpr_rank,bpr_pr,bpr_games,bpr_unit
0,1848542,119248,4,,1,62.5,True,3695653,8052557,"Robson, Dave",693.89,section,1,100961,M,100961,77908,2020-02-17 00:00:00,Monday Evening Pairs,Cariboo Bridge Club 1,100040,PAIRS,1,MATCH_POINTS,HIGHEST,1,4,[274629],[1],[A],[0],[1997656],[1],[1],[Section],[100961-1],[A],[0],,,,
1,1848542,119248,4,,1,62.5,True,3695652,tmp:1de5c0e8-a5a2-4433-a232-caa798c84939,"Gamache, Len(swap names)",19.0,section,1,100961,M,100961,77908,2020-02-17 00:00:00,Monday Evening Pairs,Cariboo Bridge Club 1,100040,PAIRS,1,MATCH_POINTS,HIGHEST,1,4,[274629],[1],[A],[0],[1997656],[1],[1],[Section],[100961-1],[A],[0],,,,
2,1848539,119248,1,,1,57.29,True,3695646,tmp:1238a3ab-2652-4f75-921b-767aae646b2a,"Steele, Bruce",16.0,section,2,100961,M,100961,77908,2020-02-17 00:00:00,Monday Evening Pairs,Cariboo Bridge Club 1,100040,PAIRS,1,MATCH_POINTS,HIGHEST,1,4,[274629],[1],[A],[0],[1997654],[1],[2],[Section],[100961-1],[A],[0],,,,
3,1848539,119248,1,,1,57.29,True,3695647,tmp:9f01395a-c179-4283-adf3-886ff6aed8aa,"Hlady, Henny",,section,2,100961,M,100961,77908,2020-02-17 00:00:00,Monday Evening Pairs,Cariboo Bridge Club 1,100040,PAIRS,1,MATCH_POINTS,HIGHEST,1,4,[274629],[1],[A],[0],[1997654],[1],[2],[Section],[100961-1],[A],[0],,,,
4,1848536,119248,3,,1,51.04,True,3695641,5637562,"Williams, Peter",139.21,section,3/4,100961,M,100961,77908,2020-02-17 00:00:00,Monday Evening Pairs,Cariboo Bridge Club 1,100040,PAIRS,1,MATCH_POINTS,HIGHEST,1,4,[274629],[1],[A],[0],[1997653],[1],[3/4],[Section],[100961-1],[A],[0],,,,


In [7]:
# takes 3s
print(augmented_df['event_type'].value_counts())
augmented_df = augmented_df[augmented_df['event_type'] == 'PAIRS']
augmented_df['event_type'].value_counts()

PAIRS    4333555
Name: event_type, dtype: Int64


PAIRS    4333555
Name: event_type, dtype: Int64

In [8]:
psi_vc = augmented_df['pair_summary_id'].value_counts()
psi_vc[psi_vc.ne(2)].head()

26620    18
26618    18
26625    18
26623    18
26619    18
Name: pair_summary_id, dtype: int64

In [9]:
# takes 2s
drop_broken_pairs = augmented_df[augmented_df['pair_summary_id'].isin(psi_vc[psi_vc.ne(2)].index)].index
print(len(drop_broken_pairs))
augmented_df.drop(drop_broken_pairs,inplace=True)

36267


In [10]:
# drop unmatched rows
#unpaired_rows = ~augmented_df.duplicated(subset=['pair_summary_id'],keep=False)
#display(sum(unpaired_rows),augmented_df[unpaired_rows])
#augmented_df.drop(augmented_df[unpaired_rows].index,inplace=True)

In [11]:
# triple check we can properly pair players
#augmented_df.sort_values('pair_summary_id',inplace=True)
psi_ne = augmented_df.iloc[0::2]
psi_sw = augmented_df.iloc[1::2]
assert all(psi_ne['pair_summary_id'].values == psi_sw['pair_summary_id'].values)

In [45]:
# takes 8s
insert_cols = ['pair_summary_id','player_id','player_number','player_name','mp_total','bpr_']
augmented_df = pd.merge(psi_ne,psi_sw.filter(regex='|'.join(insert_cols)),on='pair_summary_id')
augmented_df.sort_index(axis='columns',inplace=True)
# todo: merge bpr_pair_numbers in pair_summaries.
#display(augmented_df[augmented_df['bpr_games'].ne('')].head(),augmented_df.head())

Unnamed: 0,board_scoring_method,bpr_games_x,bpr_games_y,bpr_pr_x,bpr_pr_y,bpr_rank_x,bpr_rank_y,bpr_unit_x,bpr_unit_y,club_class,club_id_number,club_name,direction,event_id,event_name,event_type,game_date,hand_record_id,is_eligible,mp_total_x,mp_total_y,pair_number,pair_summary_id,percentage,player_id_x,player_id_y,player_name_x,player_name_y,player_number_x,player_number_y,rank,rating,section_id,section_name,session_id,strat,stratValues_id,stratValues_label,stratValues_limit,strat_id,strat_place_id,strat_place_number,strat_place_rank,strat_place_type,stratification_type,strats_label,strats_limit,strats_number,type,winner_type
643,MATCH_POINTS,45.0,,58.61000061035156,,2185.0,,183,,4,100222,Arlington Duplicate Bridg,,10239,Open Stratified Pairs,PAIRS,2019-08-22 00:00:00,8744,,1680.38,8336.61,8,181916,58.34,363832,363831,"White, Larry","White, Bill(swap names)",7371624,8554498,1,20,12111,A,10239,1,"[10239-1, 10239-2, 10239-3]","[A, B, C]","[0, 2500, 500]","[28481, 28482, 28483]","[200499, 200500]","[1, 1]","[1, 1]","[Section, Event]",HIGHEST,"[A, B, C]","[0, 2500, 500]","[1, 2, 3]",ssoveralls,1
653,MATCH_POINTS,80.0,,55.93000030517578,,4414.0,,183,,4,100222,Arlington Duplicate Bridg,EW,102812,Wednesday Aft Open Pairs,PAIRS,2020-02-19 00:00:00,79136,True,4479.92,2486.59,7,1878849,58.33,3756216,3756217,"Koenigseder, Becky","Round, Marlene",3193640,8587957,2,55,121191,K,102812,1,"[102812-1, 102812-2, 102812-3]","[A, B, C]","[0, 3000, 1750]","[279625, 279626, 279627]","[2030295, 2030296]","[1, 1]","[1, 2]","[Section, Event]",AVERAGE,"[A, B, C]","[0, 3000, 1750]","[1, 2, 3]",ssoveralls,2
655,MATCH_POINTS,45.0,,58.61000061035156,,2185.0,,183,,4,100222,Arlington Duplicate Bridg,EW,102812,Wednesday Aft Open Pairs,PAIRS,2020-02-19 00:00:00,79136,True,1864.79,1960.66,4,1878846,53.87,3756211,3756210,"White, Larry","Justice, James(swap names)",7371624,8431515,2,55,121191,K,102812,2,"[102812-1, 102812-2, 102812-3]","[A, B, C]","[0, 3000, 1750]","[279625, 279626, 279627]","[2030289, 2030290, 2030291, 2030292]","[1, 1, 2, 2]","[3, 5, 1, 2]","[Section, Event, Section, Event]",AVERAGE,"[A, B, C]","[0, 3000, 1750]","[1, 2, 3]",ssoveralls,2
659,MATCH_POINTS,45.0,61.0,58.61000061035156,59.84000015258789,2185.0,1465.0,183,183.0,4,100222,Arlington Duplicate Bridg,NS,10373,TGIF Stratified Pairs,PAIRS,2019-08-23 00:00:00,8862,,1680.38,14353.72,2,184443,56.85,368886,368885,"White, Larry","Spangler, Linda(swap names)",7371624,5469961,5,4,12273,S,10373,1,"[10373-1, 10373-2, 10373-3]","[A, B, C]","[0, 7000, 2000]","[28871, 28872, 28873]","[203076, 203077]","[1, 1]","[3, 5]","[Section, Event]",AVERAGE,"[A, B, C]","[0, 7000, 2000]","[1, 2, 3]",ssoveralls,2
663,MATCH_POINTS,57.0,,58.5,,2261.0,,183,,4,100222,Arlington Duplicate Bridg,EW,10373,TGIF Stratified Pairs,PAIRS,2019-08-23 00:00:00,8862,,8521.78,6063.39,2,184450,58.04,368900,368899,"Cassidy, Pat","Derby, Jerry(swap names)",6662250,5692091,3/4,4,12273,S,10373,1,"[10373-1, 10373-2, 10373-3]","[A, B, C]","[0, 7000, 2000]","[28871, 28872, 28873]","[203090, 203091]","[1, 1]","[2, 3/4]","[Section, Event]",AVERAGE,"[A, B, C]","[0, 7000, 2000]","[1, 2, 3]",ssoveralls,2


Unnamed: 0,board_scoring_method,bpr_games_x,bpr_games_y,bpr_pr_x,bpr_pr_y,bpr_rank_x,bpr_rank_y,bpr_unit_x,bpr_unit_y,club_class,club_id_number,club_name,direction,event_id,event_name,event_type,game_date,hand_record_id,is_eligible,mp_total_x,mp_total_y,pair_number,pair_summary_id,percentage,player_id_x,player_id_y,player_name_x,player_name_y,player_number_x,player_number_y,rank,rating,section_id,section_name,session_id,strat,stratValues_id,stratValues_label,stratValues_limit,strat_id,strat_place_id,strat_place_number,strat_place_rank,strat_place_type,stratification_type,strats_label,strats_limit,strats_number,type,winner_type
0,MATCH_POINTS,,,,,,,,,4,100040,Cariboo Bridge Club 1,,100961,Monday Evening Pairs,PAIRS,2020-02-17 00:00:00,77908,True,693.89,19.0,4,1848542,62.5,3695653,3695652,"Robson, Dave","Gamache, Len(swap names)",8052557,tmp:1de5c0e8-a5a2-4433-a232-caa798c84939,1,1,119248,M,100961,1,[100961-1],[A],[0],[274629],[1997656],[1],[1],[Section],HIGHEST,[A],[0],[1],section,1
1,MATCH_POINTS,,,,,,,,,4,100040,Cariboo Bridge Club 1,,100961,Monday Evening Pairs,PAIRS,2020-02-17 00:00:00,77908,True,16.0,,1,1848539,57.29,3695646,3695647,"Steele, Bruce","Hlady, Henny",tmp:1238a3ab-2652-4f75-921b-767aae646b2a,tmp:9f01395a-c179-4283-adf3-886ff6aed8aa,2,1,119248,M,100961,1,[100961-1],[A],[0],[274629],[1997654],[1],[2],[Section],HIGHEST,[A],[0],[1],section,1
2,MATCH_POINTS,,,,,,,,,4,100040,Cariboo Bridge Club 1,,100961,Monday Evening Pairs,PAIRS,2020-02-17 00:00:00,77908,True,139.21,1164.08,3,1848536,51.04,3695641,3695640,"Williams, Peter","Higgins, Pauline(swap names)",5637562,8360774,3/4,1,119248,M,100961,1,[100961-1],[A],[0],[274629],[1997653],[1],[3/4],[Section],HIGHEST,[A],[0],[1],section,1
3,MATCH_POINTS,,,,,,,,,4,100040,Cariboo Bridge Club 1,,100961,Monday Evening Pairs,PAIRS,2020-02-17 00:00:00,77908,True,11.0,2.0,6,1848540,51.04,3695648,3695649,"Penhale, Russ","Charlesworth, Jack",tmp:07c9f971-1117-4e23-b771-7b2eaebfb1b3,tmp:c6ae1a9b-fd89-42b0-8c5d-1b294b811c97,3/4,1,119248,M,100961,1,[100961-1],[A],[0],[274629],[1997655],[1],[3/4],[Section],HIGHEST,[A],[0],[1],section,1
4,MATCH_POINTS,,,,,,,,,4,100040,Cariboo Bridge Club 1,,110886,Monday Evening Pairs,PAIRS,2020-02-24 00:00:00,SHUFFLE,True,570.78,79.72,4,2010256,58.85,4018650,4018651,"Meyer, David","Meyer, George",1689746,6766013,1,1,129563,A,110886,1,[110886-1],[A],[0],[301290],[2172350],[1],[1],[Section],HIGHEST,[A],[0],[1],section,1


In [13]:
augmented_df[augmented_df.index.isin([265633])]

Unnamed: 0,board_scoring_method,bpr_games,bpr_pr,bpr_rank,bpr_unit,club_class,club_id_number,club_name,direction,event_id,event_name,event_type,game_date,hand_record_id,is_eligible,mp_total_x,mp_total_y,pair_number,pair_summary_id,percentage,player_id_x,player_id_y,player_name_x,player_name_y,player_number_x,player_number_y,rank,rating,section_id,section_name,session_id,strat,stratValues_id,stratValues_label,stratValues_limit,strat_id,strat_place_id,strat_place_number,strat_place_rank,strat_place_type,stratification_type,strats_label,strats_limit,strats_number,type,winner_type
265633,MATCH_POINTS,,,,,4,128728,Essex Bridge Center,NS,182227,#1711 Pairs Essex 99ers 10am,PAIRS,2020-08-24 00:00:00,97802,True,16.07,16.13,3,3288948,69.44,6574472,6574473,Philip Milot,Joanne Milot,8365784,8410232,1,1,207253,A,182227,1,"[182227-1, 182227-2, 182227-3]","[A, B, C]","[101, 13, 9]","[485320, 485321, 485322]",[3366526],[1],[1],[Section],HIGHEST,"[A, B, C]","[101, 13, 9]","[1, 2, 3]",section,2


In [14]:
# takes 2s
print(augmented_df['board_scoring_method'].value_counts())
augmented_df = augmented_df[augmented_df['board_scoring_method'] == 'MATCH_POINTS']
augmented_df['board_scoring_method'].value_counts()

MATCH_POINTS    2145051
BUTLER_IMPS        2868
CROSS_IMPS          716
IMPS                  9
Name: board_scoring_method, dtype: Int64


MATCH_POINTS    2145051
Name: board_scoring_method, dtype: Int64

In [15]:
augmented_df.isna().sum()

board_scoring_method         0
bpr_games                    0
bpr_pr                       0
bpr_rank                     0
bpr_unit                     0
club_class                   0
club_id_number               0
club_name                    0
direction               201718
event_id                     0
event_name                   0
event_type                   0
game_date                    0
hand_record_id           28397
is_eligible             425667
mp_total_x               16736
mp_total_y               41976
pair_number                  0
pair_summary_id              0
percentage                   0
player_id_x                  0
player_id_y                  0
player_name_x                0
player_name_y                0
player_number_x              0
player_number_y              0
rank                         0
rating                       0
section_id                   0
section_name                 0
session_id                   0
strat                        0
stratVal

In [16]:
# drop rows which have NA except if NA appear in these columns.
na_allowed_cols = ['is_eligible','hand_record_id']
remove_na_cols = augmented_df.columns.drop(na_allowed_cols)
remove_na_cols

Index(['board_scoring_method', 'bpr_games', 'bpr_pr', 'bpr_rank', 'bpr_unit',
       'club_class', 'club_id_number', 'club_name', 'direction', 'event_id',
       'event_name', 'event_type', 'game_date', 'mp_total_x', 'mp_total_y',
       'pair_number', 'pair_summary_id', 'percentage', 'player_id_x',
       'player_id_y', 'player_name_x', 'player_name_y', 'player_number_x',
       'player_number_y', 'rank', 'rating', 'section_id', 'section_name',
       'session_id', 'strat', 'stratValues_id', 'stratValues_label',
       'stratValues_limit', 'strat_id', 'strat_place_id', 'strat_place_number',
       'strat_place_rank', 'strat_place_type', 'stratification_type',
       'strats_label', 'strats_limit', 'strats_number', 'type', 'winner_type'],
      dtype='object')

In [17]:
augmented_df[remove_na_cols].isna().sum()

board_scoring_method         0
bpr_games                    0
bpr_pr                       0
bpr_rank                     0
bpr_unit                     0
club_class                   0
club_id_number               0
club_name                    0
direction               201718
event_id                     0
event_name                   0
event_type                   0
game_date                    0
mp_total_x               16736
mp_total_y               41976
pair_number                  0
pair_summary_id              0
percentage                   0
player_id_x                  0
player_id_y                  0
player_name_x                0
player_name_y                0
player_number_x              0
player_number_y              0
rank                         0
rating                       0
section_id                   0
section_name                 0
session_id                   0
strat                        0
stratValues_id               0
stratValues_label            0
stratVal

In [18]:
# todo: temp. Need to fillna()
augmented_df.drop(na_allowed_cols,axis='columns',inplace=True)
augmented_df.head()

Unnamed: 0,board_scoring_method,bpr_games,bpr_pr,bpr_rank,bpr_unit,club_class,club_id_number,club_name,direction,event_id,event_name,event_type,game_date,mp_total_x,mp_total_y,pair_number,pair_summary_id,percentage,player_id_x,player_id_y,player_name_x,player_name_y,player_number_x,player_number_y,rank,rating,section_id,section_name,session_id,strat,stratValues_id,stratValues_label,stratValues_limit,strat_id,strat_place_id,strat_place_number,strat_place_rank,strat_place_type,stratification_type,strats_label,strats_limit,strats_number,type,winner_type
0,MATCH_POINTS,,,,,4,100040,Cariboo Bridge Club 1,,100961,Monday Evening Pairs,PAIRS,2020-02-17 00:00:00,693.89,19.0,4,1848542,62.5,3695653,3695652,"Robson, Dave","Gamache, Len(swap names)",8052557,tmp:1de5c0e8-a5a2-4433-a232-caa798c84939,1,1,119248,M,100961,1,[100961-1],[A],[0],[274629],[1997656],[1],[1],[Section],HIGHEST,[A],[0],[1],section,1
1,MATCH_POINTS,,,,,4,100040,Cariboo Bridge Club 1,,100961,Monday Evening Pairs,PAIRS,2020-02-17 00:00:00,16.0,,1,1848539,57.29,3695646,3695647,"Steele, Bruce","Hlady, Henny",tmp:1238a3ab-2652-4f75-921b-767aae646b2a,tmp:9f01395a-c179-4283-adf3-886ff6aed8aa,2,1,119248,M,100961,1,[100961-1],[A],[0],[274629],[1997654],[1],[2],[Section],HIGHEST,[A],[0],[1],section,1
2,MATCH_POINTS,,,,,4,100040,Cariboo Bridge Club 1,,100961,Monday Evening Pairs,PAIRS,2020-02-17 00:00:00,139.21,1164.08,3,1848536,51.04,3695641,3695640,"Williams, Peter","Higgins, Pauline(swap names)",5637562,8360774,3/4,1,119248,M,100961,1,[100961-1],[A],[0],[274629],[1997653],[1],[3/4],[Section],HIGHEST,[A],[0],[1],section,1
3,MATCH_POINTS,,,,,4,100040,Cariboo Bridge Club 1,,100961,Monday Evening Pairs,PAIRS,2020-02-17 00:00:00,11.0,2.0,6,1848540,51.04,3695648,3695649,"Penhale, Russ","Charlesworth, Jack",tmp:07c9f971-1117-4e23-b771-7b2eaebfb1b3,tmp:c6ae1a9b-fd89-42b0-8c5d-1b294b811c97,3/4,1,119248,M,100961,1,[100961-1],[A],[0],[274629],[1997655],[1],[3/4],[Section],HIGHEST,[A],[0],[1],section,1
4,MATCH_POINTS,,,,,4,100040,Cariboo Bridge Club 1,,110886,Monday Evening Pairs,PAIRS,2020-02-24 00:00:00,570.78,79.72,4,2010256,58.85,4018650,4018651,"Meyer, David","Meyer, George",1689746,6766013,1,1,129563,A,110886,1,[110886-1],[A],[0],[301290],[2172350],[1],[1],[Section],HIGHEST,[A],[0],[1],section,1


In [19]:
# takes 3s
augmented_df.dropna(subset=remove_na_cols,inplace=True)
len(augmented_df)

1902604

In [20]:
# experimenting with scaling percentage from 0-100 to 0-1.
augmented_df['percentage'] = augmented_df['percentage']/100
augmented_df['percentage'].value_counts()

0.58    20783
0.56    20649
0.50    19533
0.53    19175
0.54    18758
0.56    14467
0.52    12054
0.61    11221
0.55    10045
0.57     9826
0.55     9491
0.51     9290
0.62     8779
0.52     8438
0.57     8237
0.54     7949
0.54     7655
0.55     7435
0.60     7274
0.53     7160
0.60     7100
0.56     6973
0.55     6878
0.53     6686
0.52     6525
        ...  
0.31        1
0.32        1
0.36        1
0.33        1
0.79        1
0.78        1
0.82        1
0.92        1
0.83        1
0.84        1
0.78        1
0.35        1
0.30        1
0.37        1
0.32        1
0.78        1
0.78        1
0.38        1
0.78        1
0.79        1
0.76        1
0.34        1
0.35        1
0.35        1
0.34        1
Name: percentage, Length: 4462, dtype: int64

In [21]:
# takes 1s
drop_rows = augmented_df['percentage'].lt(0) | augmented_df['percentage'].gt(1)
print(len(augmented_df),sum(drop_rows))
augmented_df.drop(augmented_df[drop_rows].index,inplace=True)

1902604 12


In [22]:
# takes 25s
# todo: too slow
# create column of player_numbers for each partnership, sorted.
augmented_df['pair_numbers'] = augmented_df.apply(lambda r: r['player_number_x']+'_'+r['player_number_y'] if r['player_number_x'] < r['player_number_y'] else r['player_number_y']+'_'+r['player_number_x'],axis='columns').astype('string')

In [23]:
augmented_df['SumMP'] = augmented_df['mp_total_x']+augmented_df['mp_total_y'].astype('float32')
augmented_df['GeoMP'] = augmented_df['mp_total_x']*augmented_df['mp_total_y'].astype('float32')

In [24]:
import sklearn.pipeline
import sklearn.preprocessing
import sklearn.neural_network
import sklearn.compose

def Train(estimator, dep_var, trainx, trainy, validx, validy, **kwargs):

    column_transformers = []
    all_columns = []
    passthrough_columns = []
    # how to specify columns to be OneHotEncoded? ('ohe',sklearn.preprocessing.OneHotEncoder())
    #pipes = [('boolean',None),('category',sklearn.preprocessing.OrdinalEncoder()),('number',sklearn.preprocessing.StandardScaler()),('string',sklearn.preprocessing.OrdinalEncoder())]
    pipes = [('category',sklearn.preprocessing.OrdinalEncoder()),('number',sklearn.preprocessing.StandardScaler()),('string',sklearn.preprocessing.OrdinalEncoder(handle_unknown = 'ignore'))]
    for dt,p in pipes:
        columns = trainx.select_dtypes(dt).columns.to_list()
        print(f'{dt}:{columns}')
        if len(columns) > 0:
            if p is None:
                passthrough_columns += columns
            else:
                pipeline = sklearn.pipeline.Pipeline(steps=[(dt, p)])
                column_transformers.append((dt,pipeline,columns))
            all_columns += columns
    if len(passthrough_columns) > 0:
        column_transformers.append(('passthrough','passthrough',passthrough_columns))

    assert sorted(all_columns) == sorted(trainx.columns.to_list())

    preprocessor_pipeline = sklearn.compose.ColumnTransformer(column_transformers)

    if estimator == 'LinearRegression':
        estimator_func = sklearn.neural_network.LinearRegression(random_state=1, **kwargs)
    elif estimator == 'LogisticRegressionClassifier':
        estimator_func = sklearn.neural_network.LogisticRegressionClassifier(random_state=1, **kwargs)
    elif estimator == 'MLPClassifier':
        estimator_func = sklearn.neural_network.MLPClassifier(random_state=1, **kwargs)
    elif estimator == 'MLPRegressor':
        estimator_func = sklearn.neural_network.MLPRegressor(random_state=1, **kwargs)
    else:
        assert 'Unknown Estimator', estimator
    
    m = sklearn.pipeline.Pipeline(steps=[
        ('preprocessor', preprocessor_pipeline),
        ('estimator', estimator_func)
    ])

    m.fit(trainx, trainy)
    #return m
    predictionsEmbedded = m.predict(validx)
    # todo: no predict_proba for MLPRegressor
    #probabilities = m.predict_proba(validx)
    probabilities = m.predict(validx)
    predictionsCoefficientsdf = pd.DataFrame(m['estimator'].coefs_[0].T, columns=validx.columns)

    assert len(predictionsEmbedded) == len(validy)

    return m, predictionsEmbedded, probabilities, predictionsCoefficientsdf

In [25]:
# Create views of train/validate split.
# todo: validate shapes, validate columns are only numeric or boolean
def create_train_valid_dfs(augmented_df, training_columns_regex, split, dep_var):

    training_df = augmented_df.filter(regex=training_columns_regex, axis='columns')
    print('Training columns:', training_df.columns)

    # why is this failing?
    #assert training_df.isna().any().any()

    # create training dataframes
    trainx = training_df[split].drop(dep_var,axis='columns')
    display(f'trainx: dtypes:{trainx.dtypes} len:{len(trainx)} shape:{trainx.shape}',trainx.head())
    assert (trainx.dtypes != 'object').all()
    trainy = training_df[split][dep_var]
    display(f'trainy: dtypes:{trainy.dtypes} len:{len(trainy)} shape:{trainy.shape}',trainy.head())
    assert trainy.dtype != 'object'

    #assert dep_var in trainx.columns # dep_var must be in training df
    assert dep_var == trainy.name # dep_var must be in training df

    # construct validation dataframes
    validx = training_df[~split].drop(dep_var,axis='columns')
    display(f'validx: dtypes:{validx.dtypes} len:{len(validx)} shape:{validx.shape}',validx.head())
    assert (validx.dtypes != 'object').all()
    validy = training_df[~split][dep_var]
    display(f'validy: dtypes:{validy.dtypes} len:{len(validy)} shape:{validy.shape}',validy.head())
    assert validy.dtype != 'object'

    #assert dep_var not in validx.columns # dep_var must not be in validation df
    assert dep_var == validy.name # dep_var must not be in validation df

    return trainx, trainy, validx, validy

In [26]:
# takes 10s
# save augmented_df
augmented_pair_summaries_file = acblPath.joinpath('pair_summaries_augmented.pkl')
with open(augmented_pair_summaries_file, 'wb') as f:
    pickle.dump(augmented_df, f)
print(f"Saved {augmented_pair_summaries_file}: size:{augmented_pair_summaries_file.stat().st_size}")

Saved e:\bridge\data\acbl\pair_summaries_augmented.pkl: size:669383042


In [27]:
# takes 8s
# load augmented_df
augmented_pair_summaries_file = acblPath.joinpath('pair_summaries_augmented.pkl')
with open(augmented_pair_summaries_file, 'rb') as f:
    augmented_df = pickle.load(f)

In [28]:
# todo: make one-liner?
# augmented_df.loc[augmented_df[split].index,'percentage']
def augment_pair_stats(df,train_indexes):
    png = df.loc[train_indexes].groupby('pair_numbers')
    pair_stats_d = {}
    for k,v in png:
        pair_stats_d[k] = (v['percentage'].count(),v['percentage'].mean(),v['percentage'].std())
    psdf = pd.DataFrame(df['pair_numbers'].map(pair_stats_d).values.tolist(),index=df.index,columns=['pair_count','pair_mean','pair_std'])
    df = pd.concat([df,psdf],axis='columns')
    # might have NAs
    #df.dropna(inplace=True)
    pair_dtypes_d = {'pair_count':'Int32','pair_mean':'float32','pair_std':'float32'}
    df = df.astype(pair_dtypes_d)
    return df

In [29]:
#print(len(augmented_df))
#augmented_df.dropna(inplace=True)
#augmented_df

In [30]:
augmented_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1902592 entries, 36 to 2148643
Data columns (total 47 columns):
 #   Column                Dtype  
---  ------                -----  
 0   board_scoring_method  string 
 1   bpr_games             string 
 2   bpr_pr                string 
 3   bpr_rank              string 
 4   bpr_unit              string 
 5   club_class            int8   
 6   club_id_number        int64  
 7   club_name             string 
 8   direction             string 
 9   event_id              int64  
 10  event_name            string 
 11  event_type            string 
 12  game_date             string 
 13  mp_total_x            float32
 14  mp_total_y            float32
 15  pair_number           int64  
 16  pair_summary_id       int64  
 17  percentage            float32
 18  player_id_x           int64  
 19  player_id_y           int64  
 20  player_name_x         string 
 21  player_name_y         string 
 22  player_number_x       string 
 23  player

In [31]:
# takes 1h30m-4h for 6m pair summaries. 2m-8m for individual clubs. 24m 400iter [108571,267096]. 10h.
# 1) Select: dependent variable, columns for training, estimator.
# 2) Call train/valid split routine.
# 3) Call estimator
# 4) Save model
# takes 12m for 'DD_N_C' with max_iter=200 hidden_layer_sizes=(400, 100).
# takes 40m for 'Par_Score' with max_iter=200 hidden_layer_sizes=(400, 100). No convergence.
# takes 75m for 'Par_Score' with max_iter=500 hidden_layer_sizes=(600, 200). No convergence.

max_iter, hidden_layer_sizes = 512, [1024]*2+[512]*2+[256]*2+[128]*2+[64]*2 # 8m30s. 10h for all but no convergence.
#max_iter, hidden_layer_sizes = 200, [512]*2+[256]*2+[128]*2+[64]*2 # todo: make global was 400,100

# restrict to clubs in list
allowed_club_id_numbers = [] #108571,267096,261735,267120] # must be int, not string
if allowed_club_id_numbers:
    augmented_df = augmented_df[augmented_df['club_id_number'].isin(allowed_club_id_numbers)]

# select columns to train on.
training_columns = []
training_columns += [
        'percentage', 
        'pair_summary_id',
        'player_number_x', 'player_number_y',
        'player_name_x', 'player_name_y',
        'mp_total_x', 'mp_total_y', 'GeoMP', 'SumMP',
        'pair_numbers', 'pair_count', 'pair_mean', 'pair_std',
        'direction', 'strat',
        'section_name', 'game_date', 'event_name',
        'club_id_number', 'event_type', 'rating',
        'stratification_type', 'winner_type', 'club_class',
        'bpr_rank', 'bpr_pr', 'bpr_games', 'bpr_unit' # todo: bpr_rank, bpr_pr might be tainting validation as they are forward looking?
        ]

#training_columns += augmented_df.columns.to_list()
training_columns_regex = '|'.join(['^'+fc+'$' for fc in training_columns]) # set regex anchors so entire string is matched

estimator = 'MLPRegressor'

# todo: allow dep_vars that are not in training_columns. 
dep_vars =  ['percentage']

for dep_var in dep_vars:

    # Create model's filename using unique values: iteration count, hidden layer sizes.
    saved_model_file = savedModelsPath.joinpath('_'.join([dep_var,str(max_iter)+'Iters','x'.join(str(hls) for hls in hidden_layer_sizes)])+'.pkl')

    # delete outputs
    print(f"Deleting model:{saved_model_file}")
    saved_model_file.unlink(missing_ok=True)
     
    # define train/validate split
    split = augmented_df['game_date']<'2021-06-01'
    augmented_df = augment_pair_stats(augmented_df,augmented_df[split].index) # augment with stats but only using training data. never valid data.
    augmented_df = augmented_df[augmented_df['pair_count'].ge(5)]
    trainx, trainy, validx, validy = create_train_valid_dfs(augmented_df, training_columns_regex, split, dep_var)

    # train model
    m, predictionsEmbedded, probabilities, predictionsCoefficientsdf = Train(estimator, dep_var, trainx, trainy, validx, validy, max_iter=max_iter, hidden_layer_sizes=hidden_layer_sizes, verbose=True)
    #m = Train(estimator, dep_var, trainx, trainy, validx, validy, max_iter=max_iter, hidden_layer_sizes=hidden_layer_sizes)
    print(f"Model score:{m.score(validx,validy)}")
    
    # save model
    print(f"Saving model: {saved_model_file}")
    saved_model = {'dep_var':dep_var,'trainx':trainx, 'trainy':trainy, 'validx':validx, 'validy':validy, 'm':m, 'predictionsEmbedded':predictionsEmbedded, 'probabilities':probabilities, 'predictionsCoefficientsdf':predictionsCoefficientsdf, 'max_iter':max_iter, 'hidden_layer_sizes':hidden_layer_sizes}
    with open(saved_model_file, 'wb') as f:
        pickle.dump(saved_model, f)
    print(f"Saved model len:{saved_model} size:{saved_model_file.stat().st_size}")
    
    # load model
    print(f"Loading model: {saved_model_file}")
    with open(saved_model_file, 'rb') as f:
        saved_model = pickle.load(f)
    dep_var, trainx, trainy, validx, validy, m, predictionsEmbedded, probabilities, predictionsCoefficientsdf, max_iter, hidden_layer_sizes = saved_model.values()
    
    # save again using filename of 'LatestModel.pkl'
    latest_model_file = savedModelsPath.joinpath('LatestModel.pkl')
    print(f"Saving model: {latest_model_file}")
    with open(latest_model_file, 'wb') as f:
        pickle.dump(saved_model, f)
    print(f"Latest model len:{saved_model} size:{latest_model_file.stat().st_size}")

Deleting model:e:\bridge\data\acbl\SavedModels\percentage_512Iters_1024x1024x512x512x256x256x128x128x64x64.pkl
Training columns: Index(['bpr_games', 'bpr_pr', 'bpr_rank', 'bpr_unit', 'club_class',
       'club_id_number', 'direction', 'event_name', 'event_type', 'game_date',
       'mp_total_x', 'mp_total_y', 'pair_summary_id', 'percentage',
       'player_name_x', 'player_name_y', 'player_number_x', 'player_number_y',
       'rating', 'section_name', 'strat', 'stratification_type', 'winner_type',
       'pair_numbers', 'SumMP', 'GeoMP', 'pair_count', 'pair_mean',
       'pair_std'],
      dtype='object')


  trainx = training_df[split].drop(dep_var,axis='columns')


'trainx: dtypes:bpr_games               string\nbpr_pr                  string\nbpr_rank                string\nbpr_unit                string\nclub_class                int8\nclub_id_number           int64\ndirection               string\nevent_name              string\nevent_type              string\ngame_date               string\nmp_total_x             float32\nmp_total_y             float32\npair_summary_id          int64\nplayer_name_x           string\nplayer_name_y           string\nplayer_number_x         string\nplayer_number_y         string\nrating                    int8\nsection_name            string\nstrat                     Int8\nstratification_type     string\nwinner_type               int8\npair_numbers            string\nSumMP                  float32\nGeoMP                  float32\npair_count               Int32\npair_mean              float32\npair_std               float32\ndtype: object len:1469218 shape:(1469218, 28)'

Unnamed: 0,bpr_games,bpr_pr,bpr_rank,bpr_unit,club_class,club_id_number,direction,event_name,event_type,game_date,mp_total_x,mp_total_y,pair_summary_id,player_name_x,player_name_y,player_number_x,player_number_y,rating,section_name,strat,stratification_type,winner_type,pair_numbers,SumMP,GeoMP,pair_count,pair_mean,pair_std
36,,,,,4,100040,NS,#1802 Pairs Unit 456 - 21 Boards,PAIRS,2020-08-19 00:00:00,1084.76,339.78,3261760,Maureen Larson,Mike Landry(swap names),1015699,8160996,1,A,1,HIGHEST,2,1015699_8160996,1424.54,368579.75,10,0.57,0.05
37,,,,,4,100040,NS,#1802 Pairs Unit 456 - 21 Boards,PAIRS,2020-08-19 00:00:00,1170.85,1043.55,3261762,John Perry,Julie Perry(swap names),4029062,6694985,1,A,1,HIGHEST,2,4029062_6694985,2214.4,1221840.5,25,0.57,0.06
38,,,,,4,100040,NS,#1802 Pairs Unit 456 - 21 Boards,PAIRS,2020-08-19 00:00:00,7.1,41.76,3261754,Vickie Roche,Stan Marinoske,7846509,9059474,1,A,2,HIGHEST,2,7846509_9059474,48.86,296.5,53,0.56,0.06
40,,,,,4,100040,EW,#1802 Pairs Unit 456 - 21 Boards,PAIRS,2020-08-19 00:00:00,414.55,208.47,3261767,F Mark Davey,William Curtis,1572539,9124837,1,A,2,HIGHEST,2,1572539_9124837,623.02,86421.23,72,0.56,0.06
41,,,,,4,100040,EW,#1802 Pairs Unit 456 - 21 Boards,PAIRS,2020-08-19 00:00:00,901.9,1156.03,3261755,Robert Miller,Connie Sayler(swap names),2433087,7892748,1,A,1,HIGHEST,2,2433087_7892748,2057.93,1042623.5,34,0.55,0.05


  trainy = training_df[split][dep_var]


'trainy: dtypes:float32 len:1469218 shape:(1469218,)'

36   0.56
37   0.54
38   0.52
40   0.67
41   0.58
Name: percentage, dtype: float32

  validx = training_df[~split].drop(dep_var,axis='columns')


'validx: dtypes:bpr_games               string\nbpr_pr                  string\nbpr_rank                string\nbpr_unit                string\nclub_class                int8\nclub_id_number           int64\ndirection               string\nevent_name              string\nevent_type              string\ngame_date               string\nmp_total_x             float32\nmp_total_y             float32\npair_summary_id          int64\nplayer_name_x           string\nplayer_name_y           string\nplayer_number_x         string\nplayer_number_y         string\nrating                    int8\nsection_name            string\nstrat                     Int8\nstratification_type     string\nwinner_type               int8\npair_numbers            string\nSumMP                  float32\nGeoMP                  float32\npair_count               Int32\npair_mean              float32\npair_std               float32\ndtype: object len:80061 shape:(80061, 28)'

Unnamed: 0,bpr_games,bpr_pr,bpr_rank,bpr_unit,club_class,club_id_number,direction,event_name,event_type,game_date,mp_total_x,mp_total_y,pair_summary_id,player_name_x,player_name_y,player_number_x,player_number_y,rating,section_name,strat,stratification_type,winner_type,pair_numbers,SumMP,GeoMP,pair_count,pair_mean,pair_std
377,,,,,4,100040,NS,#20898 Pairs Unit 456 21 Boards,PAIRS,2021-06-02 00:00:00,260.34,222.13,5782772,Michael Anthony,Denise Anthony(swap names),1271903,3337391,1,A,2,AVERAGE,1,1271903_3337391,482.47,57829.32,24,0.56,0.05
378,,,,,4,100040,NS,#20898 Pairs Unit 456 21 Boards,PAIRS,2021-06-02 00:00:00,1050.02,1200.27,5782774,Robert Miller,Connie Sayler(swap names),2433087,7892748,1,A,1,AVERAGE,1,2433087_7892748,2250.29,1260307.5,34,0.55,0.05
379,,,,,4,100040,NS,#20898 Pairs Unit 456 21 Boards,PAIRS,2021-06-02 00:00:00,1103.91,762.09,5782764,Michael Petrescu,David McWalter(swap names),2373424,2779099,1,A,1,AVERAGE,1,2373424_2779099,1866.0,841278.81,12,0.58,0.04
380,,,,,4,100040,NS,#20898 Pairs Unit 456 21 Boards,PAIRS,2021-06-02 00:00:00,24.9,35.39,5782770,Victoria Handford,Dan Scarffe,1407929,1407937,1,A,3,AVERAGE,1,1407929_1407937,60.29,881.21,28,0.54,0.05
381,,,,,4,100040,EW,#20898 Pairs Unit 456 21 Boards,PAIRS,2021-06-02 00:00:00,907.08,673.39,5782763,James Scarfe,Kirk Rustad,3449335,6451748,1,A,2,AVERAGE,1,3449335_6451748,1580.47,610818.62,32,0.58,0.06


  validy = training_df[~split][dep_var]


'validy: dtypes:float32 len:80061 shape:(80061,)'

377   0.53
378   0.51
379   0.50
380   0.45
381   0.66
Name: percentage, dtype: float32

category:[]
number:['club_class', 'club_id_number', 'mp_total_x', 'mp_total_y', 'pair_summary_id', 'rating', 'strat', 'winner_type', 'SumMP', 'GeoMP', 'pair_count', 'pair_mean', 'pair_std']
string:['bpr_games', 'bpr_pr', 'bpr_rank', 'bpr_unit', 'direction', 'event_name', 'event_type', 'game_date', 'player_name_x', 'player_name_y', 'player_number_x', 'player_number_y', 'section_name', 'stratification_type', 'pair_numbers']
Iteration 1, loss = 516.63273053
Iteration 2, loss = 0.01702457
Iteration 3, loss = 2.32756619
Iteration 4, loss = 0.00215885
Iteration 5, loss = 0.00329314
Iteration 6, loss = 0.00174164
Iteration 7, loss = 0.00156286
Iteration 8, loss = 0.00150858
Iteration 9, loss = 0.00150216
Iteration 10, loss = 0.00149858
Iteration 11, loss = 0.00149568
Iteration 12, loss = 0.00149513
Iteration 13, loss = 0.00149614
Iteration 14, loss = 0.00149537
Iteration 15, loss = 0.00149556
Iteration 16, loss = 0.00149607
Iteration 17, loss = 0.00149558
Iteration 18, loss = 0.00149526
Train

Saving model: e:\bridge\data\acbl\SavedModels\LatestModel.pkl
Latest model len:{'dep_var': 'percentage', 'trainx':         bpr_games bpr_pr bpr_rank bpr_unit  club_class  club_id_number direction                                   event_name event_type            game_date  mp_total_x  mp_total_y  pair_summary_id        player_name_x                 player_name_y player_number_x player_number_y  rating section_name  strat stratification_type  winner_type     pair_numbers   SumMP      GeoMP  pair_count  pair_mean  pair_std
36                                                   4          100040        NS             #1802 Pairs Unit 456 - 21 Boards      PAIRS  2020-08-19 00:00:00     1084.76      339.78          3261760       Maureen Larson       Mike Landry(swap names)         1015699         8160996       1            A      1             HIGHEST            2  1015699_8160996 1424.54  368579.75          10       0.57      0.05
37                                                   4       

### Perform some minimal tests on trained data. Use next step's notebook for further experiments.

In [32]:
probabilities.shape, len(probabilities), len(validx)

((80061,), 80061, 80061)

In [33]:
#assert probabilities.shape == (len(validx), 14)

In [34]:
m, predictionsEmbedded, probabilities, predictionsCoefficientsdf

(Pipeline(steps=[('preprocessor',
                  ColumnTransformer(transformers=[('number',
                                                   Pipeline(steps=[('number',
                                                                    StandardScaler())]),
                                                   ['club_class',
                                                    'club_id_number',
                                                    'mp_total_x', 'mp_total_y',
                                                    'pair_summary_id', 'rating',
                                                    'strat', 'winner_type',
                                                    'SumMP', 'GeoMP',
                                                    'pair_count', 'pair_mean',
                                                    'pair_std']),
                                                  ('string',
                                                   Pipeline(steps=[('string',
           

In [35]:
predictionsCoefficientsdf

Unnamed: 0,bpr_games,bpr_pr,bpr_rank,bpr_unit,club_class,club_id_number,direction,event_name,event_type,game_date,mp_total_x,mp_total_y,pair_summary_id,player_name_x,player_name_y,player_number_x,player_number_y,rating,section_name,strat,stratification_type,winner_type,pair_numbers,SumMP,GeoMP,pair_count,pair_mean,pair_std
0,0.00,-0.00,0.00,0.00,-0.00,-0.00,-0.00,0.00,0.00,0.00,0.00,0.00,0.00,-0.00,-0.00,-0.00,-0.00,-0.00,0.00,0.00,-0.00,0.00,0.00,-0.00,-0.00,0.00,-0.00,-0.00
1,0.00,0.00,-0.00,0.00,-0.00,-0.00,-0.00,-0.00,-0.00,-0.00,0.00,-0.00,-0.00,-0.00,-0.00,-0.00,-0.00,-0.00,-0.00,-0.00,-0.00,-0.00,-0.00,-0.00,-0.00,-0.00,-0.00,-0.00
2,-0.00,-0.00,0.00,0.00,-0.00,0.00,0.00,-0.00,0.00,0.00,0.00,-0.00,0.00,-0.00,-0.00,0.00,-0.00,-0.00,-0.00,0.00,-0.00,-0.00,-0.00,-0.00,-0.00,-0.00,0.00,-0.00
3,-0.00,0.00,0.00,0.00,-0.00,0.00,0.00,0.00,0.00,-0.00,0.00,0.00,-0.00,0.00,0.00,-0.00,0.00,0.00,0.00,-0.00,-0.00,0.00,-0.00,-0.00,0.00,-0.00,-0.00,0.00
4,-0.00,-0.00,0.00,0.00,-0.00,0.00,0.00,-0.00,0.00,0.00,0.00,-0.00,-0.00,-0.00,-0.00,-0.00,-0.00,0.00,0.00,0.00,-0.00,-0.00,-0.00,-0.00,-0.00,0.00,-0.00,-0.00
5,-0.00,-0.00,-0.00,-0.00,0.00,0.00,0.00,0.00,-0.00,-0.00,-0.00,-0.00,-0.00,0.00,0.00,-0.00,0.00,0.00,-0.01,-0.00,-0.00,-0.00,-0.00,-0.00,-0.00,-0.00,0.00,-0.00
6,0.00,0.00,-0.00,-0.00,0.00,-0.00,-0.00,0.00,-0.00,-0.00,-0.00,0.00,0.00,0.00,-0.00,0.00,-0.00,0.00,-0.00,-0.00,-0.00,0.00,-0.00,0.00,0.00,-0.00,0.00,0.00
7,0.00,0.00,0.00,-0.00,-0.00,0.00,0.00,-0.00,0.00,-0.00,0.00,0.00,0.00,0.00,-0.00,0.00,0.00,0.00,0.00,-0.00,0.00,0.00,-0.00,0.00,-0.00,-0.00,-0.00,0.00
8,-0.00,-0.00,-0.00,-0.00,-0.00,-0.00,0.00,0.00,-0.00,-0.00,-0.00,0.00,0.00,-0.00,-0.00,-0.00,-0.00,-0.00,-0.00,-0.00,-0.00,-0.00,-0.00,-0.00,-0.00,-0.00,-0.00,0.00
9,0.00,0.00,0.00,0.00,-0.00,0.00,-0.00,0.00,0.00,0.00,0.00,0.00,0.00,-0.00,-0.00,-0.00,-0.00,-0.00,-0.00,0.00,-0.00,-0.00,-0.00,0.00,-0.00,-0.00,0.00,0.00


In [36]:
[p for p in predictionsEmbedded]

[0.5603249439039648,
 0.5588732225151799,
 0.5597154155579348,
 0.5628494061961642,
 0.5561209620124993,
 0.5561209620124993,
 0.5561209620124993,
 0.5594870973105005,
 0.5561209620124993,
 0.5561209620124993,
 0.5561209620124993,
 0.5603249439874148,
 0.5597154068495702,
 0.5628494002442105,
 0.5561209620124993,
 0.5561209620124993,
 0.5561209620124993,
 0.5561209620124993,
 0.5561209620124993,
 0.5603249376625484,
 0.5597151843215543,
 0.5588732236902112,
 0.5603247091633209,
 0.5561209620124993,
 0.5561209620124993,
 0.5561209620124993,
 0.5561209620124993,
 0.5561209620124993,
 0.5628494057744995,
 0.5561209620124993,
 0.5597154110758984,
 0.5561209620124993,
 0.5561209620124993,
 0.5610232385223562,
 0.5561209620124993,
 0.5582097814291859,
 0.558535180827822,
 0.5573274504213669,
 0.5585971969246688,
 0.5561975980218489,
 0.5561209620124993,
 0.5561209620124993,
 0.5561209620124993,
 0.5579201774608407,
 0.5568480849784186,
 0.5601820137652576,
 0.5565655561396462,
 0.55956280712

In [37]:
[round(p,2) for p in probabilities]

[0.56,
 0.56,
 0.56,
 0.56,
 0.56,
 0.56,
 0.56,
 0.56,
 0.56,
 0.56,
 0.56,
 0.56,
 0.56,
 0.56,
 0.56,
 0.56,
 0.56,
 0.56,
 0.56,
 0.56,
 0.56,
 0.56,
 0.56,
 0.56,
 0.56,
 0.56,
 0.56,
 0.56,
 0.56,
 0.56,
 0.56,
 0.56,
 0.56,
 0.56,
 0.56,
 0.56,
 0.56,
 0.56,
 0.56,
 0.56,
 0.56,
 0.56,
 0.56,
 0.56,
 0.56,
 0.56,
 0.56,
 0.56,
 0.56,
 0.58,
 0.56,
 0.56,
 0.56,
 0.56,
 0.56,
 0.56,
 0.56,
 0.56,
 0.56,
 0.56,
 0.56,
 0.56,
 0.56,
 0.56,
 0.56,
 0.56,
 0.56,
 0.56,
 0.56,
 0.56,
 0.56,
 0.56,
 0.56,
 0.56,
 0.56,
 0.56,
 0.56,
 0.56,
 0.56,
 0.56,
 0.56,
 0.56,
 0.56,
 0.56,
 0.56,
 0.56,
 0.56,
 0.56,
 0.56,
 0.56,
 0.56,
 0.56,
 0.56,
 0.56,
 0.56,
 0.56,
 0.56,
 0.56,
 0.56,
 0.56,
 0.56,
 0.56,
 0.56,
 0.56,
 0.58,
 0.56,
 0.56,
 0.56,
 0.56,
 0.56,
 0.56,
 0.56,
 0.56,
 0.56,
 0.56,
 0.56,
 0.56,
 0.56,
 0.56,
 0.56,
 0.56,
 0.56,
 0.56,
 0.56,
 0.56,
 0.56,
 0.56,
 0.56,
 0.56,
 0.56,
 0.56,
 0.56,
 0.56,
 0.56,
 0.56,
 0.56,
 0.56,
 0.56,
 0.56,
 0.56,
 0.56,
 0.56,
 0.56,