In [1]:
# takes 4h30m for 3m hand records
# perform experiments on standardized hand records.

# next steps:
# acbl-hand-records-chart-experiments.ipynb experiments with the trained data

# previous steps:
# acbl-hand-records.ipynb created acbl-hand-records.pkl

# todo:
# save model predictions
# let model know North hands. how? in terms of one hot 52 cards or 13/suit? QT? 
# plot accuracy of model
# remove all metrics except m.score?
# remove obsolete validation_df from model
# try feature scaling to improve accuracy?
# try changing activation function to sigmod/tanh/relu
# try using scikit's optimizer to try out various models.

# models:
# how often can expert bridge players predict dd? how can this be determined?
# create model to predict par score. different than dd?
# vary dd, hcp, sl to see how it effects predictions.
# chart of dd given sl and hcp is important. how does chart compare to predictions?
# show accuracy for each model

In [2]:
import pandas as pd
import pathlib
import pickle
import re
from collections import defaultdict
from IPython.display import display # needed to define display() method in vscode
import mlBridgeLib

In [3]:
# override pandas display options
mlBridgeLib.pd_options_display()

In [4]:
rootPath = pathlib.Path('e:/bridge/data')
acblPath = rootPath.joinpath('acbl')
savedModelsPath = acblPath.joinpath('SavedModels')
# create parent directories in case they don't already exist.
savedModelsPath.mkdir(parents=True, exist_ok=True)

In [30]:
# takes 35s
# load augmented_df
augmented_standardized_hand_records_file = acblPath.joinpath('acbl_hand_records_augmented.pkl')
with open(augmented_standardized_hand_records_file, 'rb') as f:
    augmented_df = pickle.load(f)

In [27]:
# Create views of train/validate split.
# todo: validate shapes, validate columns are only numeric or boolean
def create_train_valid_dfs(augmented_df, training_columns_regex, split, dep_var):

    training_df = augmented_df.filter(regex=training_columns_regex, axis='columns')
    print('Training columns:', training_df.columns)

    # why is this failing?
    #assert training_df.isna().any().any()

    # create training dataframes
    trainx = training_df[split].drop(dep_var,axis='columns')
    display(f'trainx: dtypes:{trainx.dtypes} len:{len(trainx)} shape:{trainx.shape}',trainx.head())
    assert (trainx.dtypes != 'object').all()
    trainy = training_df[split][dep_var]
    display(f'trainy: dtypes:{trainy.dtypes} len:{len(trainy)} shape:{trainy.shape}',trainy.head())
    assert trainy.dtype != 'object'

    #assert dep_var in trainx.columns # dep_var must be in training df
    assert dep_var == trainy.name # dep_var must be in training df

    # construct validation dataframes
    validx = training_df[~split].drop(dep_var,axis='columns')
    display(f'validx: dtypes:{validx.dtypes} len:{len(validx)} shape:{validx.shape}',validx.head())
    assert (validx.dtypes != 'object').all()
    validy = training_df[~split][dep_var]
    display(f'validy: dtypes:{validy.dtypes} len:{len(validy)} shape:{validy.shape}',validy.head())
    assert validy.dtype != 'object'

    #assert dep_var not in validx.columns # dep_var must not be in validation df
    assert dep_var == validy.name # dep_var must not be in validation df

    return trainx, trainy, validx, validy

In [28]:
import sklearn.pipeline
import sklearn.preprocessing
import sklearn.neural_network
import sklearn.compose

def Train(estimator, dep_var, trainx, trainy, validx, validy, **kwargs):

    column_transformers = []
    all_columns = []
    passthrough_columns = []
    # how to specify columns to be OneHotEncoded? ('ohe',sklearn.preprocessing.OneHotEncoder())
    pipes = [('boolean',None),('category',sklearn.preprocessing.OrdinalEncoder()),('number',sklearn.preprocessing.StandardScaler()),('string',sklearn.preprocessing.OrdinalEncoder())]
    for dt,p in pipes:
        columns = trainx.select_dtypes(dt).columns.to_list()
        print(f'{dt}:{columns}')
        if len(columns) > 0:
            if p is None:
                passthrough_columns += columns
            else:
                pipeline = sklearn.pipeline.Pipeline(steps=[(dt, p)])
                column_transformers.append((dt,pipeline,columns))
            all_columns += columns
    if len(passthrough_columns) > 0:
        column_transformers.append(('passthrough','passthrough',passthrough_columns))

    assert sorted(all_columns) == sorted(trainx.columns.to_list())

    preprocessor_pipeline = sklearn.compose.ColumnTransformer(column_transformers)

    if estimator == 'LinearRegression':
        estimator_func = sklearn.neural_network.LinearRegression(random_state=1, **kwargs)
    elif estimator == 'LogisticRegressionClassifier':
        estimator_func = sklearn.neural_network.LogisticRegressionClassifier(random_state=1, **kwargs)
    elif estimator == 'MLPClassifier':
        estimator_func = sklearn.neural_network.MLPClassifier(random_state=1, **kwargs)
    elif estimator == 'MLPRegressor':
        estimator_func = sklearn.neural_network.MLPRegressor(random_state=1, **kwargs)
#    elif estimator == 'Keras.Sequential':
#        estimator_func = sklearn.neural_network.MLPRegressor(random_state=1, **kwargs)
    else:
        assert 'Unknown Estimator', estimator
    
    m = sklearn.pipeline.Pipeline(steps=[
        ('preprocessor', preprocessor_pipeline),
        ('estimator', estimator_func)
    ])

    m.fit(trainx, trainy)
    predictionsEmbedded = m.predict(validx)
    probabilities = m.predict_proba(validx)
    predictionsCoefficientsdf = pd.DataFrame(m['estimator'].coefs_[0].T, columns=validx.columns)

    assert len(predictionsEmbedded) == len(validy)

    return m, predictionsEmbedded, probabilities, predictionsCoefficientsdf

In [31]:
# Classification neural network
import numpy as np
import tensorflow as tf # todo: change tensorflow. to tf.
import tensorflow.keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.model_selection import train_test_split

## Split into train/test
#x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.25, random_state=42)

# todo: pass in epochs, verbose level
# todo: make classifier and regression versions
def Keras_Sequential(trainx, trainy, validx, validy, **kwargs):
    print('GPUs Available: ', tf.config.list_physical_devices('GPU'))
    # todo: put asserts before split, outside of any loops, so they're one-time only?
    assert all(trainx.notna())
    assert all(trainy.notna())
    assert all(validx.notna())
    assert all(validy.notna())
    # todo: need to change tf.dtype per column (trainx, validx) not wholesale across all columns.
    # todo: implement bool conversion of Vul_??, strings for SL_Max_(NS|EW).
    t_trainx = tensorflow.convert_to_tensor(trainx, dtype=tensorflow.int16)
    t_trainy = tensorflow.convert_to_tensor(trainy, dtype=tensorflow.int16)
    t_validx = tensorflow.convert_to_tensor(validx, dtype=tensorflow.int16)
    t_validy = tensorflow.convert_to_tensor(validy, dtype=tensorflow.int16)
    print(type(t_trainx),t_trainx.shape)
    print(type(t_trainy),t_trainy.shape)
    print(type(t_validx),t_validx.shape)
    print(type(t_validy),t_validy.shape)
    model = Sequential()
    model.add(Dense(400, input_dim=t_trainx.shape[1], activation='relu'))
    model.add(Dense(200, activation='relu'))
    model.add(Dense(100, activation='relu'))
    # todo: was trainy.shape[1]
    model.add(Dense(1,activation='softmax')) # 1 is for single column result
    #model.compile(loss='categorical_crossentropy', optimizer=tensorflow.keras.optimizers.Adam(), metrics=['accuracy'])
    # be sure to use loss, metrics, optimizer which are compatible with estimator type (classifier, regression)
    model.compile(loss='mean_squared_error', metrics=['mean_squared_error'], optimizer=tensorflow.keras.optimizers.Adam())
    monitor = EarlyStopping(monitor='val_loss', min_delta=1e-3, patience=5, verbose=1, mode='auto', restore_best_weights=True)
    #model.fit(t_trainx, t_trainy, validation_data=(t_validx, t_validy), callbacks=[monitor], verbose='auto', epochs=200)
    model.fit(trainx, trainy, validation_data=(validx, validy), callbacks=[monitor], verbose='auto', epochs=200)
    return model

In [32]:
# takes 4h15m for 3m hand records using sklearn's MLPClassifier
# 1) Select: dependent variable, columns for training, estimator.
# 2) Call train/valid split routine.
# 3) Call estimator
# 4) Save model
# takes 12m for 'DD_N_C' with max_iter=200 hidden_layer_sizes=(400, 100).
# takes 40m for 'Par_Score' with max_iter=200 hidden_layer_sizes=(400, 100). No convergence.
# takes 75m for 'Par_Score' with max_iter=500 hidden_layer_sizes=(600, 200). No convergence.

max_iter = 200 # max_iter maximum is 200. setting to smaller number not terminating earlier?
hidden_layer_sizes = (400, 200, 100) # todo: make global was 400,100

# select columns to train on.
training_columns = []
training_columns += ['Vul_(NS|EW)'] # all players know this info
# instead of HCP and QT, use one hot encoded list (52) of North hand?
training_columns += ['(HCP|QT|SL)_N_[SDHC]'] # players know their own hand. we'll use North.
training_columns += ['SL_Max_(NS|EW)'] # column name of longest suit fit. using higher ranking suit if tied.
training_columns += ['HCP_(NS|EW)','SL_(NS|EW)'] # players can somewhat infer this info from bidding
#training_columns += ['DD_(NS|EW)_[CDHSN]','CT_(NS|EW)_[CDHSN]','QT_(NS|EW)_[SHDC]'] # more difficult to infer
training_columns += ['Par_Score']
training_columns_regex = '|'.join(['^'+fc+'$' for fc in training_columns]) # set regex anchors so entire string is matched

estimator = 'MLPClassifier'

# todo: allow dep_vars that are not in training_columns. 
# todo: should Par_Score be categorical or int16?
dep_vars =  ['Par_Score'] # ['DD_NS_C']

for dep_var in dep_vars:

    # Create model's filename using unique values: iteration count, hidden layer sizes.
    saved_model_file = savedModelsPath.joinpath('_'.join([dep_var,str(max_iter)+'Iters','x'.join(str(hls) for hls in hidden_layer_sizes)])+'.pkl')

    # delete outputs
    print(f"Deleting model:{saved_model_file}")
    saved_model_file.unlink(missing_ok=True)

    # define train/validate split
    split = augmented_df['EventBoard']<"21"
    trainx, trainy, validx, validy = create_train_valid_dfs(augmented_df, training_columns_regex, split, dep_var)
    
    # train model
    if estimator.startswith('Keras'):
        # todo: need to select rewrite to pass Keras_Sequential_Regression/Classification
        model = Keras_Sequential(trainx, trainy, validx, validy, max_iter=max_iter, hidden_layer_sizes=hidden_layer_sizes, verbose=True)
        # todo: figure out how Keras returns predictions, probabilities, coefficients
    else:
        m, predictionsEmbedded, probabilities, predictionsCoefficientsdf = Train(estimator, dep_var, trainx, trainy, validx, validy, max_iter=max_iter, hidden_layer_sizes=hidden_layer_sizes, verbose=True)
        print(f"model score:{m.score(validx,validy)}")
    
    # save model
    print(f"Saving model: {saved_model_file}")
    saved_model = {'dep_var':dep_var,'trainx':trainx, 'trainy':trainy, 'validx':validx, 'validy':validy, 'm':m, 'predictionsEmbedded':predictionsEmbedded, 'probabilities':probabilities, 'predictionsCoefficientsdf':predictionsCoefficientsdf, 'max_iter':max_iter, 'hidden_layer_sizes':hidden_layer_sizes}
    with open(saved_model_file, 'wb') as f:
        pickle.dump(saved_model, f)
    print(f"Saved model size:{saved_model_file.stat().st_size}")
    
    # load model
    print(f"Loading model: {saved_model_file}")
    with open(saved_model_file, 'rb') as f:
        saved_model = pickle.load(f)
    dep_var, trainx, trainy, validx, validy, m, predictionsEmbedded, probabilities, predictionsCoefficientsdf, max_iter, hidden_layer_sizes = saved_model.values()
    
    # save again using filename of 'LatestModel.pkl'
    latest_model_file = savedModelsPath.joinpath('LatestModel.pkl')
    print(f"Saving model: {latest_model_file}")
    with open(latest_model_file, 'wb') as f:
        pickle.dump(saved_model, f)
    print(f"Latest model size:{latest_model_file.stat().st_size}")

Deleting model:e:\bridge\data\acbl\SavedModels\Par_Score_200Iters_400x200x100.pkl
Training columns: Index(['HCP_N_S', 'HCP_N_H', 'HCP_N_D', 'HCP_N_C', 'QT_N_S', 'QT_N_H',
       'QT_N_D', 'QT_N_C', 'SL_N_S', 'SL_N_H', 'SL_N_D', 'SL_N_C', 'HCP_NS',
       'HCP_EW', 'SL_Max_NS', 'SL_Max_EW', 'Par_Score', 'Vul_NS', 'Vul_EW'],
      dtype='object')


'trainx: dtypes:HCP_N_S          int8\nHCP_N_H          int8\nHCP_N_D          int8\nHCP_N_C          int8\nQT_N_S        float32\nQT_N_H        float32\nQT_N_D        float32\nQT_N_C        float32\nSL_N_S           int8\nSL_N_H           int8\nSL_N_D           int8\nSL_N_C           int8\nHCP_NS           int8\nHCP_EW           int8\nSL_Max_NS    category\nSL_Max_EW    category\nVul_NS           bool\nVul_EW           bool\ndtype: object len:2564459 shape:(2564459, 18)'

Unnamed: 0,HCP_N_S,HCP_N_H,HCP_N_D,HCP_N_C,QT_N_S,QT_N_H,QT_N_D,QT_N_C,SL_N_S,SL_N_H,SL_N_D,SL_N_C,HCP_NS,HCP_EW,SL_Max_NS,SL_Max_EW,Vul_NS,Vul_EW
0,0,3,1,5,0.0,0.5,0.0,1.0,2,3,2,6,21,19,SL_NS_H,SL_EW_S,False,True
1,0,4,1,4,0.0,0.5,0.0,0.5,3,2,3,5,22,18,SL_NS_D,SL_EW_S,True,True
2,0,0,6,3,0.0,0.0,1.5,0.0,1,3,4,5,23,17,SL_NS_H,SL_EW_S,False,True
3,0,3,1,1,0.0,0.0,0.0,0.0,1,6,2,4,12,28,SL_NS_H,SL_EW_D,True,False
4,0,2,0,7,0.0,0.0,0.0,2.0,1,5,2,5,13,27,SL_NS_H,SL_EW_S,True,True


'trainy: dtypes:category len:2564459 shape:(2564459,)'

0     200
1      90
2     140
3    -460
4   -1430
Name: Par_Score, dtype: category
Categories (75, int64): [-2220, -2210, -2140, -2000, ..., 2000, 2140, 2210, 2220]

'validx: dtypes:HCP_N_S          int8\nHCP_N_H          int8\nHCP_N_D          int8\nHCP_N_C          int8\nQT_N_S        float32\nQT_N_H        float32\nQT_N_D        float32\nQT_N_C        float32\nSL_N_S           int8\nSL_N_H           int8\nSL_N_D           int8\nSL_N_C           int8\nHCP_NS           int8\nHCP_EW           int8\nSL_Max_NS    category\nSL_Max_EW    category\nVul_NS           bool\nVul_EW           bool\ndtype: object len:397810 shape:(397810, 18)'

Unnamed: 0,HCP_N_S,HCP_N_H,HCP_N_D,HCP_N_C,QT_N_S,QT_N_H,QT_N_D,QT_N_C,SL_N_S,SL_N_H,SL_N_D,SL_N_C,HCP_NS,HCP_EW,SL_Max_NS,SL_Max_EW,Vul_NS,Vul_EW
5012,0,0,5,6,0.0,0.0,1.0,1.5,1,2,6,4,25,15,SL_NS_D,SL_EW_S,True,True
5013,0,4,1,4,0.0,0.5,0.0,1.0,0,4,2,7,19,21,SL_NS_C,SL_EW_S,True,True
5014,0,6,0,3,0.0,1.5,0.0,0.5,2,4,3,4,19,21,SL_NS_H,SL_EW_D,True,True
5015,3,0,7,0,0.0,0.0,1.5,0.0,5,3,4,1,20,20,SL_NS_H,SL_EW_C,True,True
5016,4,1,3,0,1.0,0.0,0.0,0.0,3,2,5,3,19,21,SL_NS_D,SL_EW_C,True,True


'validy: dtypes:category len:397810 shape:(397810,)'

5012    630
5013   -110
5014   -120
5015    600
5016   -130
Name: Par_Score, dtype: category
Categories (75, int64): [-2220, -2210, -2140, -2000, ..., 2000, 2140, 2210, 2220]

boolean:['Vul_NS', 'Vul_EW']
category:['SL_Max_NS', 'SL_Max_EW']
number:['HCP_N_S', 'HCP_N_H', 'HCP_N_D', 'HCP_N_C', 'QT_N_S', 'QT_N_H', 'QT_N_D', 'QT_N_C', 'SL_N_S', 'SL_N_H', 'SL_N_D', 'SL_N_C', 'HCP_NS', 'HCP_EW']
string:[]
Iteration 1, loss = 2.27661548
Iteration 2, loss = 2.04354210
Iteration 3, loss = 1.94747391
Iteration 4, loss = 1.89552685
Iteration 5, loss = 1.86121578
Iteration 6, loss = 1.83644691
Iteration 7, loss = 1.81729902
Iteration 8, loss = 1.80183124
Iteration 9, loss = 1.78864325
Iteration 10, loss = 1.77757172
Iteration 11, loss = 1.76771235
Iteration 12, loss = 1.75850377
Iteration 13, loss = 1.75075869
Iteration 14, loss = 1.74278461
Iteration 15, loss = 1.73614436
Iteration 16, loss = 1.73014042
Iteration 17, loss = 1.72427894
Iteration 18, loss = 1.71875032
Iteration 19, loss = 1.71374694
Iteration 20, loss = 1.70894332
Iteration 21, loss = 1.70450247
Iteration 22, loss = 1.70003224
Iteration 23, loss = 1.69579934
Iteration 24, loss = 1.69148498
Iteration 25, 

In [33]:
probabilities.shape, len(probabilities), len(validx)

((397810, 75), 397810, 397810)

In [34]:
#assert probabilities.shape == (len(validx), 14)

In [35]:
m, predictionsEmbedded, probabilities, predictionsCoefficientsdf

(Pipeline(steps=[('preprocessor',
                  ColumnTransformer(transformers=[('category',
                                                   Pipeline(steps=[('category',
                                                                    OrdinalEncoder())]),
                                                   ['SL_Max_NS', 'SL_Max_EW']),
                                                  ('number',
                                                   Pipeline(steps=[('number',
                                                                    StandardScaler())]),
                                                   ['HCP_N_S', 'HCP_N_H',
                                                    'HCP_N_D', 'HCP_N_C',
                                                    'QT_N_S', 'QT_N_H', 'QT_N_D',
                                                    'QT_N_C', 'SL_N_S', 'SL_N_H',
                                                    'SL_N_D', 'SL_N_C', 'HCP_NS',
                                

In [36]:
predictionsCoefficientsdf

Unnamed: 0,HCP_N_S,HCP_N_H,HCP_N_D,HCP_N_C,QT_N_S,QT_N_H,QT_N_D,QT_N_C,SL_N_S,SL_N_H,SL_N_D,SL_N_C,HCP_NS,HCP_EW,SL_Max_NS,SL_Max_EW,Vul_NS,Vul_EW
0,-0.75,0.07,-0.44,0.98,-0.98,0.58,0.92,-0.03,-0.95,-2.79,-0.50,0.24,-0.24,0.34,-0.12,0.16,-0.04,-0.85
1,-0.43,-0.65,-0.22,0.81,-0.45,2.59,-0.23,-0.59,1.47,-1.86,0.73,-0.43,-0.57,0.36,0.58,-0.45,-2.03,2.35
2,-1.91,-1.24,-0.14,0.57,-0.30,0.92,-0.14,1.29,-1.92,0.26,0.30,0.22,-0.89,0.49,-0.39,0.54,0.30,-0.02
3,-0.33,-0.02,2.32,-0.33,0.68,-0.08,-0.52,-1.27,-0.51,0.22,-1.80,0.83,0.36,0.46,-0.56,0.48,-1.44,-0.32
4,-0.61,0.18,0.26,-0.15,-1.58,0.71,-1.70,0.41,1.06,-1.40,0.67,0.25,-1.88,1.17,-0.63,0.61,-0.95,-0.42
5,-0.52,0.74,0.35,-1.12,1.29,-0.80,-0.85,0.91,-1.22,0.25,0.67,-1.20,0.26,0.14,0.59,-0.56,-1.75,0.55
6,-0.58,-1.07,-0.06,-0.18,-1.12,0.02,0.76,-2.47,0.19,0.46,-0.49,-0.50,1.26,-0.52,-0.43,0.54,-0.32,-0.50
7,-0.48,-1.20,0.35,0.22,0.55,-0.55,0.02,-0.03,0.32,1.52,2.12,-0.51,-0.94,-1.53,0.09,-0.29,0.10,0.87
8,0.20,-1.01,-0.47,-0.50,-1.70,-0.34,-0.15,-0.04,-0.52,-0.28,-0.79,-1.03,0.90,0.96,0.60,-0.49,-3.15,0.51
9,-0.03,-0.05,0.20,1.09,-0.97,-0.36,-0.70,-1.66,-0.79,0.33,0.54,0.20,-0.66,-0.02,-0.48,0.39,-0.21,0.52


In [37]:
[p for p in predictionsEmbedded]

[600,
 130,
 -140,
 140,
 110,
 140,
 -140,
 1370,
 -600,
 110,
 -620,
 -600,
 920,
 990,
 -100,
 140,
 990,
 140,
 -140,
 -400,
 400,
 -430,
 100,
 -450,
 100,
 -420,
 100,
 650,
 140,
 100,
 1100,
 100,
 -400,
 100,
 650,
 100,
 -2220,
 650,
 110,
 1400,
 1430,
 630,
 110,
 -620,
 130,
 110,
 -100,
 -2220,
 980,
 400,
 -100,
 -100,
 -100,
 400,
 430,
 100,
 -140,
 100,
 100,
 -450,
 140,
 420,
 100,
 650,
 -420,
 -420,
 -110,
 630,
 140,
 100,
 600,
 1430,
 400,
 420,
 -500,
 300,
 140,
 400,
 100,
 -420,
 -630,
 -140,
 100,
 -980,
 -140,
 -140,
 -100,
 -100,
 620,
 -100,
 100,
 2220,
 -110,
 -400,
 620,
 140,
 400,
 1440,
 -420,
 420,
 -140,
 110,
 -420,
 620,
 -400,
 -430,
 990,
 140,
 650,
 -600,
 -660,
 1400,
 650,
 -650,
 -110,
 -500,
 630,
 140,
 420,
 -600,
 140,
 -630,
 430,
 -500,
 -600,
 450,
 -450,
 140,
 100,
 400,
 100,
 -400,
 100,
 -100,
 140,
 -400,
 140,
 -140,
 1100,
 100,
 90,
 100,
 100,
 100,
 100,
 -660,
 -140,
 -140,
 600,
 100,
 -650,
 -1440,
 100,
 420,
 -400

In [38]:
[p for p in probabilities]

[array([3.54692812e-011, 1.52869169e-005, 4.08087515e-008, 2.70472961e-018,
        3.18995084e-010, 6.49102938e-018, 1.42511411e-011, 1.55490941e-010,
        7.39759043e-009, 4.18632195e-007, 2.47984592e-008, 1.57382511e-005,
        9.28880324e-018, 6.73484048e-010, 4.26874559e-010, 7.68911903e-005,
        3.41709531e-017, 4.33524315e-005, 1.94112352e-010, 1.05567254e-004,
        3.75278796e-006, 2.21697614e-003, 1.30039523e-011, 1.16293946e-008,
        2.94970524e-011, 1.03513242e-008, 3.18485455e-009, 3.98221120e-008,
        2.43552639e-003, 1.22544647e-003, 1.38211305e-005, 1.84397709e-006,
        3.92260463e-004, 2.33601225e-007, 7.54256860e-006, 1.00196934e-006,
        1.66823730e-017, 9.41873267e-153, 2.12038936e-010, 1.35036181e-006,
        9.58490012e-004, 4.34416714e-007, 9.34165839e-003, 1.64862101e-002,
        6.60844790e-002, 2.56718958e-002, 2.70795167e-002, 2.06054814e-006,
        2.60406452e-005, 1.07698577e-006, 2.08464787e-005, 5.52657155e-006,
        8.56

In [39]:
apdf = pd.DataFrame({'Actual_Par_Score':validy,'Pred_Par_Score':predictionsEmbedded})
apdf['Actual_Par_Score'] = apdf['Actual_Par_Score'].astype('int16')

In [40]:
apdf['Actual_Pred_Diff_Par_Score'] = (apdf['Actual_Par_Score']-apdf['Pred_Par_Score'])
apdf['Actual_Pred_Diff_Par_Score'].value_counts()

 0       83898
 30      23701
-30      18833
 20      12251
 10      11902
-10      11165
-20      11090
-280      7017
 280      6874
 40       6598
 480      5205
 200      5186
-40       5133
-200      4876
-480      4724
 530      4429
-240      4417
 520      4360
-530      3715
-780      3589
-520      3533
 780      3455
 510      3392
 310      3365
 240      3363
         ...  
-1390        1
-1460        1
-2020        1
-2610        1
 1820        1
 2420        1
-1870        1
-1630        1
-2010        1
 1530        1
 2090        1
 1840        1
 2660        1
 2080        1
 1500        1
-2320        1
-1030        1
-1930        1
 1760        1
 1870        1
-1820        1
 2120        1
 2030        1
 1630        1
-2080        1
Name: Actual_Pred_Diff_Par_Score, Length: 352, dtype: int64