In [2]:
import tensorflow as tf
import keras_models as kms
import metrics
import bnn_predictions as bnn
import numpy as np
import sklearn.model_selection
import sklearn.preprocessing
import tools
import keras_losses as kl

  from ._conv import register_converters as _register_converters


In [3]:
fifa_a = np.genfromtxt('../../data/kaggle/FIFA_2018_Statistics_data1.csv', delimiter=',')
x = fifa_a[:,:-1]
y = fifa_a[:,-1]

In [4]:
#MPE with prior (regularisation)

In [5]:
#architecture parameters
num_inputs = x.shape[1]
num_outputs = 2
layer_sizes = []
reg_coeffs = [0.5, 0.5]
#propagation parameters
epochs = 1000
m = x.shape[0]
training_size = 48. / 64.
test_size = 1 - training_size
train_num = int(m * training_size)
batch_num = 1 * train_num
test_num = int(m * test_size)

In [6]:
#compile model
model_r = kms.slp_sm_r(num_inputs, num_outputs, layer_sizes, reg_coeffs)
model_r.compile(optimizer='adagrad',
              loss=kl.sum_of_categorical_crossentropy,
              metrics=['accuracy'])

Instructions for updating:
keep_dims is deprecated, use keepdims instead
Instructions for updating:
keep_dims is deprecated, use keepdims instead


In [7]:
MPE_test_losses = []
MPE_test_accuracies = []
seed_ns = range(1,11)
for seed_n in seed_ns:
    np.random.seed(seed_n)
    x_train, x_test, y_train, y_test = sklearn.model_selection.train_test_split(x, y, test_size = test_size)
    y_train = tf.keras.utils.to_categorical(y_train, num_outputs)
    y_test = tf.keras.utils.to_categorical(y_test, num_outputs)

    #scale training data to have zero mean and unit variance
    scaler = sklearn.preprocessing.StandardScaler(copy = False)
    scaler.fit(x_train)
    scaler.transform(x_train)
    scaler.transform(x_test)

    #for sake of fairness, try all analysis with same training/test split, so save these to disc.
    #note y is now stored in categorical form
    np.savetxt("../../data/kaggle/FIFA_2018_Statistics_x_tr_" + str(seed_n) + ".csv", x_train, delimiter=",")
    np.savetxt("../../data/kaggle/FIFA_2018_Statistics_y_tr_" + str(seed_n) + ".csv", y_train, delimiter=",")
    np.savetxt("../../data/kaggle/FIFA_2018_Statistics_x_te_" + str(seed_n) + ".csv", x_test, delimiter=",")
    np.savetxt("../../data/kaggle/FIFA_2018_Statistics_y_te_" + str(seed_n) + ".csv", y_test, delimiter=",")
    #train model
    history_r = model_r.fit(x_train, y_train,
                        batch_size=batch_num,
                        epochs=epochs,
                        verbose=0)
                            
    #save model
    model_r.save("../../MPE_examples/saved_keras_models/kaggle/FIFA_2018_Statistics_slp_sm_" + str(seed_n) + ".h5")
    
    #calculate evaluation metrics
    y_pred_r = model_r.predict(x_test)
    y_test, y_pred_r = tools.check_dtypes(y_test, y_pred_r)
    test_loss_r = metrics.categorical_crossentropy(y_test, y_pred_r) #averaged over test set size
    test_acc_r = metrics.categorical_accuracy(y_test, y_pred_r)
    MPE_test_losses.append(test_loss_r)
    MPE_test_accuracies.append(test_acc_r)

In [66]:
#this is for testing purposes 
#model.evaluate should be used with caution, as if mode contains regularisation, the method will include it
score_r = model_r.evaluate(x_test, y_test, verbose=0, batch_size=test_num)
print('Test loss:', score_r[0])
print('Test accuracy:', score_r[1])

('Test loss:', 0.5254570841789246)
('Test accuracy:', 1.0)
0.4116108051200969


In [97]:
weight_arr_r, bias_arr_r = model_r.get_weights()
weight_magnitude_r = np.linalg.norm(weight_arr_r)
print weight_arr_r
print weight_magnitude_r
print bias_arr_r

[[-0.43916413  0.44116676]
 [-0.3348639   0.3352341 ]
 [ 0.0012521  -0.00256963]
 [-0.30893424  0.31592762]
 [-0.36481038  0.36478618]
 [ 0.22088502 -0.23651724]
 [-0.20047458  0.20897913]
 [-0.17499119  0.17530404]
 [ 0.06792135 -0.06263451]
 [-0.3680093   0.3811466 ]
 [-0.05029045  0.05024424]
 [-0.02812219  0.02831184]
 [-0.08611279  0.08709248]
 [ 0.09188789 -0.08857699]]
1.2847356
[-0.03243397  0.03243397]


In [96]:
#cpp

In [27]:
# expected estimates
bnn_test_losses = []
bnn_test_accuracies = []
for seed_n in seed_ns:
    chains_file = '../cpp_models/cpp_chains/FIFA_2018_Statistics_slp_sm_' + str(seed_n) + '.txt'
    x_test = np.genfromtxt("../../data/kaggle/FIFA_2018_Statistics_x_te_" + str(seed_n) + ".csv", delimiter=',')
    y_test = np.genfromtxt("../../data/kaggle/FIFA_2018_Statistics_y_te_" + str(seed_n) + ".csv", delimiter=',')
    bmodel = kms.slp_sm(num_inputs, num_outputs, layer_sizes)
    bnn_model = bnn.bnn_predictor(bmodel, x_test, y_test, chains_file)
    by_pred = bnn_model.predictions_expectation()
    y_test, by_pred = tools.check_dtypes(y_test, by_pred)
    b_test_loss = metrics.categorical_crossentropy(y_test, by_pred)
    b_test_acc = metrics.categorical_accuracy(y_test, by_pred)
    bnn_test_losses.append(b_test_loss)
    bnn_test_accuracies.append(b_test_acc)

In [29]:
# MPE estimates
bnn_MPE_test_losses = []
bnn_MPE_test_accuracies = []
for seed_n in seed_ns:
    chains_file = '../cpp_models/cpp_chains/FIFA_2018_Statistics_slp_sm_' + str(seed_n) + '.txt'
    x_test = np.genfromtxt("../../data/kaggle/FIFA_2018_Statistics_x_te_" + str(seed_n) + ".csv", delimiter=',')
    y_test = np.genfromtxt("../../data/kaggle/FIFA_2018_Statistics_y_te_" + str(seed_n) + ".csv", delimiter=',')
    bmodel = kms.slp_sm(num_inputs, num_outputs, layer_sizes)
    bnn_model = bnn.bnn_predictor(bmodel, x_test, y_test, chains_file)
    by_pred = bnn_model.MPE_prediction()
    y_test, by_pred = tools.check_dtypes(y_test, by_pred)
    b_test_loss = metrics.categorical_crossentropy(y_test, by_pred)
    b_test_acc = metrics.categorical_accuracy(y_test, by_pred)
    bnn_MPE_test_losses.append(b_test_loss)
    bnn_MPE_test_accuracies.append(b_test_acc)

In [99]:
b_weight_arr, b_bias_arr = bnn_model.model.get_weights()
b_weight_magnitude = np.linalg.norm(b_weight_arr)
print b_weight_arr
print b_weight_magnitude
print b_bias_arr

[[-1.8274463   1.0707233 ]
 [ 1.7094133   2.1497848 ]
 [ 0.16490114  2.1739173 ]
 [-1.3065757  -0.5605142 ]
 [ 0.39965272  1.2336111 ]
 [ 0.20807227  0.36302057]
 [-1.7788959  -1.1387717 ]
 [ 0.49126852  2.0472205 ]
 [-0.01662266  0.9817281 ]
 [-1.993626   -0.17394267]
 [ 1.3757656   1.7654557 ]
 [ 0.14453389 -1.1336504 ]
 [-0.509835   -0.45083138]
 [ 0.32196507  2.0590575 ]]
6.756143
[0.76898944 1.6223899 ]


In [9]:
print MPE_test_losses
print MPE_test_accuracies

[0.646150223639557, 0.5942609350490429, 0.32318632259460517, 0.546431633332441, 0.29450505214739575, 0.287850269842976, 0.2842792626669594, 0.3321624040501262, 0.30422067663949814, 0.32166057952266724]
0.7875


In [30]:
print bnn_test_losses
print bnn_test_accuracies

[0.366062825470115, 0.6284091941729878, 0.24924489948015272, 0.5710904364299889, 0.24501502361292296, 0.3579497096714941, 0.27207251425240053, 0.3570041982039275, 0.34970751339715545, 0.3565284383714001]
[0.75, 0.625, 0.9375, 0.625, 0.9375, 0.8125, 0.875, 0.8125, 0.8125, 0.8125]


In [31]:
print bnn_MPE_test_losses
print bnn_MPE_test_accuracies

[0.8409075339178537, 0.8671017332912981, 0.550785078166951, 0.7411403889394357, 0.25798395642442773, 0.7648644009996977, 0.27389507183596595, 0.6367651840596237, 0.9569289182702858, 0.5288760076862427]
[0.5625, 0.625, 0.75, 0.625, 0.9375, 0.8125, 0.875, 0.8125, 0.625, 0.75]
