In [1]:
#trying to implement ensemble method
#https://datascience.stackexchange.com/questions/27169/taking-average-of-multiple-neural-networks
#mixture of experts 'with kmeans'
#https://en.wikipedia.org/wiki/Mixture_of_experts
#combining models together (lecture slides, University of Tartu)
#https://courses.cs.ut.ee/MTAT.03.277/2014_fall/uploads/Main/deep-learning-lecture-9-combining-multiple-neural-networks-to-improve-generalization-andres-viikmaa.pdf
import tensorflow as tf
import numpy as np
import keras
from keras.utils import to_categorical
import math
import time

# Load Fashion-MNIST, flatten every image into a 784-element row and
# one-hot encode the labels.
(trX, trY), (teX, teY) = tf.keras.datasets.fashion_mnist.load_data()
trX, teX = trX.reshape(60000, 784), teX.reshape(10000, 784)
trY, teY = to_categorical(trY), to_categorical(teY)

print("x_train shape:", trX.shape, "y_train shape:", trY.shape)
print("x_test shape:", teX.shape, "y_test shape:", teY.shape)

# Stack the train rows on top of the test rows so that the folds can later be
# cut from one combined pool of samples and labels.
total_data_set = np.concatenate((trX, teX), axis=0)
total_label_set = np.concatenate((trY, teY), axis=0)

Using TensorFlow backend.


x_train shape: (60000, 784) y_train shape: (60000, 10)
x_test shape: (10000, 784) y_test shape: (10000, 10)


In [2]:
def init_weights(shape):
    """Return a `tf.Variable` of the given shape whose initial value is drawn
    from `tf.random_normal` with a standard deviation of 0.01."""
    initial_values = tf.random_normal(shape, stddev=0.01)
    return tf.Variable(initial_values)


def model(X, w_h1, w_h2, w_o):
    """Build a basic MLP: two sigmoid hidden layers followed by a linear output.

    X is multiplied by w_h1, then w_h2, with a sigmoid after each product; the
    result is multiplied by w_o and returned as raw logits. No softmax is
    applied here because the cost function takes care of it.
    """
    first_hidden = tf.nn.sigmoid(tf.matmul(X, w_h1))
    second_hidden = tf.nn.sigmoid(tf.matmul(first_hidden, w_h2))
    # pass name="..." to this matmul if the output op has to be looked up by
    # name after saving/restoring the model
    logits = tf.matmul(second_hidden, w_o)
    return logits

#taken from https://stackoverflow.com/questions/47518609/for-loop-range-and-interval-how-to-include-last-step
def myRange(start,end,step):
    """Yield start, start+step, ... while the value is below `end`, then always
    finish by yielding `end` itself (so the last step is included)."""
    current = start
    while True:
        if not (current < end):
            break
        yield current
        current += step
    # the end point is emitted unconditionally, even when start >= end
    yield end
    
def confidence_interval(data):
    """Print and return a 95 percent confidence interval for the mean of `data`.

    The spread is the population standard deviation (squared deviations are
    divided by len(data)), and the interval half-width is 1.96 standard
    errors. The printed label always reads "mean accuracy across k folds",
    whatever quantity `data` actually holds.

    Returns the list [lower bound, mean, upper bound].
    """
    sample_count = len(data)
    samples = [data[position] for position in range(sample_count)]

    # accumulate the mean one term at a time
    mean = 0
    for value in samples:
        mean = mean + value/sample_count

    print("mean accuracy across k folds: " + str(mean))

    squared_deviation_sum = 0
    for value in samples:
        deviation = value - mean
        squared_deviation_sum = squared_deviation_sum + (deviation * deviation)

    spread = np.sqrt(squared_deviation_sum/sample_count)
    half_width = 1.96*(spread/np.sqrt(sample_count))
    lower_bound = mean - half_width
    upper_bound = mean + half_width

    print("confidence interval: " + str(lower_bound) + ", " + str(upper_bound))
    return [lower_bound, mean, upper_bound]

In [3]:
# Mixture-of-experts experiment.
#
# For every expert count from models_to_train_begin to models_to_train_end
# (inclusive), run k-fold cross-validation over total_data_set:
#   1. load the k-means cluster centres stored for that (expert count, fold),
#   2. route every train/test sample to its nearest centre,
#   3. train one MLP per cluster on that cluster's training samples,
#   4. score each MLP on the test samples routed to the same cluster.
# Per-fold accuracy and CPU training time are summarised with
# confidence_interval() and saved as .npy files at the end.

prediction = 0  # not used by this cell; retained in case later cells read it

model_accuracy = 0

models_to_train = 2

models_to_train_begin = 2
models_to_train_end = 10

confidence_interval_across_experiments = []
confidence_interval_across_experiments_time = []
kfold_times = []

epochs_per_model = 3

batch_size = 128

number_of_folds = 10

multiple_experts_accuracies = []
multiple_experts_time = []


def _nearest_center_indices(samples, centers, chunk_size=4096):
    """Return, for every row of `samples`, the index of the closest row of `centers`.

    Distances are Euclidean norms computed in float64. Ties go to the
    lowest centre index (np.argmin returns the first minimum). Samples are
    processed `chunk_size` rows at a time to bound the size of the temporary
    difference arrays.
    """
    centers = np.asarray(centers, dtype=np.float64)
    nearest = np.empty(len(samples), dtype=np.int64)
    for begin in range(0, len(samples), chunk_size):
        chunk = np.asarray(samples[begin:begin + chunk_size], dtype=np.float64)
        # one column of distances per centre -> shape (rows in chunk, number of centres)
        distances = np.stack(
            [np.linalg.norm(chunk - center, axis=1) for center in centers], axis=1)
        nearest[begin:begin + chunk_size] = np.argmin(distances, axis=1)
    return nearest


# Build the network exactly once, in its own graph. Creating the placeholders,
# variables and optimizer inside the loops would add another full copy of the
# network to the graph for every expert, and every global initializer call
# would then re-initialise all of the copies created so far.
graph = tf.Graph()
with graph.as_default():
    size_h1 = 625
    size_h2 = 300

    X = tf.placeholder("float", [None, 784])
    Y = tf.placeholder("float", [None, 10])

    w_h1 = init_weights([784, size_h1]) # create symbolic variables
    w_h2 = init_weights([size_h1, size_h2])
    w_o = init_weights([size_h2, 10])

    py_x = model(X, w_h1, w_h2, w_o)

    cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=py_x, labels=Y)) # compute costs
    train_op = tf.train.GradientDescentOptimizer(0.05).minimize(cost) # construct an optimizer
    predict_op = tf.argmax(py_x, 1)
    # running this op again draws fresh random weights, i.e. starts a new expert
    init_op = tf.global_variables_initializer()

# Launch the graph in a session
with tf.Session(graph=graph) as sess:
    for n in range(models_to_train_begin, models_to_train_end + 1):
        models_to_train = n
        multiple_experts_accuracies = []
        multiple_experts_time = []
        for fold in range(0, number_of_folds):
            # number of correctly classified test samples, summed over all experts
            model_accuracy = 0

            cluster_centers = np.load('kmeansclusters/' + str(models_to_train) + 'fold' + str(fold) + '.npy')
            if len(cluster_centers) != models_to_train:
                raise ValueError(
                    "expected " + str(models_to_train) + " cluster centers, found "
                    + str(len(cluster_centers)))

            print("fold number: " + str(fold))
            size_of_fold = int(len(total_data_set)/number_of_folds)
            fold_begin = fold*size_of_fold
            fold_end = (fold + 1)*size_of_fold

            # the current fold is held out for testing, everything else is training data
            trX = np.vstack((total_data_set[0:fold_begin], total_data_set[fold_end:len(total_data_set)]))
            teX = total_data_set[fold_begin:fold_end]

            trY = np.vstack((total_label_set[0:fold_begin], total_label_set[fold_end:len(total_label_set)]))
            teY = total_label_set[fold_begin:fold_end]

            print("x_train shape:", trX.shape, "y_train shape:", trY.shape)
            print("x_test shape:", teX.shape, "y_test shape:", teY.shape)

            # Route every sample to the expert that owns its nearest centre.
            # Boolean masks keep the original sample order inside each
            # partition and give an empty array (rather than an error) for a
            # cluster that receives no samples.
            test_assignment = _nearest_center_indices(teX, cluster_centers)
            train_assignment = _nearest_center_indices(trX, cluster_centers)

            partitioned_train_data = [trX[train_assignment == i] for i in range(0, models_to_train)]
            partitioned_test_data = [teX[test_assignment == i] for i in range(0, models_to_train)]
            partitioned_train_labels = [trY[train_assignment == i] for i in range(0, models_to_train)]
            partitioned_test_labels = [teY[test_assignment == i] for i in range(0, models_to_train)]

            for i in range(0,models_to_train):
                print(partitioned_train_data[i].shape)
                print(partitioned_test_data[i].shape)
                print(partitioned_train_labels[i].shape)
                print(partitioned_test_labels[i].shape)

            aggregated_model_times = 0
            for z in range(0,models_to_train):
                trX = partitioned_train_data[z]
                teX = partitioned_test_data[z]
                trY = partitioned_train_labels[z]
                teY = partitioned_test_labels[z]

                # fresh random weights for this expert
                sess.run(init_op)
                for i in range(epochs_per_model):
                    # only the training steps are timed (CPU time), not the evaluation
                    time_start = time.process_time()
                    for start in range(0, len(trX), batch_size):
                        end = start + batch_size  # slicing clips the final, shorter batch
                        sess.run(train_op, feed_dict={X: trX[start:end], Y: trY[start:end]})
                    time_stop = time.process_time()
                    aggregated_model_times = aggregated_model_times + (time_stop - time_start)
                    print(i, np.mean(np.argmax(teY, axis=1) ==
                                     sess.run(predict_op, feed_dict={X: teX})))
                model_accuracy = np.sum(np.argmax(teY, axis=1) == sess.run(predict_op, feed_dict={X: teX})) + model_accuracy


            # every test sample of the fold is scored by exactly one expert,
            # so dividing by the fold size gives the overall fold accuracy
            print("accuracy: " + str(model_accuracy/(len(total_data_set)/number_of_folds)))
            multiple_experts_accuracies.append(model_accuracy/(len(total_data_set)/number_of_folds))
            multiple_experts_time.append(aggregated_model_times)

        answer = confidence_interval(multiple_experts_accuracies)
        print(answer)
        confidence_interval_across_experiments.append(answer)

        kfold_times.append(multiple_experts_time)
        answer = confidence_interval(multiple_experts_time)
        print(answer)
        confidence_interval_across_experiments_time.append(answer)

print(confidence_interval_across_experiments)
print(confidence_interval_across_experiments_time)
confidence_interval_across_experiments = np.asarray(confidence_interval_across_experiments)
confidence_interval_across_experiments_time = np.asarray(confidence_interval_across_experiments_time)
kfold_times = np.asarray(kfold_times)
np.save("mixtureOfExpertsResultsTimeAllTimes.npy", kfold_times)
np.save("mixtureOfExpertsResultsAccuracy.npy", confidence_interval_across_experiments)
np.save("mixtureOfExpertsResultsTime.npy", confidence_interval_across_experiments_time)

fold number: 0
x_train shape: (63000, 784) y_train shape: (63000, 10)
x_test shape: (7000, 784) y_test shape: (7000, 10)
(30049, 784)
(3303, 784)
(30049, 10)
(3303, 10)
(32951, 784)
(3697, 784)
(32951, 10)
(3697, 10)
Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See `tf.nn.softmax_cross_entropy_with_logits_v2`.

0 0.6463820768997881
1 0.7699061459279443
2 0.8041174689676052
0 0.6080605896672978
1 0.655937246416013
2 0.6911008926156343
accuracy: 0.7444285714285714
fold number: 1
x_train shape: (63000, 784) y_train shape: (63000, 10)
x_test shape: (7000, 784) y_test shape: (7000, 10)
(29871, 784)
(3281, 784)
(29871, 10)
(3281, 10)
(33129, 784)
(3719, 784)
(33129, 10)
(3719, 10)
0 0.6610789393477599
1 0.7537336177994514
2 0.7814690643096617
0 0.5477278838397419
1 0.6773326162947029
2 0.6953482118849152
accuracy: 0.7357142857142858
fold number: 2
x_train shape: (63000, 784) y_train shape: (63000, 

(18336, 784)
(2071, 784)
(18336, 10)
(2071, 10)
(25132, 784)
(2775, 784)
(25132, 10)
(2775, 10)
(19532, 784)
(2154, 784)
(19532, 10)
(2154, 10)
0 0.4253983582810237
1 0.5171414775470787
2 0.6185417672621922
0 0.6918918918918919
1 0.7790990990990991
2 0.8
0 0.765552460538533
1 0.7785515320334262
2 0.7776230269266481
accuracy: 0.7394285714285714
fold number: 9
x_train shape: (63000, 784) y_train shape: (63000, 10)
x_test shape: (7000, 784) y_test shape: (7000, 10)
(18410, 784)
(2054, 784)
(18410, 10)
(2054, 10)
(25117, 784)
(2835, 784)
(25117, 10)
(2835, 10)
(19473, 784)
(2111, 784)
(19473, 10)
(2111, 10)
0 0.4595910418695229
1 0.5559883154819864
2 0.629016553067186
0 0.6500881834215168
1 0.7862433862433862
2 0.7851851851851852
0 0.6290857413548081
1 0.7702510658455708
2 0.7711984841307438
accuracy: 0.7351428571428571
mean accuracy across k folds: 0.7377571428571429
confidence interval: 0.7247559706638366, 0.7507583150504492
[0.7247559706638366, 0.7377571428571429, 0.7507583150504492]
me

(9928, 784)
(1132, 784)
(9928, 10)
(1132, 10)
(18414, 784)
(2019, 784)
(18414, 10)
(2019, 10)
(9301, 784)
(1044, 784)
(9301, 10)
(1044, 10)
(11504, 784)
(1257, 784)
(11504, 10)
(1257, 10)
(13853, 784)
(1548, 784)
(13853, 10)
(1548, 10)
0 0.8498233215547704
1 0.8533568904593639
2 0.8736749116607774
0 0.645864289252105
1 0.7340267459138187
2 0.7498761763249133
0 0.4099616858237548
1 0.5431034482758621
2 0.5459770114942529
0 0.7995226730310262
1 0.8114558472553699
2 0.843277645186953
0 0.42377260981912146
1 0.4405684754521964
2 0.47739018087855295
accuracy: 0.696
fold number: 2
x_train shape: (63000, 784) y_train shape: (63000, 10)
x_test shape: (7000, 784) y_test shape: (7000, 10)
(11471, 784)
(1251, 784)
(11471, 10)
(1251, 10)
(18394, 784)
(2121, 784)
(18394, 10)
(2121, 10)
(9316, 784)
(1058, 784)
(9316, 10)
(1058, 10)
(9985, 784)
(1061, 784)
(9985, 10)
(1061, 10)
(13834, 784)
(1509, 784)
(13834, 10)
(1509, 10)
0 0.820943245403677
1 0.8353317346123101
2 0.8721023181454837
0 0.6397925506

(11891, 784)
(1281, 784)
(11891, 10)
(1281, 10)
(9318, 784)
(994, 784)
(9318, 10)
(994, 10)
(9601, 784)
(1018, 784)
(9601, 10)
(1018, 10)
(11577, 784)
(1319, 784)
(11577, 10)
(1319, 10)
(8589, 784)
(951, 784)
(8589, 10)
(951, 10)
(12024, 784)
(1437, 784)
(12024, 10)
(1437, 10)
0 0.4137392661982826
1 0.5378610460577674
2 0.6143637782982045
0 0.9436619718309859
1 0.9466800804828974
2 0.9466800804828974
0 0.881139489194499
1 0.8821218074656189
2 0.8821218074656189
0 0.6141015921152388
1 0.8188021228203184
2 0.8369977255496588
0 0.4679284963196635
1 0.6225026288117771
2 0.6319663512092534
0 0.21433542101600556
1 0.5191370911621433
2 0.5845511482254697
accuracy: 0.7387142857142858
fold number: 3
x_train shape: (63000, 784) y_train shape: (63000, 10)
x_test shape: (7000, 784) y_test shape: (7000, 10)
(12129, 784)
(1304, 784)
(12129, 10)
(1304, 10)
(12148, 784)
(1331, 784)
(12148, 10)
(1331, 10)
(9422, 784)
(1044, 784)
(9422, 10)
(1044, 10)
(8512, 784)
(985, 784)
(8512, 10)
(985, 10)
(11485, 

1 0.8359375
2 0.8359375
0 0.8179571663920923
1 0.8179571663920923
2 0.8640856672158155
0 0.36741214057507987
1 0.4744408945686901
2 0.5399361022364217
accuracy: 0.727
fold number: 2
x_train shape: (63000, 784) y_train shape: (63000, 10)
x_test shape: (7000, 784) y_test shape: (7000, 10)
(9275, 784)
(988, 784)
(9275, 10)
(988, 10)
(11517, 784)
(1368, 784)
(11517, 10)
(1368, 10)
(11014, 784)
(1191, 784)
(11014, 10)
(1191, 10)
(6648, 784)
(709, 784)
(6648, 10)
(709, 10)
(4644, 784)
(497, 784)
(4644, 10)
(497, 10)
(8644, 784)
(963, 784)
(8644, 10)
(963, 10)
(11258, 784)
(1284, 784)
(11258, 10)
(1284, 10)
0 0.8977732793522267
1 0.9463562753036437
2 0.9453441295546559
0 0.2185672514619883
1 0.47149122807017546
2 0.5847953216374269
0 0.40973971452560876
1 0.4080604534005038
2 0.4164567590260285
0 0.8279266572637518
1 0.8279266572637518
2 0.8279266572637518
0 0.9074446680080482
1 0.9074446680080482
2 0.9074446680080482
0 0.46313603322949115
1 0.5482866043613707
2 0.5638629283489096
0 0.6012461

1 0.5615474794841735
2 0.5896834701055099
0 0.949671772428884
1 0.949671772428884
2 0.949671772428884
0 0.37453874538745385
1 0.37453874538745385
2 0.37453874538745385
0 0.9344552701505757
1 0.9362267493356953
2 0.9362267493356953
0 0.3744725738396624
1 0.4525316455696203
2 0.5221518987341772
0 0.6666666666666666
1 0.7546754675467546
2 0.7843784378437844
0 0.8676470588235294
1 0.8676470588235294
2 0.8676470588235294
accuracy: 0.6781428571428572
fold number: 1
x_train shape: (63000, 784) y_train shape: (63000, 10)
x_test shape: (7000, 784) y_test shape: (7000, 10)
(8161, 784)
(855, 784)
(8161, 10)
(855, 10)
(8972, 784)
(970, 784)
(8972, 10)
(970, 10)
(7847, 784)
(885, 784)
(7847, 10)
(885, 10)
(4019, 784)
(444, 784)
(4019, 10)
(444, 10)
(9509, 784)
(1042, 784)
(9509, 10)
(1042, 10)
(8405, 784)
(942, 784)
(8405, 10)
(942, 10)
(10005, 784)
(1147, 784)
(10005, 10)
(1147, 10)
(6082, 784)
(715, 784)
(6082, 10)
(715, 10)
0 0.6690058479532164
1 0.7590643274853801
2 0.7976608187134503
0 0.38247

2 0.8830584707646177
0 0.5
1 0.5913978494623656
2 0.5935483870967742
0 0.9279279279279279
1 0.9279279279279279
2 0.9279279279279279
0 0.31995987963891676
1 0.4573721163490471
2 0.5215646940822467
0 0.47139588100686497
1 0.5
2 0.5697940503432495
accuracy: 0.6652857142857143
fold number: 9
x_train shape: (63000, 784) y_train shape: (63000, 10)
x_test shape: (7000, 784) y_test shape: (7000, 10)
(8109, 784)
(909, 784)
(8109, 10)
(909, 10)
(8938, 784)
(1034, 784)
(8938, 10)
(1034, 10)
(3948, 784)
(450, 784)
(3948, 10)
(450, 10)
(7859, 784)
(876, 784)
(7859, 10)
(876, 10)
(10051, 784)
(1095, 784)
(10051, 10)
(1095, 10)
(8445, 784)
(919, 784)
(8445, 10)
(919, 10)
(9528, 784)
(1014, 784)
(9528, 10)
(1014, 10)
(6122, 784)
(703, 784)
(6122, 10)
(703, 10)
0 0.6776677667766776
1 0.6776677667766776
2 0.7040704070407041
0 0.22243713733075435
1 0.5038684719535783
2 0.5067698259187621
0 0.9155555555555556
1 0.9155555555555556
2 0.9155555555555556
0 0.4805936073059361
1 0.6289954337899544
2 0.654109589

0 0.35518157661647476
1 0.35518157661647476
2 0.3950398582816652
accuracy: 0.6712857142857143
fold number: 6
x_train shape: (63000, 784) y_train shape: (63000, 10)
x_test shape: (7000, 784) y_test shape: (7000, 10)
(2694, 784)
(294, 784)
(2694, 10)
(294, 10)
(10933, 784)
(1206, 784)
(10933, 10)
(1206, 10)
(7177, 784)
(825, 784)
(7177, 10)
(825, 10)
(9222, 784)
(1084, 784)
(9222, 10)
(1084, 10)
(8159, 784)
(873, 784)
(8159, 10)
(873, 10)
(6591, 784)
(728, 784)
(6591, 10)
(728, 10)
(10485, 784)
(1161, 784)
(10485, 10)
(1161, 10)
(5269, 784)
(537, 784)
(5269, 10)
(537, 10)
(2470, 784)
(292, 784)
(2470, 10)
(292, 10)
0 0.9489795918367347
1 0.9489795918367347
2 0.9489795918367347
0 0.5663349917081261
1 0.8399668325041459
2 0.8582089552238806
0 0.4206060606060606
1 0.4206060606060606
2 0.4206060606060606
0 0.9160516605166051
1 0.9326568265682657
2 0.9317343173431735
0 0.47880870561282934
1 0.6116838487972509
2 0.6311569301260023
0 0.8406593406593407
1 0.8406593406593407
2 0.8406593406593407


0 0.3794358507734304
1 0.3794358507734304
2 0.3794358507734304
0 0.891566265060241
1 0.891566265060241
2 0.891566265060241
0 0.9354838709677419
1 0.9433040078201369
2 0.9442815249266863
0 0.9471947194719472
1 0.9471947194719472
2 0.9471947194719472
0 0.7095959595959596
1 0.8055555555555556
2 0.8459595959595959
accuracy: 0.671
fold number: 3
x_train shape: (63000, 784) y_train shape: (63000, 10)
x_test shape: (7000, 784) y_test shape: (7000, 10)
(2703, 784)
(285, 784)
(2703, 10)
(285, 10)
(8143, 784)
(864, 784)
(8143, 10)
(864, 10)
(3097, 784)
(365, 784)
(3097, 10)
(365, 10)
(7832, 784)
(900, 784)
(7832, 10)
(900, 10)
(6794, 784)
(802, 784)
(6794, 10)
(802, 10)
(9539, 784)
(1041, 784)
(9539, 10)
(1041, 10)
(10131, 784)
(1102, 784)
(10131, 10)
(1102, 10)
(2477, 784)
(279, 784)
(2477, 10)
(279, 10)
(4529, 784)
(467, 784)
(4529, 10)
(467, 10)
(7755, 784)
(895, 784)
(7755, 10)
(895, 10)
0 0.9438596491228071
1 0.9438596491228071
2 0.9438596491228071
0 0.47453703703703703
1 0.5891203703703703

0 0.8937142857142857
1 0.9497142857142857
2 0.9474285714285714
0 0.25655021834061137
1 0.43231441048034935
2 0.4574235807860262
0 0.5477272727272727
1 0.6238636363636364
2 0.634090909090909
0 0.8583690987124464
1 0.8583690987124464
2 0.8583690987124464
0 0.47884940778341795
1 0.47884940778341795
2 0.48392554991539766
0 0.3868940754039497
1 0.4012567324955117
2 0.46858168761220825
0 0.8945454545454545
1 0.8945454545454545
2 0.8945454545454545
0 0.9552715654952076
1 0.9552715654952076
2 0.9552715654952076
0 0.8151447661469933
1 0.8151447661469933
2 0.8151447661469933
0 0.6914414414414415
1 0.7454954954954955
2 0.7927927927927928
accuracy: 0.6898571428571428
mean accuracy across k folds: 0.6808428571428571
confidence interval: 0.6739352565753662, 0.6877504577103479
[0.6739352565753662, 0.6808428571428571, 0.6877504577103479]
mean accuracy across k folds: 56.41406250000001
confidence interval: 54.145730749210315, 58.6823942507897
[54.145730749210315, 56.41406250000001, 58.6823942507897]
[[