In [56]:
#trying to implement ensemble method
#https://datascience.stackexchange.com/questions/27169/taking-average-of-multiple-neural-networks
#mixture of experts 'with kmeans'
#https://en.wikipedia.org/wiki/Mixture_of_experts
#combining models together university of Tartu
#https://courses.cs.ut.ee/MTAT.03.277/2014_fall/uploads/Main/deep-learning-lecture-9-combining-multiple-neural-networks-to-improve-generalization-andres-viikmaa.pdf
import tensorflow as tf
import numpy as np
from tensorflow.examples.tutorials.mnist import input_data
import keras
from keras.utils import to_categorical

(trX, trY), (teX, teY) = tf.keras.datasets.fashion_mnist.load_data()
trX = trX.reshape(60000, 784)
teX = teX.reshape(10000, 784)

trY = to_categorical(trY)
teY = to_categorical(teY)


print("x_train shape:", trX.shape, "y_train shape:", trY.shape)
print("x_test shape:", teX.shape, "y_test shape:", teY.shape)

total_data_set = np.vstack((trX, teX))
total_label_set = np.vstack((trY, teY))

x_train shape: (60000, 784) y_train shape: (60000, 10)
x_test shape: (10000, 784) y_test shape: (10000, 10)


In [57]:
def init_weights(shape):
    return tf.Variable(tf.random_normal(shape, stddev=0.01))


def model(X, w_h1, w_h2, w_o):
    h1 = tf.nn.sigmoid(tf.matmul(X, w_h1)) # this is a basic mlp, think 2 stacked logistic regressions
    h = tf.nn.sigmoid(tf.matmul(h1, w_h2))
    #return tf.matmul(h, w_o, name="insertname_here") if we need to use names and save the models
    return tf.matmul(h, w_o) # note that we dont take the softmax at the end because our cost fn does that for us

#taken from https://stackoverflow.com/questions/47518609/for-loop-range-and-interval-how-to-include-last-step
def myRange(start,end,step):
    i = start
    while i < end:
        yield i
        i += step
    yield end

In [58]:
cluster_centers = np.load('kmeansclusters/' + str(models_to_train) + '.npy')
cluster_labels = np.load('kmeanslabels/' + str(models_to_train) + '.npy')


In [60]:
#saver = tf.train.Saver()

prediction = 0

model_accuracy = 0

models_to_train = 5

epochs_per_model = 10

batch_size = 128

number_of_folds = 10

multiple_experts_accuracies = []

# Launch the graph in a session
with tf.Session() as sess:
    for fold in range(0,number_of_folds):
        model_accuracy = 0
        
        print("fold number: " + str(fold))
        size_of_fold = int(len(total_data_set)/number_of_folds)
        trX = np.vstack((total_data_set[0:fold*size_of_fold], total_data_set[(fold + 1)*size_of_fold:len(total_data_set)]))
        teX = total_data_set[fold*size_of_fold:(fold + 1)*size_of_fold]
        
        trY = np.vstack((total_label_set[0:fold*size_of_fold], total_label_set[(fold + 1)*size_of_fold:len(total_label_set)]))
        teY = total_label_set[fold*size_of_fold:(fold + 1)*size_of_fold]
        
        print("x_train shape:", trX.shape, "y_train shape:", trY.shape)
        print("x_test shape:", teX.shape, "y_test shape:", teY.shape)
        
        partitioned_train_data = []
        partitioned_test_data = []
        partitioned_train_labels = []
        partitioned_test_labels = []

        for i in range(0,models_to_train):
            partitioned_train_data.append([])
            partitioned_test_data.append([])
            partitioned_train_labels.append([])
            partitioned_test_labels.append([])
            
        for i in range(0,models_to_train):
            for j in range(0,len(trX)):
                if cluster_labels[j] == i:
                    partitioned_train_data[i].append(trX[j])
                    partitioned_train_labels[i].append(trY[j])
            for j in range(0,len(teX)):
                if cluster_labels[j + len(trX)] == i:
                    partitioned_test_data[i].append(teX[j])
                    partitioned_test_labels[i].append(teY[j])

            partitioned_train_data[i] = np.vstack(partitioned_train_data[i])
            partitioned_test_data[i] = np.vstack(partitioned_test_data[i])
            partitioned_train_labels[i] = np.vstack(partitioned_train_labels[i])
            partitioned_test_labels[i] = np.vstack(partitioned_test_labels[i])

            print(partitioned_train_data[i].shape)
            print(partitioned_test_data[i].shape)
            print(partitioned_train_labels[i].shape)
            print(partitioned_test_labels[i].shape)
        
        average_model_accuracy = 0
        for z in range(0,models_to_train):
            size_h1 = tf.constant(625, dtype=tf.int32)
            size_h2 = tf.constant(300, dtype=tf.int32)

            X = tf.placeholder("float", [None, 784])
            Y = tf.placeholder("float", [None, 10])

            w_h1 = init_weights([784, size_h1]) # create symbolic variables
            w_h2 = init_weights([size_h1, size_h2])
            w_o = init_weights([size_h2, 10])

            py_x = model(X, w_h1, w_h2, w_o)

            trX = partitioned_train_data[z]
            teX = partitioned_test_data[z]
            trY = partitioned_train_labels[z]
            teY = partitioned_test_labels[z]

            cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=py_x, labels=Y)) # compute costs
            train_op = tf.train.GradientDescentOptimizer(0.05).minimize(cost) # construct an optimizer
            predict_op = tf.argmax(py_x, 1)
            tf.global_variables_initializer().run()
            for i in range(epochs_per_model):
                for start, end in zip(myRange(0, len(trX), batch_size), myRange(batch_size, len(trX)+1, batch_size)):
                    sess.run(train_op, feed_dict={X: trX[start:end], Y: trY[start:end]})
                print(i, np.mean(np.argmax(teY, axis=1) ==
                                 sess.run(predict_op, feed_dict={X: teX})))
            model_accuracy = np.sum(np.argmax(teY, axis=1) == sess.run(predict_op, feed_dict={X: teX})) + model_accuracy

            
        print("accuracy: " + str(model_accuracy/10000))
        multiple_experts_accuracies.append(model_accuracy/10000)
        
        #saver.save(sess,"mlp/session.ckpt")

fold number: 0
x_train shape: (63000, 784) y_train shape: (63000, 10)
x_test shape: (7000, 784) y_test shape: (7000, 10)
(18383, 784)
(2094, 784)
(18383, 10)
(2094, 10)
(13820, 784)
(1542, 784)
(13820, 10)
(1542, 10)
(9938, 784)
(1111, 784)
(9938, 10)
(1111, 10)
(11522, 784)
(1224, 784)
(11522, 10)
(1224, 10)
(9337, 784)
(1029, 784)
(9337, 10)
(1029, 10)
0 0.5339063992359121
1 0.6485195797516714
2 0.7010506208213945
3 0.7340019102196753
4 0.7602674307545367
5 0.76981852913085
6 0.775549188156638
7 0.7960840496657116
8 0.792741165234002
9 0.7979942693409742
0 0.29507133592736706
1 0.45590142671854733
2 0.6459143968871596
3 0.6900129701686122
4 0.7302204928664072
5 0.748378728923476
6 0.7607003891050583
7 0.7691309987029832
8 0.7782101167315175
9 0.7788586251621271
0 0.4365436543654365
1 0.47974797479747977
2 0.540954095409541
3 0.5607560756075608
4 0.6039603960396039
5 0.6615661566156615
6 0.7155715571557155
7 0.729072907290729
8 0.7317731773177317
9 0.7353735373537353
0 0.1854575163398

6 0.6421568627450981
7 0.6470588235294118
8 0.6552287581699346
9 0.6617647058823529
0 0.1933916423712342
1 0.2925170068027211
2 0.3965014577259475
3 0.4606413994169096
4 0.4878522837706511
5 0.5150631681243926
6 0.5451895043731778
7 0.5626822157434402
8 0.5801749271137027
9 0.6180758017492711
accuracy: 0.4917
fold number: 6
x_train shape: (63000, 784) y_train shape: (63000, 10)
x_test shape: (7000, 784) y_test shape: (7000, 10)
(18383, 784)
(2094, 784)
(18383, 10)
(2094, 10)
(13820, 784)
(1542, 784)
(13820, 10)
(1542, 10)
(9938, 784)
(1111, 784)
(9938, 10)
(1111, 10)
(11522, 784)
(1224, 784)
(11522, 10)
(1224, 10)
(9337, 784)
(1029, 784)
(9337, 10)
(1029, 10)
0 0.3233046800382044
1 0.545367717287488
2 0.6289398280802292
3 0.6361031518624641
4 0.6752626552053486
5 0.6910219675262655
6 0.711556829035339
7 0.7292263610315186
8 0.7340019102196753
9 0.7464183381088825
0 0.3125810635538262
1 0.41180285343709466
2 0.433852140077821
3 0.446822308690013
4 0.45460440985732814
5 0.485732814526588

KeyboardInterrupt: 