In [56]:
#trying to implement ensemble method
#https://datascience.stackexchange.com/questions/27169/taking-average-of-multiple-neural-networks
#mixture of experts 'with kmeans'
#https://en.wikipedia.org/wiki/Mixture_of_experts
#combining models together university of Tartu
#https://courses.cs.ut.ee/MTAT.03.277/2014_fall/uploads/Main/deep-learning-lecture-9-combining-multiple-neural-networks-to-improve-generalization-andres-viikmaa.pdf
import tensorflow as tf
import numpy as np
import keras
from keras.utils import to_categorical

(trX, trY), (teX, teY) = tf.keras.datasets.fashion_mnist.load_data()
trX = trX.reshape(60000, 784)
teX = teX.reshape(10000, 784)

trY = to_categorical(trY)
teY = to_categorical(teY)


print("x_train shape:", trX.shape, "y_train shape:", trY.shape)
print("x_test shape:", teX.shape, "y_test shape:", teY.shape)

total_data_set = np.vstack((trX, teX))
total_label_set = np.vstack((trY, teY))

x_train shape: (60000, 784) y_train shape: (60000, 10)
x_test shape: (10000, 784) y_test shape: (10000, 10)


In [57]:
def init_weights(shape):
    return tf.Variable(tf.random_normal(shape, stddev=0.01))


def model(X, w_h1, w_h2, w_o):
    h1 = tf.nn.sigmoid(tf.matmul(X, w_h1)) # this is a basic mlp, think 2 stacked logistic regressions
    h = tf.nn.sigmoid(tf.matmul(h1, w_h2))
    #return tf.matmul(h, w_o, name="insertname_here") if we need to use names and save the models
    return tf.matmul(h, w_o) # note that we dont take the softmax at the end because our cost fn does that for us

#taken from https://stackoverflow.com/questions/47518609/for-loop-range-and-interval-how-to-include-last-step
def myRange(start,end,step):
    i = start
    while i < end:
        yield i
        i += step
    yield end

In [66]:
#saver = tf.train.Saver()

prediction = 0

model_accuracy = 0

models_to_train = 5

cluster_centers = np.load('kmeansclusters/' + str(models_to_train) + '.npy')
cluster_labels = np.load('kmeanslabels/' + str(models_to_train) + '.npy')

epochs_per_model = 1

batch_size = 128

number_of_folds = 10

multiple_experts_accuracies = []

# Launch the graph in a session
with tf.Session() as sess:
    for fold in range(0,number_of_folds):
        model_accuracy = 0
        
        print("fold number: " + str(fold))
        size_of_fold = int(len(total_data_set)/number_of_folds)
        trX = np.vstack((total_data_set[0:fold*size_of_fold], total_data_set[(fold + 1)*size_of_fold:len(total_data_set)]))
        teX = total_data_set[fold*size_of_fold:(fold + 1)*size_of_fold]
        
        trY = np.vstack((total_label_set[0:fold*size_of_fold], total_label_set[(fold + 1)*size_of_fold:len(total_label_set)]))
        teY = total_label_set[fold*size_of_fold:(fold + 1)*size_of_fold]
        
        print("x_train shape:", trX.shape, "y_train shape:", trY.shape)
        print("x_test shape:", teX.shape, "y_test shape:", teY.shape)
        
        partitioned_train_data = []
        partitioned_test_data = []
        partitioned_train_labels = []
        partitioned_test_labels = []

        for i in range(0,models_to_train):
            partitioned_train_data.append([])
            partitioned_test_data.append([])
            partitioned_train_labels.append([])
            partitioned_test_labels.append([])
            
        for i in range(0,models_to_train):
            for j in range(0,len(trX)):
                if cluster_labels[j] == i:
                    partitioned_train_data[i].append(trX[j])
                    partitioned_train_labels[i].append(trY[j])
            #for j in range(0,len(teX)):
                #if cluster_labels[j + len(trX)] == i:
                    #partitioned_test_data[i].append(teX[j])
                    #partitioned_test_labels[i].append(teY[j]) should have close to 30 percent accuracy
        for j in range(0,len(teX)):
            closest_index = 0
            closest_distance = math.inf
            for y in range(0, cluster_centers):
                temp_distance = np.norm(cluster_centers[y], teX[j])
                if closest_distance > temp_distance:
                    closest_index = y
            partitioned_test_data[closest_index].append(teX[j])
            partitioned_test_labels[closest_index].append(teY[j])
        #try out the above 9 lines of code

            partitioned_train_data[i] = np.vstack(partitioned_train_data[i])
            partitioned_test_data[i] = np.vstack(partitioned_test_data[i])
            partitioned_train_labels[i] = np.vstack(partitioned_train_labels[i])
            partitioned_test_labels[i] = np.vstack(partitioned_test_labels[i])

            print(partitioned_train_data[i].shape)
            print(partitioned_test_data[i].shape)
            print(partitioned_train_labels[i].shape)
            print(partitioned_test_labels[i].shape)
        
        average_model_accuracy = 0
        for z in range(0,models_to_train):
            size_h1 = tf.constant(625, dtype=tf.int32)
            size_h2 = tf.constant(300, dtype=tf.int32)

            X = tf.placeholder("float", [None, 784])
            Y = tf.placeholder("float", [None, 10])

            w_h1 = init_weights([784, size_h1]) # create symbolic variables
            w_h2 = init_weights([size_h1, size_h2])
            w_o = init_weights([size_h2, 10])

            py_x = model(X, w_h1, w_h2, w_o)

            trX = partitioned_train_data[z]
            teX = partitioned_test_data[z]
            trY = partitioned_train_labels[z]
            teY = partitioned_test_labels[z]

            cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=py_x, labels=Y)) # compute costs
            train_op = tf.train.GradientDescentOptimizer(0.05).minimize(cost) # construct an optimizer
            predict_op = tf.argmax(py_x, 1)
            tf.global_variables_initializer().run()
            for i in range(epochs_per_model):
                for start, end in zip(myRange(0, len(trX), batch_size), myRange(batch_size, len(trX)+1, batch_size)):
                    sess.run(train_op, feed_dict={X: trX[start:end], Y: trY[start:end]})
                print(i, np.mean(np.argmax(teY, axis=1) ==
                                 sess.run(predict_op, feed_dict={X: teX})))
            model_accuracy = np.sum(np.argmax(teY, axis=1) == sess.run(predict_op, feed_dict={X: teX})) + model_accuracy

            
        print("accuracy: " + str(model_accuracy/(len(total_data_set)/number_of_folds)))
        multiple_experts_accuracies.append(model_accuracy/(len(total_data_set)/number_of_folds))
        

average_accuracy = 0
for i in multiple_experts_accuracies:
    average_accuracy = average_accuracy + i/number_of_folds

print("mean accuracy across k folds: " + str(average_accuracy))

standard_deviation = 0
variance = 0
for i in multiple_experts_accuracies:
    variance = variance + ((i - average_accuracy) * (i - average_accuracy))
    
standard_deviation = np.sqrt(variance/number_of_folds)
standard_error = standard_deviation/np.sqrt(number_of_folds)

#using 95 percent confidence interval



print("confidence interval: " + str(average_accuracy - 1.96*standard_error) + ", " + str(average_accuracy + 1.96*standard_error))

        
        #saver.save(sess,"mlp/session.ckpt")

fold number: 0
x_train shape: (63000, 784) y_train shape: (63000, 10)
x_test shape: (7000, 784) y_test shape: (7000, 10)
(18383, 784)
(2094, 784)
(18383, 10)
(2094, 10)
(13820, 784)
(1542, 784)
(13820, 10)
(1542, 10)
(9938, 784)
(1111, 784)
(9938, 10)
(1111, 10)
(11522, 784)
(1224, 784)
(11522, 10)
(1224, 10)
(9337, 784)
(1029, 784)
(9337, 10)
(1029, 10)
0 0.5482330468003821
0 0.3009079118028534
0 0.40594059405940597
0 0.184640522875817
0 0.3848396501457726
accuracy: 0.38357142857142856
fold number: 1
x_train shape: (63000, 784) y_train shape: (63000, 10)
x_test shape: (7000, 784) y_test shape: (7000, 10)
(18383, 784)
(2094, 784)
(18383, 10)
(2094, 10)
(13820, 784)
(1542, 784)
(13820, 10)
(1542, 10)
(9938, 784)
(1111, 784)
(9938, 10)
(1111, 10)
(11522, 784)
(1224, 784)
(11522, 10)
(1224, 10)
(9337, 784)
(1029, 784)
(9337, 10)
(1029, 10)
0 0.4665711556829035
0 0.25810635538262
0 0.33483348334833485
0 0.190359477124183
0 0.2925170068027211
accuracy: 0.32585714285714285
fold number: 2
x_t

Exception ignored in: <bound method BaseSession.__del__ of <tensorflow.python.client.session.Session object at 0x000001D121C99320>>
Traceback (most recent call last):
  File "c:\users\nick\appdata\local\programs\python\python36\lib\site-packages\tensorflow\python\client\session.py", line 707, in __del__
    tf_session.TF_DeleteSession(self._session)
KeyboardInterrupt: 


KeyboardInterrupt: 