In [1]:
import tensorflow as tf
import numpy as np
from sklearn.model_selection import RepeatedKFold
from sklearn.metrics import mean_absolute_error

In [2]:
# -Training Dataset-
dataset = np.load('datasets/dataset_train_3.npz')
dataset_x = dataset['X']
dataset_y = dataset['Y']

# Only use first num_samples samples
num_samples = 1000

dataset_x = dataset_x[:num_samples]
dataset_y = dataset_y[:num_samples]

# -Validation Dataset-
test_dataset = np.load("datasets/dataset_test.npz") 
test_dataset_x = test_dataset['X']
test_dataset_y = test_dataset['Y']
test_dataset_y_resh = np.reshape(test_dataset_y, (-1, 1)) 

print(dataset_x.shape)

(1000, 13, 26, 9)


In [3]:
class CNN:

    def __init__(self, name="CNN", writer_name="CNN",
                 l1_num_filt = 16, l1_window = [4,4], l1_strides = [1,1],
                 l2_num_filt = 32, l2_window = [2,2], l2_strides = [1,1],
                 padding_type = "VALID",
                 max_pool_size = [2, 2],
                 max_pool_str = [2, 2],
                 fc_num_units = 256):
        
        with tf.variable_scope(name):
            # Batch of inputs (game states, one-hot encoded)
            self.X = tf.placeholder(tf.float32, [None, 13, 26, 9], name="X") # type tf.float32 is needed for the rest of operations

            # Batch of outputs (correct predictions of number of actions)
            self.Y_corr = tf.placeholder(tf.float32, [None, 1], name="Y")
            
            # Dropout prob
            self.dropout_prob = tf.placeholder(tf.float32, name="dropout_prob")
            
            # Placeholder for batch normalization
            # During training (big batches) -> true, during test (small batches) -> false
            self.is_training = tf.placeholder(tf.bool, name="is_training")

            
            """
            Batch Normalization of inputs
            """
            
            self.X_norm = tf.layers.batch_normalization(self.X, axis = 3, momentum=0.99, training=self.is_training)

            
            """
            First convnet:
            """
            
            # Padding = "VALID" -> no padding, "SAME" -> padding to keep the output dimension the same as the input one
            
            self.conv1 = tf.layers.conv2d(inputs = self.X_norm,
                                         filters = l1_num_filt,
                                         kernel_size = l1_window,
                                         strides = l1_strides,
                                         padding = padding_type,
                                         activation = tf.nn.relu,
                                         use_bias = True,
                                         kernel_initializer=tf.contrib.layers.xavier_initializer_conv2d(),
                                         name = "conv1")
            
            # Flatten output of conv layers
            
            #self.flatten = tf.contrib.layers.flatten(self.conv1)
            
            # Max pooling
            
            self.conv1 = tf.layers.max_pooling2d(inputs = self.conv1,
                                                pool_size = max_pool_size,
                                                strides = max_pool_str,
                                                padding = "VALID"
                                                )
            
             
            """
            Second convnet:
            """
            
            """self.conv2 = tf.layers.conv2d(inputs = self.conv1,
                                         filters = l2_num_filt,
                                         kernel_size = l2_window,
                                         strides = l2_strides,
                                         padding = padding_type,
                                         activation = tf.nn.relu,
                                         use_bias = True,
                                         kernel_initializer=tf.contrib.layers.xavier_initializer_conv2d(),
                                         name = "conv2")
            
            # Max pooling
            
            self.conv2 = tf.layers.max_pooling2d(inputs = self.conv2,
                                                pool_size = [2, 2]
                                                strides = [2, 2]
                                                padding = "VALID",
                                                )"""
            
            # Flatten output of conv layers
            
            self.flatten = tf.contrib.layers.flatten(self.conv1)
            
            
            # Fully connected layer
            
            self.fc = tf.layers.dense(inputs = self.flatten,
                                  units = fc_num_units,
                                  activation = tf.nn.relu,
                                  kernel_initializer=tf.contrib.layers.xavier_initializer(),
                                  name="fc")
            
            # Dropout
            
            self.fc = tf.layers.dropout(self.fc, rate=self.dropout_prob)
            
            # Output Layer
            
            self.output = tf.layers.dense(inputs = self.fc, 
                                          kernel_initializer=tf.contrib.layers.xavier_initializer(),
                                          units = 1, 
                                          activation=None)
            
            # Train
            
            self.loss = tf.reduce_mean(tf.square(self.output - self.Y_corr), name="loss") # Quadratic loss
            
            self.optimizer = tf.train.AdamOptimizer(learning_rate=0.002, name="optimizer")
            
            # Mean and Variance Shift Operations needed for Batch Normalization
            self.update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)

            # Execute mean and variance updates of batch norm each training step
            with tf.control_dependencies(self.update_ops):
                self.train_op = self.optimizer.minimize(self.loss, name="train_op")
            
            # Summaries
            self.train_loss_sum = tf.summary.scalar('train_loss', self.loss) # Training loss
            self.test_loss_sum = tf.summary.scalar('test_loss', self.loss) # Validation loss
            
            self.writer = tf.summary.FileWriter("CNN_pruebas3/" + writer_name)
            self.writer.add_graph(tf.get_default_graph())
            
            
        # Get Moving Mean and Variance of Batch Norm
        with tf.variable_scope(name, reuse=tf.AUTO_REUSE):
            self.get_mov_mean = tf.get_variable('batch_normalization/moving_mean')
            self.get_mov_var = tf.get_variable('batch_normalization/moving_variance')


In [4]:
# Number of epochs
num_epochs = 75
# Minibatch size
batch_size = 64 # Mejor tamaño que 128

# Test Hyperparameters
# out_units = [16, 32, 64, 128] # Output Units
# num_filts = [2, 4, 8, 16] # Number of filters layer 1
# win_sizes = [[2,2], [4,4]] # Window Size
# strides = [[1,1], [2,2], [4,4]] # Stride Size
# paddings = ['VALID', 'SAME']

out_units = [16] # Output Units
num_filts = [2] # Number of filters layer 1
win_sizes = [[4,4]] # Window Size
strides = [[2,2]] # Stride Size
paddings = ['SAME']

# Pooling
pool_win = [2, 2]
pool_str = [1, 1] # Overlapping Pooling
    
# Batch Sizes
batch_sizes = [64]

# Dropout probability
drop_prob = 0.5

for batch_size in batch_sizes:
    for num_filt in num_filts:
        for win_size in win_sizes:
            for stride in strides:
                for out_unit in out_units:
                    for num_prueba in range(1, 11):
                    
                    #if stride[0] <= win_size[0]: # Never use strides bigger than window size
                    
                        cnn_name = "CNN"
                        #writer_name = "Dataset=1000_dropout={}_Test{}_max_pool=[{},str={}]_out={}_filt={}_win={}_str={}_pad={}".format(drop_prob, num_prueba, pool_win, pool_str, out_unit, num_filt, win_size, stride, 'SAME')
                        writer_name = "units={}, T={}".format(out_unit, num_prueba)
                        
                        # Reset Default Graph
                        tf.reset_default_graph()

                        # Create CNN Architecture to test
                        CNN_test = CNN(name=cnn_name, writer_name=writer_name,
                                       l1_num_filt = num_filt, l1_window = win_size, l1_strides = stride,
                                       padding_type = 'SAME',
                                       fc_num_units = out_unit,
                                       max_pool_size = pool_win,
                                       max_pool_str = pool_str) 

                        with tf.Session() as sess:
                            sess.run(tf.global_variables_initializer()) # Initialize all variables

                            rkf = RepeatedKFold(n_splits=int(len(dataset_y) // batch_size),
                                              n_repeats=num_epochs, random_state=41982) # Get randomized indexes for minibatches

                            # Feed Dict for validation (uses validation set)
                            data_dict_test = {CNN_test.X:test_dataset_x, CNN_test.Y_corr:test_dataset_y_resh,
                                             CNN_test.dropout_prob:0.0, CNN_test.is_training : False}

                            # Train the model

                            it = 0

                            for _, batch_index in rkf.split(dataset_y):
                                batch_x = np.take(dataset_x, batch_index, axis=0) # Obtain current training batch
                                batch_y = np.take(dataset_y, batch_index)
                                batch_y = np.reshape(batch_y, (-1, 1))

                                data_dict = {CNN_test.X:batch_x, CNN_test.Y_corr:batch_y,
                                             CNN_test.dropout_prob:drop_prob, CNN_test.is_training : True}

                                sess.run(CNN_test.train_op, feed_dict=data_dict) # Execute one training step
                                
                                # Print Batch Norm parameters
                                #mean, variance = sess.run([CNN_test.get_mov_mean, CNN_test.get_mov_var])
                                
                                #print("Mean: {}, Variance: {}".format(mean, variance))

                                # Periodically check losses
                                if it % 5 == 0:                
                                    sum1 = sess.run(CNN_test.test_loss_sum, feed_dict=data_dict_test)
                                    CNN_test.writer.add_summary(sum1, it)
                                    sum2 = sess.run(CNN_test.train_loss_sum, feed_dict=data_dict)
                                    CNN_test.writer.add_summary(sum2, it)   

                                it += 1

                            # ----- VALIDATION -----

                            # Compute predictions
                            test_pred = sess.run(CNN_test.output, feed_dict=data_dict_test)

                            mae_base_test = mean_absolute_error(test_dataset_y, np.repeat(np.mean(dataset_y), test_dataset_y.shape[0]))
                            mae_model_test = mean_absolute_error(test_dataset_y, test_pred)

                            print("\n\n---MAE VALIDATION--\n\n")
                            print("Model:", mae_model_test)
                            print("Baseline:", mae_base_test)
    



---MAE VALIDATION--


Model: 4.665367329149926
Baseline: 7.0924987714987715


---MAE VALIDATION--


Model: 5.792958262950841
Baseline: 7.0924987714987715


---MAE VALIDATION--


Model: 6.377713144850673
Baseline: 7.0924987714987715


---MAE VALIDATION--


Model: 5.1461900821189035
Baseline: 7.0924987714987715


---MAE VALIDATION--


Model: 5.166227659956536
Baseline: 7.0924987714987715


---MAE VALIDATION--


Model: 4.845161698081276
Baseline: 7.0924987714987715


---MAE VALIDATION--


Model: 6.278943443766976
Baseline: 7.0924987714987715


---MAE VALIDATION--


Model: 7.682768921301465
Baseline: 7.0924987714987715


---MAE VALIDATION--


Model: 4.135822339637859
Baseline: 7.0924987714987715


---MAE VALIDATION--


Model: 4.2853537707129625
Baseline: 7.0924987714987715


---MAE VALIDATION--


Model: 5.4012640178642926
Baseline: 7.0924987714987715


---MAE VALIDATION--


Model: 4.371169581167235
Baseline: 7.0924987714987715


---MAE VALIDATION--


Model: 4.136593408608027
Baseline: 7.