## Comparison Between Different Learning Optimizers
---

### Optimizers in Experiment
1. Gradient Descent
2. Adagrad
3. RMSProp

### 1. Load modules and datasets

In [None]:
# Set the working directory to the parent of the notebook's directory
# (the project root, per the original note) so later cells run from there.
# NOTE: every execution of this cell moves one more level up; run it only
# once per kernel session.
import os
os.chdir(os.path.join(os.getcwd(), os.pardir))
# A single pre-formatted argument prints the same text on Python 2 and 3.
print('Current working directory %s' % os.getcwd())

In [None]:
# For auto-reloading external modules
%load_ext autoreload
%autoreload 2

# Import modules
import pandas as pd
import numpy as np
import data_utils as du
import neural_network as nn
from sklearn.model_selection import KFold


# Prepare datasets for processing.
# maybe_pickle() and load_dataset() come from the project's data_utils module;
# presumably the first caches each CSV as a pickle and the second loads it
# back -- confirm in data_utils.
du.maybe_pickle('train.csv')
du.maybe_pickle('test.csv')

train_dataset = du.load_dataset('train')
test_dataset = du.load_dataset('test')

# Element 0 of each dataset is the array whose shape is reported below.
# The print calls take one pre-formatted argument so the output is the same
# on Python 2 and Python 3 (a bare `print` is a no-op expression on Python 3).
# The shape is wrapped in a 1-tuple because `%` would otherwise unpack it.
print('')
print('Train dataset has shape = %s' % (train_dataset[0].shape,))
print('Test dataset has shape = %s' % (test_dataset[0].shape,))

### 2. Configure Network

In [None]:
# Hyperparameters shared by every optimizer run; run_network() forwards the
# first four to nn.NeuralNetwork under the same keyword names.

# Presumably the widths of the three hidden layers -- confirm in neural_network.py.
hidden_dim = (1024, 512, 64)
# The same learning rate is used for all optimizers being compared.
learning_rate = 0.001
# NOTE: the plotting cells hard-code 5000 as the x-axis limit; keep them in
# sync if this value changes.
num_steps = 5000
batch_size = 256
# Number of KFold splits. Only the first split is taken in run_network(), so
# this is a single hold-out validation set, not cross-validation.
val_folds = 7    # Only fold 0 will be used (no cross-validation)

# Variable initialization is not configured in this notebook.
# Configure the following lines in affine_layer_variables() in layer_utils.py
# w_init = tf.truncated_normal([matrix_dim0, matrix_dim1], stddev=0.1)
# b_init = tf.truncated_normal([matrix_dim1], stddev=0.1)

### 3. Prepare Testing Variables and Functions

In [None]:
def run_network(optimizer):
    """Train one network with the given optimizer and print its accuracies.

    The training data is split with a ``val_folds``-way ``KFold`` and only
    the first split is used, so the evaluation is a single hold-out split
    rather than cross-validation. ``hidden_dim``, ``learning_rate``,
    ``num_steps``, ``batch_size``, ``val_folds`` and ``train_dataset`` are
    read from the notebook's global namespace.

    Args:
        optimizer: Optimizer name as a string (e.g. ``'AdagradOptimizer'``).
            It is forwarded to ``nn.NeuralNetwork`` and also used as the
            sub-directory name under ``log/opt/``.

    Returns:
        None. The train and validation accuracies are printed.
    """
    splitter = KFold(n_splits=val_folds)
    # Built-in next() works on Python 2.6+ and Python 3; the generator's
    # .next() method exists only on Python 2.
    train_idx, val_idx = next(splitter.split(train_dataset[0]))

    clf = nn.NeuralNetwork(hidden_dim=hidden_dim, optimizer=optimizer,
                           learning_rate=learning_rate, num_steps=num_steps,
                           batch_size=batch_size)

    train = du.get_batch(train_dataset, indices=train_idx)
    val = du.get_batch(train_dataset, indices=val_idx)

    # Third argument is presumably the log directory for this run --
    # confirm against NeuralNetwork.fit.
    clf.fit(train, val, 'log/opt/' + optimizer + '/')
    train_acc = clf.accuracy(train)
    val_acc = clf.accuracy(val)

    # Single-argument print calls behave identically on Python 2 and 3.
    print('accuracy on train dataset = %.4f' % train_acc)
    print('accuracy on validation dataset = %.4f' % val_acc)

### 4. Run Network for Every Optimizer

In [None]:
# Optimizer names under comparison. Each is passed to run_network(); the
# plotting cells also use these names as the stems of the exported CSV files.
optimizers = ['GradientDescentOptimizer', 'AdagradOptimizer', 'RMSPropOptimizer']

for opt_name in optimizers:
    # Single-argument print calls behave identically on Python 2 and 3.
    print("Running " + opt_name)
    print("=================================")
    run_network(opt_name)

### 5. Plotting Results

In [None]:
import matplotlib.pyplot as plt
import numpy as np

def getData(path):
    """Load a CSV file exported from TensorBoard as a NumPy array.

    The first column of the file is dropped (presumably the wall-time column
    of TensorBoard's export -- confirm against the CSV files); every
    remaining column is kept in its original order.

    Args:
        path: Path of the CSV file to read.

    Returns:
        A 2-D ``numpy.ndarray`` with one row per CSV record and one column
        per retained CSV column.
    """
    # DataFrame.as_matrix() was deprecated in pandas 0.23 and removed in 1.0;
    # .values returns the same ndarray on both old and new pandas versions.
    return pd.read_csv(path).iloc[:, 1:].values

In [None]:
# Loss curve of every optimizer, drawn on one figure.
opt_names = ['Gradient Descent', 'Adagrad', 'RMS Prop']
colors = ['#F1653C', '#83FF33', '#0B2D5A']
loss_dir = 'plotting/data_csv/opt/loss/'

for rank, opt in enumerate(optimizers):
    curve = getData(loss_dir + opt + '.csv')
    # Column 0 goes on the x axis (steps), column 1 on the y axis (loss).
    # The decreasing zorder keeps earlier optimizers drawn above later ones.
    plt.plot(curve[:, 0], curve[:, 1],
             label=opt_names[rank], color=colors[rank], zorder=-rank)

plt.xlabel('steps')
plt.ylabel('loss')
plt.title('Loss Function for Optimizers')
plt.axis([0, 5000, 0, 1.5])
plt.grid(True)
plt.legend()
plt.show()

In [None]:
# Validation-accuracy curve of every optimizer, drawn on one figure.
opt_names = ['Gradient Descent', 'Adagrad', 'RMS Prop']
colors = ['#F1653C', '#83FF33', '#0B2D5A']
val_acc_dir = 'plotting/data_csv/opt/val_acc/'

for rank, opt in enumerate(optimizers):
    val_data = getData(val_acc_dir + opt + '.csv')
    # Column 0 goes on the x axis (steps), column 1 on the y axis (accuracy).
    # The decreasing zorder keeps earlier optimizers drawn above later ones.
    plt.plot(val_data[:, 0], val_data[:, 1],
             label=opt_names[rank], color=colors[rank], zorder=-rank)

plt.xlabel('steps')
plt.ylabel('validation accuracy')
plt.title('Validation Accuracy for Optimizers')
plt.axis([0, 5000, 0.5, 1])
plt.grid(True)
# Explicit anchor box for the legend placement.
plt.legend(bbox_to_anchor=(0., .12, 1., .102))
plt.show()

In [None]:
# Training vs validation accuracy for a single optimizer (RMSProp is the one
# selected here as the best performer).
opt = 'RMSPropOptimizer'
opt_name = 'RMS Prop'
train_data = getData('plotting/data_csv/opt/train_acc/' + opt + '.csv')
val_data = getData('plotting/data_csv/opt/val_acc/' + opt + '.csv')

# In both arrays column 0 goes on the x axis (steps) and column 1 on the
# y axis (accuracy).
plt.plot(train_data[:, 0], train_data[:, 1], color='#CD2626', label='Training')
plt.plot(val_data[:, 0], val_data[:, 1], color='#08B200', label='Validation')

plt.xlabel('steps')
plt.ylabel('accuracy')
plt.title('Training vs Validation Accuracies for ' + opt_name)
# The upper y limit sits slightly above 1 so a curve at 1.0 is not clipped.
plt.axis([0, 5000, 0.5, 1.01])
plt.grid(True)
plt.legend(bbox_to_anchor=(0., .12, 1., .102))
plt.show()