## Comparison Between Different Initialization Methods
---

### Methods in Experiment
1. weights and biases: truncated Normal Distribution with std = 0.1
2. weights: truncated Normal Distribution with std = 0.1, biases: 0
3. weights: truncated Normal Distribution with std = 0.01, biases: 0
4. weights: truncated Normal Distribution with std = 0.01 divided by sqrt(fan_in/2), biases: 0
5. weights: truncated Normal Distribution with std = 0.1 divided by sqrt(fan_in/2), biases: 0

### 1. Load modules and datasets

In [None]:
# Set path of this notebook to the root directory
import os
# Move one level up from the current working directory and report the result
parent_dir = os.path.join(os.getcwd(), '..')
os.chdir(parent_dir)
print(' '.join(('Current working directory', os.getcwd())))

In [None]:
# For auto-reloading external modules
%load_ext autoreload
%autoreload 2

# Import modules
import pandas as pd
import numpy as np
import data_utils as du
import neural_network as nn
from sklearn.model_selection import KFold


# Prepare datasets for processing: run du.maybe_pickle on both CSV files,
# then load the train and test datasets
for csv_name in ('train.csv', 'test.csv'):
    du.maybe_pickle(csv_name)

train_dataset, test_dataset = du.load_dataset('train'), du.load_dataset('test')

# Blank separator line followed by the shape of the first element of each dataset
print('')
print(' '.join(('Train dataset has shape =', str(train_dataset[0].shape))))
print(' '.join(('Test dataset has shape =', str(test_dataset[0].shape))))

### 2. Configure Network

In [None]:
# --- Arguments forwarded to nn.NeuralNetwork in run_network() ---
hidden_dim = (1024, 512, 64)      # presumably one entry per hidden layer -- confirm in neural_network.py
optimizer = 'RMSPropOptimizer'    # optimizer selected by name
learning_rate = 0.001
batch_size = 256
num_steps = 5000

# --- Cross-validation ---
val_folds = 7                     # number of KFold splits used in run_network()

### 3. Prepare Testing Variables and Functions

In [None]:
# Accumulates one list of per-fold validation accuracies per method
# (appended by run_network) so that all methods can be plotted together
methods_val_accs = list()

In [None]:
import shutil

def run_network(method_name, min_train_acc=0.98, max_retries=None):
    """
        Cross-validates the network for one initialization method.

        For each of the `val_folds` KFold splits of `train_dataset`, a fresh
        nn.NeuralNetwork is fitted on the training part and evaluated on both
        parts. A run whose train accuracy is below `min_train_acc` is
        discarded: its log directory is deleted and the same fold is trained
        again from scratch.

        Per-fold and average accuracies are printed, and the list of per-fold
        validation accuracies is appended to the global `methods_val_accs`.

        Args:
            method_name: name of the method; logs for fold i are written to
                'log/init/<method_name>/<i>/' (i is 0-based).
            min_train_acc: minimum train accuracy for a run to be accepted.
                Defaults to 0.98.
            max_retries: maximum number of discarded runs allowed per fold.
                None (the default) retries without limit.

        Raises:
            RuntimeError: if a fold has been discarded more than `max_retries`
                times (only when `max_retries` is not None).
    """
    kf = KFold(n_splits=val_folds)
    train_accs, val_accs = [], []

    for fold_idx, (train_idx, val_idx) in enumerate(kf.split(train_dataset[0])):
        # Built once per fold: used both for logging and for cleanup on retry
        log_dir = 'log/init/' + method_name + '/' + str(fold_idx) + '/'
        discarded_runs = 0

        while True:
            # A new classifier and new batches are created for every attempt
            clf = nn.NeuralNetwork(hidden_dim=hidden_dim, optimizer=optimizer,
                                   learning_rate=learning_rate, num_steps=num_steps,
                                   batch_size=batch_size)
            itr_train = du.get_batch(train_dataset, indices=train_idx)
            itr_val = du.get_batch(train_dataset, indices=val_idx)

            clf.fit(itr_train, itr_val, log_dir)
            itr_train_acc = clf.accuracy(itr_train)
            itr_val_acc = clf.accuracy(itr_val)

            if not (itr_train_acc < min_train_acc):
                break

            # Run rejected: drop its logs so only the accepted run is kept
            shutil.rmtree(log_dir)
            discarded_runs += 1
            if max_retries is not None and discarded_runs > max_retries:
                raise RuntimeError(
                    'fold %d of %s did not reach train accuracy %.4f after %d discarded runs'
                    % (fold_idx, method_name, min_train_acc, discarded_runs))

        # Folds are reported 1-based, log directories are 0-based
        print('Iteration %d\n============' % (fold_idx + 1))

        train_accs.append(itr_train_acc)
        print('accuracy on train dataset = %.4f' % itr_train_acc)

        val_accs.append(itr_val_acc)
        print('accuracy on validation dataset = %.4f' % itr_val_acc)

        print('')

    train_acc = np.mean(train_accs)
    print('average accuracy on train dataset = %.4f' % train_acc)

    val_acc = np.mean(val_accs)
    print('average accuracy on validation dataset = %.4f' % val_acc)

    methods_val_accs.append(val_accs)

### 4. Run Network for Every Initialization Method

In [None]:
# Running network for Method 1:
# weights and biases both drawn from a truncated normal with stddev = 0.1

# Manual Modification (do this BEFORE running the cell):
# set the following in affine_layer_variables() in layer_utils.py:
# w_init = tf.truncated_normal([matrix_dim0, matrix_dim1], stddev=0.1)
# b_init = tf.truncated_normal([matrix_dim1], stddev=0.1)

# Logs go under log/init/method1/; the per-fold validation accuracies are
# appended to methods_val_accs
run_network('method1')

In [None]:
# Running network for Method 2:
# weights drawn from a truncated normal with stddev = 0.1, biases set to zero

# Manual Modification (do this BEFORE running the cell):
# set the following in affine_layer_variables() in layer_utils.py:
# w_init = tf.truncated_normal([matrix_dim0, matrix_dim1], stddev=0.1)
# b_init = tf.zeros([matrix_dim1])

# Logs go under log/init/method2/; the per-fold validation accuracies are
# appended to methods_val_accs
run_network('method2')

In [None]:
# Running network for Method 3:
# weights drawn from a truncated normal with stddev = 0.01, biases set to zero

# Manual Modification (do this BEFORE running the cell):
# set the following in affine_layer_variables() in layer_utils.py:
# w_init = tf.truncated_normal([matrix_dim0, matrix_dim1], stddev=0.01)
# b_init = tf.zeros([matrix_dim1])

# Logs go under log/init/method3/; the per-fold validation accuracies are
# appended to methods_val_accs
run_network('method3')

In [None]:
# Running network for Method 4:
# weights drawn from a truncated normal with stddev = 0.01 and then divided by
# sqrt(matrix_dim0 / 2), biases set to zero.
# Note the smaller stddev (0.01) compared with Method 5 (0.1).

# Manual Modification (do this BEFORE running the cell):
# set the following in affine_layer_variables() in layer_utils.py:
# w_init = tf.divide(tf.truncated_normal([matrix_dim0, matrix_dim1], stddev=0.01), tf.sqrt(matrix_dim0 / 2.0))
# b_init = tf.zeros([matrix_dim1])

# Logs go under log/init/method4/; the per-fold validation accuracies are
# appended to methods_val_accs
run_network('method4')

In [None]:
# Running network for Method 5:
# weights drawn from a truncated normal with stddev = 0.1 and then divided by
# sqrt(matrix_dim0 / 2), biases set to zero

# Manual Modification (do this BEFORE running the cell):
# set the following in affine_layer_variables() in layer_utils.py:
# w_init = tf.divide(tf.truncated_normal([matrix_dim0, matrix_dim1], stddev=0.1), tf.sqrt(matrix_dim0 / 2.0))
# b_init = tf.zeros([matrix_dim1])

# Logs go under log/init/method5/; the per-fold validation accuracies are
# appended to methods_val_accs
run_network('method5')

In [None]:
# Show the per-fold validation accuracies of every method, one list per line
for fold_accs in methods_val_accs:
    print(fold_accs)

### 5. Plotting Results

In [None]:
import matplotlib.pyplot as plt
import numpy as np

def getData(path):
    """
        Returns data from CSV file exported from Tensorboard. The returned data is an np array.

        The first column of the file is dropped; all remaining columns are
        kept, one array row per CSV record.

        Args:
            path: path of the CSV file to read.

        Returns:
            2-D np.ndarray holding every column except the first.
    """
    # DataFrame.as_matrix() was removed in pandas 1.0; .values returns the
    # same ndarray and is available in old and new pandas versions alike
    return pd.read_csv(path).iloc[:, 1:].values

In [None]:
# Cross-validation accuracy per method: every fold as a red dot, plus the
# mean over folds with an error bar of one standard deviation
accs = methods_val_accs
acc_mean = np.array(list(map(np.mean, accs)))
acc_std = np.array(list(map(np.std, accs)))

# Methods are numbered from 1 on the x axis
for method_no, fold_accs in enumerate(accs, 1):
    plt.scatter([method_no] * len(fold_accs), fold_accs, color='red')

method_numbers = range(1, len(accs) + 1)
plt.errorbar(method_numbers, acc_mean, yerr=acc_std, color='green', ecolor='orange')

plt.title('Cross-validation on Initialization Method')
plt.xlabel('Method Index')
plt.ylabel('Cross-validation accuracy')
plt.grid()
plt.show()

In [None]:
# Train vs. validation accuracy curves of the best method (single iteration),
# read from the CSV files under plotting/data_csv/init/
train_data = getData('plotting/data_csv/init/best_train_acc.csv')
val_data = getData('plotting/data_csv/init/best_val_acc.csv')

# Column 0 is plotted on the x axis ('steps'), column 1 on the y axis ('accuracy')
plt.plot(train_data[:, 0], train_data[:, 1], color='#FF5733', label='Train')
plt.plot(val_data[:, 0], val_data[:, 1], color='#83FF33', label='Validation')

# Fixed viewport: x in [0, 5000], y in [0.8, 1.1]
plt.axis([0, 5000, 0.8, 1.1])
plt.xlabel('steps')
plt.ylabel('accuracy')
plt.legend()
plt.grid(True)
plt.show()

In [None]:
# For best method, plot a histogram for weights of layers