# 0 - Import packages:

In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pylab as pl
import tensorflow as tf
from matplotlib import collections  as mc
from mpl_toolkits.mplot3d import Axes3D
from sklearn.metrics import f1_score
from sklearn.model_selection import StratifiedKFold

# Render matplotlib figures inline, directly below the cell that creates them.
%matplotlib inline
plt.rcParams['figure.figsize'] = (10.0, 5.0) # set default size of plots
plt.rcParams['image.interpolation'] = 'nearest'  # do not smooth pixels when showing images
plt.rcParams['image.cmap'] = 'gray'  # default colormap for images

# Auto-reload external modules before executing each cell, so edits to imported
# .py files are picked up without restarting the kernel.
# see http://stackoverflow.com/questions/1907993/autoreload-of-modules-in-ipython
%load_ext autoreload
%autoreload 2

# 1 - Load data:

## 1.1 - Train set: 

In [None]:
# Training features; the "Unnamed: 0" column of the CSV is used as the row index.
# (The file name suggests these are PCA-transformed features.)
df_data_1 = pd.read_csv(
    "../common/albertom/train_test_split/X_train_pca.csv",
    index_col="Unnamed: 0",
)
# Uncomment to work on a small subsample while experimenting:
# df_data_1 = df_data_1.head(500)
print(df_data_1.shape)
df_data_1.head()

## 1.2 - Train labels:

In [None]:
# Training labels; the "Unnamed: 0" column of the CSV is used as the row index.
df_target_data_1 = pd.read_csv(
    "../common/albertom/train_test_split/y_train.csv",
    index_col="Unnamed: 0",
)
# Uncomment to work on a small subsample while experimenting:
# df_target_data_1 = df_target_data_1.head(500)
print(df_target_data_1.shape)
df_target_data_1.head()

## 1.3 - Test set:

In [None]:
# Test features; the "Unnamed: 0" column of the CSV is used as the row index.
# (The file name suggests these are PCA-transformed features.)
df_data_2 = pd.read_csv(
    "../common/albertom/train_test_split/X_test_pca.csv",
    index_col="Unnamed: 0",
)
# Uncomment to work on a small subsample while experimenting:
# df_data_2 = df_data_2.head(100)
print(df_data_2.shape)
df_data_2.head()

## 1.4 - Test labels:

In [None]:
# Test labels; the "Unnamed: 0" column of the CSV is used as the row index.
df_target_data_2 = pd.read_csv(
    "../common/albertom/train_test_split/y_test.csv",
    index_col="Unnamed: 0",
)
# Uncomment to work on a small subsample while experimenting:
# df_target_data_2 = df_target_data_2.head(100)
print(df_target_data_2.shape)
df_target_data_2.head()

# 2 - Globals:

## 2.1 - Parameters 

In [None]:
# Number of cross-validation folds.
n_folds = 3
# Approximate number of rows per validation fold. Floor division keeps this an
# int (plain `/` yields a float on Python 3, which is not a valid array/tensor
# dimension).
valid_size = df_data_1.shape[0] // n_folds
# Rows per SGD minibatch.
batch_size = 128
# Minibatch SGD steps per trained network; read by the training loops below.
# NOTE(review): 3001 is a starting value, tune it for this dataset.
num_steps = 3001

# Plain numpy views of the feature frames and of the label column named "0".
train_dataset = df_data_1.values
test_dataset = df_data_2.values
train_labels = df_target_data_1["0"].values
test_labels = df_target_data_2["0"].values

num_features = train_dataset.shape[1]
num_examples = train_dataset.shape[0]
# Number of distinct classes seen in the training labels (width of the output layer).
distinct_labels = len(df_target_data_1["0"].unique())

## 2.2 - Tensorflow graph: 

In [None]:
# One-hidden-layer softmax classifier: X -> ReLU(X W1 + b1) -> logits -> softmax.
graph = tf.Graph()
with graph.as_default():

    # Input data. The row dimension is left as None so the same graph accepts
    # minibatches, validation folds whose sizes differ by a row, and the test set.
    X = tf.placeholder(tf.float32, shape=(None, num_features))
    t = tf.placeholder(tf.float32, shape=(None, distinct_labels))  # one-hot targets
    X_unseen = tf.placeholder(tf.float32, shape=(None, num_features))

    # Scalar hyper-parameters, fed at run time so one graph serves the whole grid search.
    L2_reg = tf.placeholder(tf.float32, shape=[])
    learning_rate = tf.placeholder(tf.float32, shape=[])

    # Variables.
    num_hidden1 = 10
    num_hidden2 = 7  # NOTE(review): not used by any layer below

    W1 = tf.Variable(tf.truncated_normal([num_features, num_hidden1]))
    b1 = tf.Variable(tf.zeros([num_hidden1]))

    W2 = tf.Variable(tf.truncated_normal([num_hidden1, distinct_labels]))
    b2 = tf.Variable(tf.zeros([distinct_labels]))

    # Training forward pass.
    h1 = tf.nn.relu(tf.matmul(X, W1) + b1)
    logits = tf.matmul(h1, W2) + b2

    # Loss: mean cross-entropy plus an L2 penalty on all weights and biases.
    # Keyword arguments are required by TF >= 1.0 and also accepted by earlier releases.
    data_loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=t, logits=logits))
    regularization = (tf.nn.l2_loss(W1) + tf.nn.l2_loss(b1) + tf.nn.l2_loss(W2) + tf.nn.l2_loss(b2))
    loss = data_loss + L2_reg * regularization

    # Optimizer.
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate).minimize(loss)

    # Predictions for the training minibatch and for unseen (validation / test) data.
    # The unseen branch reuses the same weights through a second forward pass.
    train_predictions = tf.nn.softmax(logits)
    h1_unseen = tf.nn.relu(tf.matmul(X_unseen, W1) + b1)
    unseen_predictions = tf.nn.softmax(tf.matmul(h1_unseen, W2) + b2)

## 2.3 - Functions:

In [None]:
# Given a set of model parameters (learning rate, regularization penalty coefficient), find the optimal parameters with cross validation

def cross_validation(set_of_learning_rates, set_of_regs, X_train, y_train):
    """Grid-search learning rate and L2 coefficient with stratified k-fold cross validation.

    For every fold and every (learning rate, L2 coefficient) pair the network is
    re-initialised, trained with minibatch gradient descent on the fold's training
    part and scored with `f1` on the fold's validation part. The pair with the
    highest validation f1 averaged over the folds is returned.

    Relies on notebook globals: `graph` and its tensors/ops (`X`, `t`, `X_unseen`,
    `L2_reg`, `learning_rate`, `optimizer`, `loss`, `train_predictions`,
    `unseen_predictions`), plus `n_folds`, `batch_size`, `num_steps` and `f1`.

    Parameters
    ----------
    set_of_learning_rates : iterable of float
        Candidate learning rates.
    set_of_regs : iterable of float
        Candidate L2 penalty coefficients.
    X_train : array of shape (n_samples, n_features)
        Training features.
    y_train : array of shape (n_samples,)
        Class label of every training row (not one-hot).

    Returns
    -------
    tuple
        The best `(learning_rate, L2_reg)` pair.

    Raises
    ------
    ValueError
        If the parameter grid is empty.
    """
    X_train = np.asarray(X_train)
    y_train = np.asarray(y_train).ravel()

    set_of_params = [(lr, reg) for lr in set_of_learning_rates for reg in set_of_regs]
    if not set_of_params:
        raise ValueError("cross_validation needs at least one learning rate and one L2 coefficient")

    # The graph's target placeholder expects one column per class, so the 1-D
    # labels are one-hot encoded; `classes` maps column indices back to labels.
    classes = np.unique(y_train)
    one_hot = (y_train[:, None] == classes[None, :]).astype(np.float32)

    def to_labels(probabilities):
        # Most probable class of every row, expressed in the original label values.
        return classes[np.argmax(probabilities, axis=1)]

    # Built once; running it resets every variable to a fresh random initialisation.
    with graph.as_default():
        init_op = tf.global_variables_initializer()

    # Get train-validation set stratified (keeps the same distribution) splitter
    skf = StratifiedKFold(n_splits=n_folds)

    print("Number of folds: " + str(n_folds))
    print("Number of parameters combinations: " + str(len(set_of_params)))
    print("We will train " + str(n_folds * len(set_of_params)) + " neural networks for this cv task.")

    # Sum of validation f1 per parameter combination (named so it does not shadow f1()).
    f1_sums = np.zeros(len(set_of_params))

    # Repeat for every train-validate combination
    for cv_train_index, cv_val_index in skf.split(X_train, y_train):

        X_cv_train, X_cv_val = X_train[cv_train_index], X_train[cv_val_index]
        y_cv_train, y_cv_val = y_train[cv_train_index], y_train[cv_val_index]
        t_cv_train = one_hot[cv_train_index]

        # Minibatch offsets wrap around inside THIS fold's training rows.
        offset_range = max(X_cv_train.shape[0] - batch_size, 1)

        with tf.Session(graph=graph) as session:
            for param_idx, param in enumerate(set_of_params):

                # Every combination starts from fresh weights.
                session.run(init_op)

                print(str(param_idx) + " ) Combination: [Parameters: " +  str(param) + " | Train model]")
                for step in range(num_steps):

                    offset = (step * batch_size) % offset_range
                    batch = slice(offset, offset + batch_size)
                    feed_dict = {
                        X : X_cv_train[batch, :],
                        t : t_cv_train[batch, :],
                        L2_reg : param[1],
                        learning_rate : param[0]
                    }
                    _, l, pred_batch = session.run([optimizer, loss, train_predictions], feed_dict=feed_dict)

                    if (step % 500 == 0):
                        val_pred = session.run(unseen_predictions, feed_dict={X_unseen : X_cv_val})
                        print("> Minibatch loss at step %d: %f" % (step, l))
                        print("> Minibatch f1: %.1f%%" % (100 * f1(to_labels(pred_batch), y_cv_train[batch])))
                        print("> Validation f1: %.1f%%" % (100 * f1(to_labels(val_pred), y_cv_val)))

                val_pred = session.run(unseen_predictions, feed_dict={X_unseen : X_cv_val})
                f1_cv = f1(to_labels(val_pred), y_cv_val)
                print("|> Final Validation f1: %.1f%%" % (100 * f1_cv))
                f1_sums[param_idx] += f1_cv

    mean_f1 = f1_sums / n_folds # Average validation f1 over the folds

    # Each parameter tuple is (learning rate, L2 coefficient).
    best_params = set_of_params[int(np.argmax(mean_f1))]
    print(">>>>>> Best [learning rate - f1] = ", best_params[0])
    print(">>>>>> BEST [reg - f1] = ", best_params[1])

    return best_params

In [None]:
def f1(predicted_classes, real_classes):
    """Return the F1 score of the predictions against the true classes.

    Each argument may be a 1-D array of class labels or a 2-D array with one
    column per class (softmax probabilities or one-hot rows). 2-D inputs with
    more than one column are reduced to class indices with argmax along axis 1,
    so both arguments must use the same label encoding after that reduction.

    With at most two distinct classes the scikit-learn default (`average='binary'`)
    is used. With more classes `f1_score` would reject that default, so the
    unweighted per-class mean (`average='macro'`) is used instead.

    Parameters
    ----------
    predicted_classes : array-like
        Predicted labels or per-class scores.
    real_classes : array-like
        True labels or one-hot rows.

    Returns
    -------
    float
        F1 score in [0, 1].
    """
    def as_labels(values):
        values = np.asarray(values)
        if values.ndim == 2:
            # Column vector -> flat labels; score matrix -> most probable class.
            return values.ravel() if values.shape[1] == 1 else np.argmax(values, axis=1)
        return values

    predicted = as_labels(predicted_classes)
    real = as_labels(real_classes)

    n_classes = len(np.union1d(real, predicted))
    average = "binary" if n_classes <= 2 else "macro"
    return f1_score(real, predicted, average=average)

# 3 - Training: 

## 3.1 - Cross validation for optimal hyperparameters:

In [None]:
# Candidate hyper-parameter values; with these bounds each grid is [0.01, 0.02].
set_of_learning_rates = np.arange(0.01, 0.03, step=0.01)
set_of_L2_regs = np.arange(0.01, 0.03, step=0.01)

print("Cross-validation for [learning rate, L2_reg].")

best_params = cross_validation(set_of_learning_rates, set_of_L2_regs, train_dataset, train_labels)

# cross_validation returns a (learning rate, L2 coefficient) pair.
best_learning_rate = best_params[0]
best_L2_reg = best_params[1]

print("Best parameters: [" + str(best_learning_rate) + ", " + str(best_L2_reg) + "]")

## 3.2 - Prediction using optimal hyperparameters:

In [None]:
# Train one network on the whole training set with the hyper-parameters chosen
# by cross validation, then score it once on the held-out test set.
print("Prediction for test set.")

# The graph's target placeholder expects one column per class, so the 1-D
# training labels are one-hot encoded; `label_values` maps columns back to labels.
label_values = np.unique(train_labels)
train_targets = (train_labels[:, None] == label_values[None, :]).astype(np.float32)

# Minibatch offsets wrap around inside the training rows.
offset_range = max(num_examples - batch_size, 1)

# Running this op gives every variable a fresh random initialisation.
with graph.as_default():
    init_op = tf.global_variables_initializer()

with tf.Session(graph=graph) as session:
    session.run(init_op)

    for step in range(num_steps):

        offset = (step * batch_size) % offset_range
        X_batch = train_dataset[offset:(offset + batch_size), :]
        t_batch = train_targets[offset:(offset + batch_size), :]
        feed_dict = {
            X : X_batch,
            t : t_batch,
            L2_reg : best_L2_reg,
            learning_rate : best_learning_rate
        }
        _, l, pred_batch = session.run([optimizer, loss, train_predictions], feed_dict=feed_dict)

        if (step % 500 == 0):
            batch_pred_labels = label_values[np.argmax(pred_batch, axis=1)]
            batch_true_labels = train_labels[offset:(offset + batch_size)]
            print("> Minibatch loss at step %d: %f" % (step, l))
            print("> Minibatch f1: %.1f%%" % (100 * f1(batch_pred_labels, batch_true_labels)))

    # The test set is evaluated only once, after training has finished.
    test_pred = session.run(unseen_predictions, feed_dict={X_unseen : test_dataset})
    f1_test = f1(label_values[np.argmax(test_pred, axis=1)], test_labels)
    print("|> Test f1: %.1f%%" % (100 * f1_test))