# <center>City, University of London - MSc Artificial Intelligence
# <center>INM701 - COURSEWORK
## LSTM algorithm
### Student:
>García Plaza, Albert

***

This notebook classifies the data using a Long Short-Term Memory (LSTM) network.

### Importing necessary packages

In [None]:
import io
import pandas as pd
import numpy as np
import pickle
import matplotlib.pyplot as plt
from scipy import stats
import seaborn as sns
from pylab import rcParams
from sklearn import metrics
import tensorflow as tf

### Loading data

We worked on Google's cloud computing service _Google Colab_, so the next cell only works in that environment.

In [None]:
##### LOAD FILES FROM GOOGLE DRIVE TO RUN CODE IN GOOGLE COLAB #####
# Requires the PyDrive package (the original note for this cell was: !pip install -U -q PyDrive).
from urllib.parse import parse_qs, urlparse

from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials

# Authenticate and create the PyDrive client.
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
drive = GoogleDrive(gauth)

# Google Drive shareable links
pa_link = 'https://drive.google.com/open?id=12x7DjbbMAVlEdt2yPG5oqDVQXJQcKso0'
pg_link = 'https://drive.google.com/open?id=1pwjF27YRF1Pnll8zh7QK_LI3CfS2id81'
wa_link = 'https://drive.google.com/open?id=1c2Fz-ULrtOwcBcRQ3yTPoARApoxiCXRF'
wg_link = 'https://drive.google.com/open?id=1XEPvoiQEGhOcr-dXqi33rbY5TWQ3MlMy'
all_link = 'https://drive.google.com/open?id=1yTO_ZHBvJpBmuaYTSZcwrVbkdj_ybZs4'


def _read_drive_pickle(drive_client, link, local_name='Filename.pkl'):
    '''Download the Drive file behind a shareable link and return its unpickled content.

    Arguments:
        drive_client: authenticated GoogleDrive client used for the download.
        link: shareable link carrying the file id in its "id" query parameter.
        local_name: path the file is written to before it is read back (overwritten on every call).

    Raises:
        ValueError: if the link has no "id" query parameter.
    '''
    # Read the id from the query string so extra parameters (e.g. "&usp=sharing") do not break parsing
    file_ids = parse_qs(urlparse(link).query).get('id')
    if not file_ids:
        raise ValueError(f'No file id found in Google Drive link: {link!r}')
    downloaded = drive_client.CreateFile({'id': file_ids[0]})
    downloaded.GetContentFile(local_name)
    # NOTE: unpickling can execute arbitrary code; only load files from links you trust.
    return pd.read_pickle(local_name)


# Load datasets into Python session
pa_prep = _read_drive_pickle(drive, pa_link)
pg_prep = _read_drive_pickle(drive, pg_link)
wa_prep = _read_drive_pickle(drive, wa_link)
wg_prep = _read_drive_pickle(drive, wg_link)
all_prep = _read_drive_pickle(drive, all_link)

To work from a local directory instead, use the next cell to load the data in place of the previous one.

In [None]:
##### LOAD FILES FROM LOCAL DIRECTORY #####
# Read the five pickles from the current working directory, in the same order as the names on the left
pa_prep, pg_prep, wa_prep, wg_prep, all_prep = (
    pd.read_pickle(pickle_path)
    for pickle_path in ('pa_prep.pkl', 'pg_prep.pkl', 'wa_prep.pkl', 'wg_prep.pkl', 'all_prep.pkl')
)

### Last minute data pre-processing

In [None]:
# For each dataset: discard the extra columns, then give the three remaining
# signal columns the common names 'x', 'y' and 'z'
pa_prep = (
    pa_prep
    .drop(columns=['P_LO_x', 'P_LO_y', 'P_LO_z'])
    .rename(columns={'P_LA_x': 'x', 'P_LA_y': 'y', 'P_LA_z': 'z'})
)
pg_prep = (
    pg_prep
    .drop(columns=['P_AA_x', 'P_AA_y', 'P_AA_z', 'P_AO_x', 'P_AO_y', 'P_AO_z'])
    .rename(columns={'P_AV_x': 'x', 'P_AV_y': 'y', 'P_AV_z': 'z'})
)
wa_prep = (
    wa_prep
    .drop(columns=['W_LO_x', 'W_LO_y', 'W_LO_z'])
    .rename(columns={'W_LA_x': 'x', 'W_LA_y': 'y', 'W_LA_z': 'z'})
)
wg_prep = (
    wg_prep
    .drop(columns=['W_AA_x', 'W_AA_y', 'W_AA_z', 'W_AO_x', 'W_AO_y', 'W_AO_z'])
    .rename(columns={'W_AV_x': 'x', 'W_AV_y': 'y', 'W_AV_z': 'z'})
)

# Windowing parameters
WINDOW = 200                # samples per window
N_USERS = 51                # user ids 0..50 are scanned
N_ACTIVITIES = 18           # activity ids 0..17 are scanned
WINDOWS_PER_ACTIVITY = 13   # consecutive windows cut from each (user, activity) selection
FIRST_SAMPLE = 500          # rows skipped at the start of each (user, activity) selection

# Cut consecutive, non-overlapping windows out of wa_prep for every (user, activity) pair.
# X collects [xs, ys, zs] per window; y collects the activity id of that window.
X, y = [], []
for user_id in range(N_USERS):
    # Filter once per user / per activity instead of once per window
    user_rows = wa_prep[wa_prep.user == user_id]
    for activity_id in range(N_ACTIVITIES):
        activity_rows = user_rows[user_rows.activity == activity_id]
        for window_idx in range(WINDOWS_PER_ACTIVITY):
            start_index = FIRST_SAMPLE + window_idx * WINDOW
            window_rows = activity_rows[start_index:start_index + WINDOW]
            X.append([window_rows['x'].values, window_rows['y'].values, window_rows['z'].values])
            y.append(activity_id)

# Stack the windows into the (n_windows, WINDOW, 3) array read by the LSTM.
# A selection with too few rows yields a short (or empty) window; the missing
# trailing samples are left as zeros.
X_prep = np.zeros((len(X), WINDOW, 3))
for window_idx, axes in enumerate(X):
    for axis_idx, samples in enumerate(axes):
        X_prep[window_idx, :len(samples), axis_idx] = samples

# One-hot encode the activity ids (one column per distinct id in y)
y_prep = np.asarray(pd.get_dummies(y), dtype = np.float32)

# Hold out the windows of the last 10 users (all activities, about 20% of the data) for testing.
# Windows are stored user by user, so the test set is simply the tail of the arrays.
N_TEST_USERS = 10
WINDOWS_PER_USER = 18 * 13   # activities x windows per activity
split_index = len(X_prep) - N_TEST_USERS * WINDOWS_PER_USER
assert 0 < split_index < len(X_prep), "not enough windows to hold out the test users"

X_train, X_test = X_prep[:split_index], X_prep[split_index:]
y_train, y_test = y_prep[:split_index], y_prep[split_index:]

### Main functions declaration

In [None]:
# Create the LSTM with the specified number of hidden units
def create_LSTM(inputs, n_time_steps=None, n_features=None, n_hidden_units=None, n_classes=None):
    '''Build the classification graph: a ReLU dense layer applied to every time step, two stacked
    LSTM layers and a linear output layer. Weights and biases are randomly initialised.

    Arguments:
        inputs: float tensor or placeholder of shape [batch, time steps, features].
        n_time_steps, n_features, n_hidden_units, n_classes: network dimensions. A value left as
            None falls back to the module-level constant N_TIME_STEPS, N_FEATURES, N_HIDDEN_UNITS
            or N_CLASSES respectively, so calling create_LSTM(inputs) alone still works.

    Returns:
        Tensor of shape [batch, n_classes] with the unnormalised class scores (logits) computed
        from the LSTM output at the last time step.
    '''
    if n_time_steps is None:
        n_time_steps = N_TIME_STEPS
    if n_features is None:
        n_features = N_FEATURES
    if n_hidden_units is None:
        n_hidden_units = N_HIDDEN_UNITS
    if n_classes is None:
        n_classes = N_CLASSES

    # Weights and biases initialization
    W = {'hidden': tf.Variable(tf.random_normal([n_features, n_hidden_units])),
         'output': tf.Variable(tf.random_normal([n_hidden_units, n_classes]))}
    B = {'hidden': tf.Variable(tf.random_normal([n_hidden_units], mean=1.0)),
         'output': tf.Variable(tf.random_normal([n_classes]))}

    # Dense layer on every time step: go time-major and flatten to [time steps * batch, features]
    X = tf.transpose(inputs, [1, 0, 2])
    X = tf.reshape(X, [-1, n_features])
    hidden = tf.nn.relu(tf.matmul(X, W['hidden']) + B['hidden'])
    # Cut back into a list of n_time_steps tensors, the input format of static_rnn
    hidden = tf.split(hidden, n_time_steps, 0)

    # Stack 2 LSTM layers
    lstm_layers = [tf.contrib.rnn.BasicLSTMCell(n_hidden_units, forget_bias=1.0) for _ in range(2)]
    lstm_layers = tf.contrib.rnn.MultiRNNCell(lstm_layers)

    # Operate with all timesteps
    outputs, _ = tf.contrib.rnn.static_rnn(lstm_layers, hidden, dtype=tf.float32)

    # Only the output of the last timestep feeds the classifier
    lstm_last_output = outputs[-1]

    return tf.matmul(lstm_last_output, W['output']) + B['output']


def _plot_history(train_values, test_values, quantity, title, ylabel, ylim):
    '''Draw the per-epoch train (red) and test (green) curves of one metric in a new figure.'''
    plt.figure(figsize=(12, 8))
    plt.plot(np.array(train_values), "r-", label="Train " + quantity)
    plt.plot(np.array(test_values), "g-", label="Test " + quantity)
    plt.title(title)
    plt.legend(loc='upper right', shadow=True)
    plt.ylabel(ylabel)
    plt.xlabel('Training Epoch')
    plt.ylim(*ylim)
    # The x range comes from the history itself rather than from a global epoch count
    plt.xlim(0, len(train_values))
    plt.show()


# Print LSTM results and plots
def print_results(history, y_test, predictions, f1scores, plot):
    '''Print the macro-averaged F1 Score of the predictions and, optionally, details and plots.

    Arguments:
        history: dict with the lists 'train_loss', 'test_loss', 'train_acc' and 'test_acc'
            (one value per epoch); only read when plot is True.
        y_test: one-hot encoded true labels, shape [samples, classes].
        predictions: predicted class scores, same shape as y_test.
        f1scores: if True, also print the F1 Score of every class.
        plot: if True, show the loss and accuracy curves and the confusion matrix.
    '''
    y_test = np.asarray(y_test)
    n_classes = y_test.shape[1]

    # Confusion matrix (rows: true class, columns: predicted class). Passing labels keeps it
    # n_classes x n_classes even when a class is missing from both y_test and predictions.
    max_test = np.argmax(y_test, axis=1)
    max_predictions = np.argmax(predictions, axis=1)
    confusion_matrix = metrics.confusion_matrix(max_test, max_predictions, labels=np.arange(n_classes))

    # Per-class F1 = 2*TP / (true count + predicted count), which equals
    # 2*precision*recall / (precision + recall). Classes where that is undefined score 0.
    true_positives = np.diag(confusion_matrix).astype(float)
    denominator = confusion_matrix.sum(axis=1) + confusion_matrix.sum(axis=0)
    f1s = np.divide(2 * true_positives, denominator, out=np.zeros(n_classes), where=denominator > 0)

    # Print macro-averaged F1 Score
    print("Macro F1 Score: %.4f" %f1s.mean())

    # Print F1 Score for all activities when f1scores=True
    if f1scores:
        print(f1s)

    # Plot train/test loss and accuracy vs. epochs, and the confusion matrix, when plot=True
    if plot:
        _plot_history(history['train_loss'], history['test_loss'], "loss",
                      "Training and Test Loss over Iterations", 'Loss value', (0,))
        _plot_history(history['train_acc'], history['test_acc'], "accuracy",
                      "Training and Test Accuracy over Iterations", 'Accuracy values', (0, 1))

        plt.figure(figsize=(16, 14))
        sns.heatmap(confusion_matrix, annot=True, fmt="d", cmap="Blues")
        plt.title("Confusion matrix")
        plt.ylabel('True label')
        plt.xlabel('Predicted label')
        plt.show()


# Main function which creates the LSTM, runs the training process and shows results
def LSTM_train(N_CLASSES, N_HIDDEN_UNITS, N_TIME_STEPS, N_FEATURES, N_EPOCHS, BATCH_SIZE, LEARNING_RATE, L2_LOSS, f1scores=False, plot=False, seed=None):
    '''Build the network, train it on the module-level X_train / y_train and report the results
    obtained on the module-level X_test / y_test.

    Arguments:
        N_CLASSES, N_HIDDEN_UNITS, N_TIME_STEPS, N_FEATURES: network dimensions.
        N_EPOCHS: number of passes over the training set.
        BATCH_SIZE: number of training windows per optimisation step.
        LEARNING_RATE: learning rate of the Adam optimiser.
        L2_LOSS: factor applied to the L2 penalty of all trainable variables.
        f1scores, plot: forwarded to print_results.
        seed: optional graph-level random seed; None (default) leaves TensorFlow unseeded.
    '''
    # Reset current TensorFlow Graph (clear sets of TensorFlow Operations and Tensors)
    tf.reset_default_graph()
    if seed is not None:
        # Must come after the reset: the seed belongs to the (new) default graph
        tf.set_random_seed(seed)

    # Placeholders for the input windows and their one-hot labels
    X = tf.placeholder(tf.float32, [None, N_TIME_STEPS, N_FEATURES])
    Y = tf.placeholder(tf.float32, [None, N_CLASSES])

    # Feed-forward pass, built from the arguments of this call rather than from module globals
    pred_Y = create_LSTM(X, n_time_steps=N_TIME_STEPS, n_features=N_FEATURES,
                         n_hidden_units=N_HIDDEN_UNITS, n_classes=N_CLASSES)
    pred_softmax = tf.nn.softmax(pred_Y, name="y_")

    # Computation of L2 loss and total loss
    l2 = L2_LOSS * sum(tf.nn.l2_loss(tf_var) for tf_var in tf.trainable_variables())
    loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits = pred_Y, labels = Y)) + l2

    # Training step and accuracy metric
    optimizer = tf.train.AdamOptimizer(learning_rate=LEARNING_RATE).minimize(loss)
    correct_pred = tf.equal(tf.argmax(pred_softmax, 1), tf.argmax(Y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_pred, dtype=tf.float32))

    # Initialize the dictionary where loss and accuracy history will be saved
    history = dict(train_loss=[], train_acc=[], test_loss=[], test_acc=[])

    train_count = len(X_train)

    # The context manager closes the session (and frees its resources) even if training fails
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        # Iterate over specified EPOCHS
        for i in range(1, N_EPOCHS + 1):
            # Slicing clamps at the end of the array, so the last, smaller batch is trained on too
            for start in range(0, train_count, BATCH_SIZE):
                end = start + BATCH_SIZE
                sess.run(optimizer, feed_dict={X: X_train[start:end], Y: y_train[start:end]})

            acc_train, loss_train = sess.run([accuracy, loss], feed_dict={X: X_train, Y: y_train})
            acc_test, loss_test = sess.run([accuracy, loss], feed_dict={X: X_test, Y: y_test})

            history['train_loss'].append(loss_train)
            history['train_acc'].append(acc_train)
            history['test_loss'].append(loss_test)
            history['test_acc'].append(acc_test)

            if i==1 or i%5 == 0:
                print(f'Epoch: {i} --- test accuracy: {acc_test} --- loss: {loss_test}')

        # Last run to obtain final results
        predictions, acc_final, loss_final = sess.run([pred_softmax, accuracy, loss], feed_dict={X: X_test, Y: y_test})

    # Show results
    print(f'Final results: accuracy: {acc_final} loss: {loss_final}')
    print_results(history, y_test, predictions, f1scores, plot)

### Hyper-parameters setting and run LSTM

In [None]:
# Network dimensions: time steps, features and classes are read off the prepared arrays
N_TIME_STEPS = X_train.shape[1]
N_FEATURES = X_train.shape[2]
N_CLASSES = y_train.shape[1]
N_HIDDEN_UNITS = 60

# Training hyper-parameters
LEARNING_RATE = 0.001
L2_LOSS = 0.02
BATCH_SIZE = 500
N_EPOCHS = 50

# Output switches: per-class F1 Scores and plots are both disabled
f1scores = False
plot = False

# Build, train and evaluate the network
LSTM_train(
    N_CLASSES=N_CLASSES,
    N_HIDDEN_UNITS=N_HIDDEN_UNITS,
    N_TIME_STEPS=N_TIME_STEPS,
    N_FEATURES=N_FEATURES,
    N_EPOCHS=N_EPOCHS,
    BATCH_SIZE=BATCH_SIZE,
    LEARNING_RATE=LEARNING_RATE,
    L2_LOSS=L2_LOSS,
    f1scores=f1scores,
    plot=plot,
)