In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pandas import read_csv
from pandas import DataFrame

from numpy import dstack

import copy

import os
from glob import glob

from path import Path

from numpy import mean
from numpy import std

from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Flatten
from keras.layers import Dropout
from keras.layers import LSTM
from keras.layers import SimpleRNN

from keras.layers import Bidirectional

from keras.utils.vis_utils import plot_model

from keras.callbacks import EarlyStopping

from keras.optimizers import Adam

import keras.backend as K

import random

from sklearn.model_selection import train_test_split

from matplotlib import pyplot

Using TensorFlow backend.


## Load Data

In [None]:
from helpers.classes.data_loader import DataLoader

# Root directory that holds one sub-directory per sample. The original location
# stays as the fallback; set OPENPOSE_KEYPOINTS_ROOT to run on another machine.
root_path = os.environ.get(
    "OPENPOSE_KEYPOINTS_ROOT",
    "/Users/allarviinamae/EduWorkspace/openpose-jupyter-data-exploration/augmented-keypoints",
)

# os.listdir() returns entries in arbitrary order. Sorting keeps positional
# lookups (such as example_sample_idx) and the seeded train/test split stable
# between runs and machines.
sample_dir_names = sorted(
    name
    for name in os.listdir(root_path)
    if os.path.isdir(os.path.join(root_path, name))
)

samples = DataLoader.get_samples_list(sample_dir_names, root_path)

# Classifier labels, where 0 = backflip and 1 = flack.
y_labels = DataLoader.get_y_labels(sample_dir_names)

Loading frames for 0/11300
Loading frames for 1/11300
Loading frames for 2/11300
Loading frames for 3/11300
Loading frames for 4/11300
Loading frames for 5/11300
Loading frames for 6/11300
Loading frames for 7/11300
Loading frames for 8/11300
Loading frames for 9/11300
Loading frames for 10/11300
Loading frames for 11/11300
Loading frames for 12/11300
Loading frames for 13/11300
Loading frames for 14/11300
Loading frames for 15/11300
Loading frames for 16/11300
Loading frames for 17/11300
Loading frames for 18/11300
Loading frames for 19/11300
Loading frames for 20/11300
Loading frames for 21/11300
Loading frames for 22/11300
Loading frames for 23/11300
Loading frames for 24/11300
Loading frames for 25/11300
Loading frames for 26/11300
Loading frames for 27/11300
Loading frames for 28/11300
Loading frames for 29/11300
Loading frames for 30/11300
Loading frames for 31/11300


In [None]:
i = 5  # number of leading entries to preview

# Print the first `i` sample directory names, then the first `i` labels.
# Plain loops replace the former list comprehensions, which were used only for
# their print() side effect, walked every entry, and left a throwaway list of
# None values bound to `a`.
for sdn_i, sample_dir_name in enumerate(sample_dir_names):
    if sdn_i >= i:
        break
    print(sample_dir_name)

for y_i, y_label in enumerate(y_labels):
    if y_i >= i:
        break
    print(y_label)

## Data padding

In [None]:
# Project helper used to pad the loaded samples (implementation not visible here).
from helpers.classes.padder import Padder

# presumably pads every sample to a common number of timesteps so they can be
# stacked into one array — TODO confirm against Padder
padded_samples_list = Padder.get_padded_samples(samples)
# presumably one-hot encodes the 0/1 labels; the model reads its output size
# from trainy.shape[1] — TODO confirm against DataLoader
categorical_y_labels = DataLoader.get_categorical_y_labels(y_labels)

# Convert both to NumPy arrays for train_test_split and Keras.
padded_samples_ndarray = np.asarray(padded_samples_list)
categorical_y_labels_ndarray = np.asarray(categorical_y_labels)

## Build model

In [None]:
def get_simple_rnn_model(lstm_units, n_outputs, n_features, n_timesteps):
    """Build and compile the SimpleRNN sequence classifier.

    Layer stack, in order: SimpleRNN (returning the full sequence),
    Dropout(0.5), Dense(relu), Flatten, Dense(softmax).

    Args:
        lstm_units: Unit count for both the SimpleRNN layer and the hidden
            Dense layer. Despite the name, no LSTM layer is created.
        n_outputs: Width of the softmax output layer.
        n_features: Number of features per timestep.
        n_timesteps: Number of timesteps per input sequence.

    Returns:
        The compiled Sequential model (categorical cross-entropy loss, Adam
        optimizer, accuracy metric).
    """
    optimizer = Adam(lr=0.001)

    input_shape = (n_timesteps, n_features)
    print(f"Model input shape {input_shape}")

    stack = (
        SimpleRNN(lstm_units, return_sequences=True, input_shape=input_shape),
        Dropout(0.5),
        Dense(lstm_units, activation='relu'),
        Flatten(),
        Dense(n_outputs, activation='softmax'),
    )

    model = Sequential()
    for layer in stack:
        model.add(layer)

    model.compile(
        loss='categorical_crossentropy',
        optimizer=optimizer,
        metrics=['accuracy'],
    )

    return model

def evaluate_model(trainX, trainy, testX, testy,
                   epochs=5, batch_size=1, lstm_units=2, verbose=1,
                   use_early_stopping=False):
    """Train a fresh SimpleRNN model and score it on the held-out test data.

    Args:
        trainX: Training inputs; shape[1] is read as the timestep count and
            shape[2] as the feature count.
        trainy: Training targets; shape[1] is read as the number of outputs.
        testX: Test inputs passed to model.evaluate().
        testy: Test targets passed to model.evaluate().
        epochs: Number of training epochs (default 5).
        batch_size: Batch size for both fit() and evaluate() (default 1).
        lstm_units: Unit count forwarded to get_simple_rnn_model() (default 2).
        verbose: Keras verbosity for fit() (default 1).
        use_early_stopping: When True, stop training once val_loss has not
            improved for 3 epochs. Off by default, as before.

    Returns:
        Tuple (history, accuracy, model): the Keras History from fit(), the
        accuracy reported by evaluate() on the test data, and the trained model.
    """
    n_timesteps, n_features, n_outputs = trainX.shape[1], trainX.shape[2], trainy.shape[1]

    model = get_simple_rnn_model(lstm_units, n_outputs, n_features, n_timesteps)

    # summary() prints the table itself and returns None; wrapping it in
    # print() emitted a stray "None" after the table.
    model.summary()

    # Only build the callback when it is actually going to be used.
    callbacks = None
    if use_early_stopping:
        callbacks = [EarlyStopping(monitor='val_loss', patience=3)]

    # 33% of the training data is set aside by Keras for per-epoch validation.
    history = model.fit(trainX,
                        trainy,
                        epochs=epochs,
                        batch_size=batch_size,
                        verbose=verbose,
                        validation_split=0.33,
                        callbacks=callbacks)

    # evaluate model on the test data
    _, accuracy = model.evaluate(testX, testy, batch_size=batch_size, verbose=0)

    return history, accuracy, model

In [None]:
def summarize_results(scores):
    """Print the raw scores followed by their mean and standard deviation.

    Args:
        scores: Sequence of numeric accuracy scores.
    """
    print(scores)
    average = mean(scores)
    spread = std(scores)
    summary_line = 'Accuracy: %.3f%% (+/-%.3f)' % (average, spread)
    print(summary_line)

In [None]:
def save_model(model, repeat):
    """Write a model's architecture, weights and diagram under model-output/.

    Produces model-<repeat>.json (architecture), model-<repeat>.h5 (weights)
    and model-plot-<repeat>.png (layer diagram).

    Args:
        model: Keras model exposing to_json() and save_weights().
        repeat: Identifier embedded in the output file names.
    """
    output_dir = "model-output"
    # Create the target directory on first use so a fresh checkout does not
    # fail with FileNotFoundError on the first open().
    os.makedirs(output_dir, exist_ok=True)

    # serialize model to JSON
    model_json = model.to_json()
    with open(f"{output_dir}/model-{repeat}.json", "w") as json_file:
        json_file.write(model_json)

    # serialize weights to HDF5
    model.save_weights(f"{output_dir}/model-{repeat}.h5")

    plot_model(model, to_file=f"{output_dir}/model-plot-{repeat}.png", show_shapes=True, show_layer_names=True)

    print("Saved model to disk")

## Run experiment

In [None]:
# Hold out 20% of the padded samples as a test set; the fixed random_state
# keeps the split repeatable for a given input order.
X_train, X_test, y_train, y_test = train_test_split(padded_samples_ndarray,
                                                    categorical_y_labels_ndarray,
                                                    test_size=0.2,
                                                    random_state=42)

print(f"X train len: {len(X_train)}, y train len:{len(y_train)} --- X test len:{len(X_test)}, y test len:{len(y_test)}")

# Per-epoch metric histories. run_experiment() adds one column per repeat,
# keyed by str(repeat index).
train_loss_history = DataFrame()
val_loss_history = DataFrame()

train_accuracy_history = DataFrame()
val_accuracy_history = DataFrame()

# Trained models, appended by run_experiment() in repeat order.
model_history = []

# run an experiment
def run_experiment(repeats=5):
    """Train, save and score the model `repeats` times, then summarize.

    Each repeat trains a fresh model via evaluate_model(), appends it to the
    module-level model_history list, saves it with save_model(), and records
    its per-epoch loss/accuracy curves as column str(r) of the module-level
    history DataFrames.

    Args:
        repeats: Number of independent training runs (default 5).
    """
    scores = list()

    for r in range(repeats):
        history, score, model = evaluate_model(X_train, y_train, X_test, y_test)

        model_history.append(model)
        save_model(model, r)

        # store history
        train_loss_history[str(r)] = history.history['loss']
        val_loss_history[str(r)] = history.history['val_loss']
        train_accuracy_history[str(r)] = history.history['accuracy']
        val_accuracy_history[str(r)] = history.history['val_accuracy']

        score = score * 100.0
        # evaluate_model() scores on the held-out test split (X_test/y_test),
        # not on the validation split, so label the number accordingly.
        print('>#%d test accuracy: %.3f' % (r+1, score))
        scores.append(score)

    # summarize results
    summarize_results(scores)

In [None]:
# Run with the default of five repeats; each repeat trains, saves and scores a model.
run_experiment()

## Plots

In [None]:
# Column '3' holds the fourth repeat of run_experiment() (columns are keyed by
# str(repeat index)). An explicit figure keeps this plot independent of any
# pyplot state left behind by other cells.
fig, ax = pyplot.subplots()
ax.plot(train_loss_history['3'], color='blue', label='train')
ax.plot(val_loss_history['3'], color='orange', label='validation')
# The network is built around a SimpleRNN layer, not an LSTM.
ax.set_title('SimpleRNN model train vs validation loss')
ax.set_ylabel('loss')
ax.set_xlabel('epoch')
ax.legend(['train', 'validation'], loc='upper right')

fig.savefig('model-output/model-train-vs-validation-loss.png')

In [None]:
# Column '3' holds the fourth repeat of run_experiment() (columns are keyed by
# str(repeat index)). An explicit figure keeps this plot independent of any
# pyplot state left behind by other cells.
fig, ax = pyplot.subplots()
ax.plot(train_accuracy_history['3'], color='blue', label='train')
ax.plot(val_accuracy_history['3'], color='orange', label='validation')
# The network is built around a SimpleRNN layer, not an LSTM.
ax.set_title('SimpleRNN model train vs validation accuracy')
ax.set_ylabel('accuracy')
ax.set_xlabel('epoch')
ax.legend(['train', 'validation'], loc='upper right')

fig.savefig('model-output/model-train-vs-validation-accuracy.png')

## Sample

In [None]:
# Position of the sample to inspect, and position in model_history of the
# trained model to inspect it with (models are appended in repeat order, so 3
# is the fourth repeat).
example_sample_idx = 89
model_history_idx = 3

# Directory name, padded data and model for the chosen example.
example_sample_dir_name = sample_dir_names[example_sample_idx]
example_sample = padded_samples_ndarray[example_sample_idx]
example_model = model_history[model_history_idx]

## Predictions

In [None]:
def predictions(loaded_model, samples, sample_dir_names, y_labels):
    """Print predicted versus actual class for every sample.

    Args:
        loaded_model: Object exposing predict_classes(samples).
        samples: Inputs handed to the model; its length sets the row count.
        sample_dir_names: Name shown for each sample, indexed like samples.
        y_labels: Actual class for each sample, indexed like samples.
    """
    predicted_classes = loaded_model.predict_classes(samples)
    row_format = "Name=%s, X=%s, Predicted=%s, Actual=%s, same=%s"

    # One output row per sample: name, position, prediction, truth, match flag.
    for idx in range(len(samples)):
        predicted = predicted_classes[idx]
        expected = y_labels[idx]
        is_match = True if predicted == expected else False

        print(row_format % (sample_dir_names[idx], idx, predicted, expected, is_match))
    
# Print predictions of the selected model for every padded sample (the full
# set, i.e. training and test data alike).
predictions(example_model, padded_samples_ndarray, sample_dir_names, y_labels)

## Activation

In [None]:
# Names of the layers in the selected model, printed for reference alongside
# the example sample's directory name and array shape.
layer_names = [layer.name for layer in example_model.layers]

print(example_sample_dir_name)
print(example_sample.shape)
print(layer_names)

def activations(model, example_sample, layer_name=None):
    """Return the recurrent layer's output for a single sample.

    Args:
        model: Keras model to probe.
        example_sample: 2-D array; shape[0] is read as the timestep count and
            shape[1] as the feature count.
        layer_name: Optional name of the layer whose output is captured. When
            omitted, the first SimpleRNN layer in the model is used.

    Returns:
        The selected layer's output for the sample, with the batch axis removed.

    Raises:
        ValueError: If layer_name is omitted and the model has no SimpleRNN layer.
    """
    n_timesteps, n_features = example_sample.shape[0], example_sample.shape[1]

    print(f"n_timesteps: {n_timesteps}, n_features: {n_features}")

    # Single-sample batch. float64 matches the np.zeros buffer that was
    # previously filled element by element.
    x = np.asarray(example_sample, dtype=np.float64).reshape(1, n_timesteps, n_features)

    if layer_name is not None:
        target_layer = model.get_layer(layer_name)
    else:
        # Auto-generated Keras layer names carry a running counter, so a fixed
        # name such as 'simple_rnn_4' only matches one particular model built
        # in one particular order. Select the layer by type instead.
        target_layer = next(
            (layer for layer in model.layers if isinstance(layer, SimpleRNN)),
            None,
        )
        if target_layer is None:
            raise ValueError("model has no SimpleRNN layer to read activations from")

    fetch_output = K.function([model.input], [target_layer.output])

    # First (only) requested output, first (only) sample in the batch.
    return fetch_output([x])[0][0]


# Capture the recurrent layer's output for the example sample with the selected model.
act = activations(model_history[model_history_idx], example_sample)
print(act)
# Bare last expression: the notebook shows the array's shape as the cell output.
act.shape

In [None]:
from io import BytesIO
from PIL import Image as PILImage
from PIL import ImageDraw
from IPython.display import Image

def get_image(img, n_timesteps, img_idx, cell_size=48):
    """Render one activation strip as a PNG with a number drawn per timestep.

    Args:
        img: Pixel array, cast to uint8 before conversion to a PIL image.
            Presumably one column per timestep — the caller passes
            img.shape[1] as n_timesteps.
        n_timesteps: Number of timesteps in the strip; the output is 25 px
            wide per timestep.
        img_idx: Zero-based strip index; labels start at img_idx * 30.
        cell_size: NOTE(review): the passed value is never used. It is
            recomputed below from the strip width, so it is 25 whenever
            n_timesteps > 0. Confirm whether the argument should be honoured.

    Returns:
        An IPython.display.Image wrapping the PNG bytes.
    """
    strip_width = 25 * n_timesteps
    # Overrides the argument (see docstring).
    cell_size = int(strip_width / n_timesteps)

    # The resized strip is strip_width px wide and cell_size px tall in total.
    strip = PILImage.fromarray(img.astype(np.uint8)).resize((strip_width, cell_size))

    painter = ImageDraw.Draw(strip)
    first_label = img_idx * 30
    for column in range(n_timesteps):
        # Draw the timestep number at the top-left corner of its cell.
        painter.text((column * cell_size, 0), str(first_label + column))

    png_buffer = BytesIO()
    strip.save(png_buffer, 'png')
    return Image(data=png_buffer.getvalue())

def visualize_neurons(act, cell_size=48):
    """Turn an activation matrix into up to four labelled image strips.

    Each neuron becomes one pixel row and each timestep one pixel column. The
    red channel is 255 * (1 - score) and the green channel 255 * score, where
    score = (activation + 1) / 2. Row 0 and the blue channel keep the fill
    value of 128. The picture is cut at timesteps 30, 60 and 90, and each
    non-empty piece is rendered with get_image().

    Args:
        act: 2-D array indexed as (timestep, neuron). Presumably values lie in
            [-1, 1] (e.g. tanh outputs) so scores land in [0, 1] — TODO confirm.
        cell_size: Accepted for compatibility; not used by this function.

    Returns:
        List of images as returned by get_image(), in timestep order.
    """
    n_neurons = act.shape[1]
    n_timesteps = act.shape[0]

    fill_value = 128

    # One extra row on top (row 0) stays at the fill value; get_image() draws
    # the timestep numbers at the top of each strip.
    img = np.full((n_neurons + 1, n_timesteps, 3), fill_value)

    # add 1 to each value in matrix and then divide by 2
    scores = (act[:, :].T + 1) / 2

    img[1:, :, 0] = 255 * (1 - scores)
    img[1:, :, 1] = 255 * scores

    strips = [
        img[:, :30, :],
        img[:, 30:60, :],
        img[:, 60:90, :],
        img[:, 90:, :],
    ]

    actual_imgs = []
    for strip_idx, strip in enumerate(strips):
        n_strip_timesteps = strip.shape[1]

        # Sequences of 90 timesteps or fewer leave trailing strips empty;
        # get_image() divides by the timestep count, so skip them rather than
        # fail with ZeroDivisionError. strip_idx still reflects the strip's
        # original position so the timestep labels stay correct.
        if n_strip_timesteps == 0:
            continue

        actual_imgs.append(get_image(strip, n_strip_timesteps, strip_idx))

    return actual_imgs

# Render the captured activations as labelled image strips.
example_sample_imgs = visualize_neurons(act)

# display() is not imported in this notebook; it is expected to be supplied by
# the IPython/Jupyter session.
for img in example_sample_imgs:
    display(img)