In [1]:
cd ..

/Users/svenvanderburg/projects/modys-video


# Experiment 1.1 Explore models and approaches for deep learning
Fine-tune models to predict left leg amplitude, using the lying videos as input.
For now, this is just a quick test of the settings that Shankara obtained from previous experiments, i.e.:

* Cut off the first 50 frames
* Interpolate when the likelihood is below 0.7
* Use the opposite body part if all likelihoods for a body part are below 0.7
* Use a standard scaler to scale the data
    

We don't really know whether this is the best set of hyperparameters. We can experiment further with the following:
* Use a StandardScaler
* Include likelihood
* Use different model architectures
* Use interpolation
* TODO: write one function that takes these settings as parameters

In [2]:
import numpy as np
import pandas as pd
from sklearn.model_selection import GroupShuffleSplit
from sklearn.metrics import mean_absolute_error
from sklearn.preprocessing import StandardScaler
import tensorflow.keras as keras


from src.helpers import read_scores
from src.data_generators import RawDataGenerator
from src.data_selection import MultipleScoreSelector
from src.ai_func import cross_validation_generator
from src.settings import LYING_VIDEOS_DATA_FOLDER, SITTING_VIDEOS_DATA_FOLDER, DATA_FOLDER


from keras.models import Sequential
from keras.models import Model
from keras.layers import Input
from keras.layers import Dense
from keras.layers import Flatten
from keras.layers import Dropout
from keras.layers import LSTM
from keras.layers import Lambda
from keras.layers.convolutional import Conv1D
from keras.layers.convolutional import MaxPooling1D
from keras.layers.advanced_activations import LeakyReLU

%load_ext autoreload
%autoreload 2

In [3]:
# Load the scoring spreadsheet; the score selectors further down transform this frame.
scores_df = read_scores(DATA_FOLDER / 'data_Scoring_DIS_proximal_trunk_V1.1.xlsx')

## Definitions

In [32]:
# Score columns to predict; the number of entries determines `n_outputs` below.
SCORES_TO_USE = ['D_LLP_R_tA_pscore']
# Scorer passed to MultipleScoreSelector — presumably a scorer ID from the
# scoring sheet; verify against src.data_selection.
SCORER_TO_USE = 1

# Keyword arguments forwarded to every RawDataGenerator built in this notebook.
data_generation_params = {
    'videos_folder': LYING_VIDEOS_DATA_FOLDER,
    'cutoff': 50,  # per the notes at the top: cut off the first 50 frames
    'interpolation_threshold': 0.7,  # per the notes at the top: interpolate below this likelihood
    'batch_size': 1
}  

## Pipeline for training a deep neural network

### Define model architecture (here: simple CNN)

In [33]:
# Generate some dev data to get X shape
selector = MultipleScoreSelector(scores_to_use=SCORES_TO_USE, scorer_to_use=SCORER_TO_USE)
dev_selection = selector.transform(scores_df)
dev_generator = RawDataGenerator(dev_selection, **data_generation_params)
# Fetch the first batch with indexing syntax rather than calling the dunder
# method directly; only its shape is needed to size the network input.
X, y = dev_generator[0]
# Axis 0 of X is skipped; axes 1 and 2 are used as (timesteps, features) for
# the model's input shape.
n_timesteps, n_features = (X.shape[1], X.shape[2])



In [34]:
# One output unit per selected score.
n_outputs = len(SCORES_TO_USE)

def get_model():
    """Build the (uncompiled) 1D-CNN regression model used in this experiment.

    The input shape is taken from the module-level ``n_timesteps`` and
    ``n_features``; the size of the output layer follows ``n_outputs`` so the
    network keeps matching ``SCORES_TO_USE`` when more scores are added.

    Returns
    -------
    Sequential
        Two Conv1D/Conv1D/MaxPooling1D stages followed by a dense head.
    """
    model = Sequential()
    # Only the first layer needs ``input_shape``; later layers infer their
    # input from the preceding layer's output.
    model.add(Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=(n_timesteps, n_features)))
    model.add(Conv1D(filters=64, kernel_size=3, activation='relu'))
    model.add(MaxPooling1D(pool_size=2))
    model.add(Conv1D(filters=32, kernel_size=5, activation='relu'))
    model.add(Conv1D(filters=32, kernel_size=5, activation='relu'))
    model.add(MaxPooling1D(pool_size=2))
    model.add(Flatten())
    # NOTE(review): no activation is set here, so this layer and the output
    # layer are both linear — confirm that is intended.
    model.add(Dense(100))
    # One linear output per score to predict (was hard-coded to 1).
    model.add(Dense(n_outputs))
    return model


def get_model_old():
    """Build the earlier functional-API CNN (kept for reference).

    Batch-normalises the input, applies two Conv1D/Conv1D/MaxPooling1D/Dropout
    stages (32 filters, then 64), flattens, and finishes with a 100-unit dense
    layer and an ``n_outputs``-unit output layer. Input dimensions come from
    the module-level ``n_timesteps`` and ``n_features``.

    Returns
    -------
    keras.Model
        The uncompiled model.
    """
    layers = keras.layers

    inputs = layers.Input(shape=(n_timesteps, n_features))
    hidden = layers.BatchNormalization()(inputs)

    # The two convolutional stages differ only in their filter count.
    for n_filters in (32, 64):
        hidden = layers.Conv1D(filters=n_filters, kernel_size=3, activation='relu')(hidden)
        hidden = layers.Conv1D(filters=n_filters, kernel_size=3, activation='relu')(hidden)
        hidden = layers.MaxPooling1D(pool_size=2)(hidden)
        hidden = layers.Dropout(0.7)(hidden)

    hidden = layers.Flatten()(hidden)
    hidden = layers.Dense(100)(hidden)
    outputs = layers.Dense(n_outputs)(hidden)

    return keras.Model(inputs=inputs, outputs=outputs)
# Print the layer summary of the architecture returned by get_model().
get_model().summary()

Model: "sequential_6"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1d_24 (Conv1D)           (None, 449, 64)           3520      
_________________________________________________________________
conv1d_25 (Conv1D)           (None, 447, 64)           12352     
_________________________________________________________________
max_pooling1d_12 (MaxPooling (None, 223, 64)           0         
_________________________________________________________________
conv1d_26 (Conv1D)           (None, 219, 32)           10272     
_________________________________________________________________
conv1d_27 (Conv1D)           (None, 215, 32)           5152      
_________________________________________________________________
max_pooling1d_13 (MaxPooling (None, 107, 32)           0         
_________________________________________________________________
flatten_6 (Flatten)          (None, 3424)             

## Train network

In [35]:
def train_model(train_generator, epochs=30):
    """Build a fresh model via ``get_model`` and fit it on ``train_generator``.

    Parameters
    ----------
    train_generator
        Batch source passed straight to ``model.fit`` (a ``RawDataGenerator``
        in this notebook).
    epochs : int, default 30
        Number of training epochs. The default matches the value that used to
        be hard-coded, so existing calls behave as before.

    Returns
    -------
    The fitted model, compiled with MAE loss and an Adam optimizer created
    with its default arguments.
    """
    model = get_model()
    model.compile(loss='mae', optimizer=keras.optimizers.Adam())
    model.fit(train_generator, epochs=epochs)
    return model

def train_cross_val(cross_val, max_folds=1):
    """Train and predict on the splits yielded by ``cross_val``.

    Parameters
    ----------
    cross_val : iterable
        Yields ``(train_scores, test_scores)`` pairs; each element is handed
        to ``RawDataGenerator`` together with ``data_generation_params``.
    max_folds : int or None, default 1
        Maximum number of splits to train on. The default of 1 reproduces the
        previous behaviour (stop after the first split to keep the experiment
        quick); pass ``None`` to train on every split.

    Returns
    -------
    (y_test, y_pred)
        ``y_test`` is the concatenation of the ``test_scores`` of all
        processed splits, ``y_pred`` the vertically stacked predictions.

    Raises
    ------
    ValueError
        If ``max_folds`` is smaller than 1, or if ``cross_val`` yields no
        splits at all.
    """
    if max_folds is not None and max_folds < 1:
        raise ValueError('max_folds must be a positive integer or None')

    y_pred = []
    y_test = []
    for i_split, (train_scores, test_scores) in enumerate(cross_val):
        print(f'Fitting for 5-fold split {i_split}')
        train_generator = RawDataGenerator(train_scores, **data_generation_params)
        test_generator = RawDataGenerator(test_scores, **data_generation_params)
        model = train_model(train_generator)
        # NOTE(review): rows of the prediction only line up with `test_scores`
        # if the generator yields samples in that order — confirm it does not
        # shuffle.
        y_pred.append(model.predict(test_generator))
        y_test.append(test_scores)
        # Check the limit at the end of the iteration so that no additional
        # split is pulled from `cross_val` once the limit is reached.
        if max_folds is not None and i_split + 1 >= max_folds:
            break

    if not y_pred:
        raise ValueError('cross_val yielded no splits; nothing to train or evaluate')

    y_pred = np.vstack(y_pred)
    y_test = pd.concat(y_test)
    return y_test, y_pred

def evaluate(y_test, y_pred):
    """Compute the mean absolute error for every score column.

    Parameters
    ----------
    y_test : pandas.DataFrame
        True scores, one column per score.
    y_pred : array-like with 2-D indexing
        Predictions; column ``i`` is compared against column ``i`` of
        ``y_test``.

    Returns
    -------
    pandas.DataFrame
        One row per score with the columns ``score`` and ``mae``.
    """
    # Iterating a DataFrame yields its column labels, so `position` is the
    # column index shared by `y_test` and `y_pred`.
    per_score = [
        {
            'score': score_name,
            'mae': mean_absolute_error(y_test.iloc[:, position], y_pred[:, position]),
        }
        for position, score_name in enumerate(y_test)
    ]
    return pd.DataFrame(per_score)

In [36]:
print(f'Training model for scorer {SCORER_TO_USE}')
# Select the configured scores for the chosen scorer.
selector = MultipleScoreSelector(scores_to_use=SCORES_TO_USE, scorer_to_use=SCORER_TO_USE)
selected_data = selector.transform(scores_df)
# Build the train/test splits, then train and predict. As called here, only
# the first split is trained.
cross_val = cross_validation_generator(selected_data)
y_test, y_pred = train_cross_val(cross_val)

Training model for scorer 1
Fitting for 5-fold split 0
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [37]:
# Leave the DataFrame as the cell's last expression so the notebook renders it
# with its rich table display instead of plain printed text.
evaluate(y_test, y_pred)

               score       mae
0  D_LLP_R_tA_pscore  0.103936
