# Experiment 3: Basic deep learning
Train two deep learning models: one on the lying videos and one on the sitting videos.

### Experiment 3.1 

* Input: lying video
* Output: Left/right leg amplitude/duration (4 scores) 

### Experiment 3.2 

* Input: sitting video
* Output: Left/right arm amplitude/duration (4 scores) 

In [59]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error
import tensorflow.keras as keras

from src.helpers import read_scores
from src.data_generators import RawDataGenerator
from src.data_selection import MultipleScoreSelector
from src.ai_func import cross_validation_generator
from src.settings import LYING_VIDEOS_DATA_FOLDER, SITTING_VIDEOS_DATA_FOLDER, DATA_FOLDER

In [2]:
%load_ext autoreload
%autoreload 2

### Read metadata

In [60]:
# Load the scoring spreadsheet from DATA_FOLDER with the project helper
# `read_scores`. `scores_df` is the table later passed to
# `MultipleScoreSelector.transform`.
scores_df = read_scores(DATA_FOLDER / 'data_Scoring_DIS_proximal_trunk_V1.1.xlsx')

## Definitions

In [89]:
# Score columns the network is trained to predict; the model gets one output
# unit per entry (see `n_outputs`).
# NOTE(review): presumably right/left leg duration (tD) and amplitude (tA)
# scores, matching the Experiment 3.1 description -- confirm against the sheet.
SCORES_TO_USE = ['D_RLP_R_tD_pscore', 'D_LLP_R_tD_pscore', 'D_RLP_R_tA_pscore', 'D_LLP_R_tA_pscore']
# Scorer identifiers; a separate cross-validated model is trained for each one.
SCORERS = [1, 2, 3]

## Pipeline for training a deep neural network

### Define model architecture (here: simple CNN)

In [62]:
# Generate some dev data to get the shape of X.
# The selector is built here instead of relying on the `selector` variable that
# the training loop further down creates: on a fresh kernel (Restart & Run All)
# that variable does not exist yet when this cell runs.
# NOTE(review): assumes the input shape does not depend on which scorer is
# selected, so the first scorer is used -- confirm.
dev_selector = MultipleScoreSelector(scores_to_use=SCORES_TO_USE, scorer_to_use=SCORERS[0])
dev_selection = dev_selector.transform(scores_df)
dev_generator = RawDataGenerator(dev_selection, videos_folder=LYING_VIDEOS_DATA_FOLDER)

# First batch only; axes 1 and 2 of X are used as (timesteps, features).
X, y = dev_generator[0]
n_timesteps, n_features = X.shape[1], X.shape[2]



In [95]:
# One regression output per selected score column.
n_outputs = len(SCORES_TO_USE)


def get_model():
    """Build the uncompiled 1D-CNN regressor used in this notebook.

    Reads the notebook-level `n_timesteps`, `n_features` and `n_outputs`.

    Architecture: two convolutional stages (32 filters, then 64), each made of
    two Conv1D layers (kernel size 3, ReLU) followed by MaxPooling1D(2); then
    Flatten, Dense(100) and a final Dense(n_outputs).

    Returns:
        A `keras.Model` taking inputs of shape (n_timesteps, n_features) and
        producing `n_outputs` values per sample.
    """
    inputs = keras.layers.Input(shape=(n_timesteps, n_features))

    features = inputs
    for n_filters in (32, 64):
        # Two stacked convolutions per stage, then halve the time axis.
        for _ in range(2):
            features = keras.layers.Conv1D(
                filters=n_filters, kernel_size=3, activation='relu'
            )(features)
        features = keras.layers.MaxPooling1D(pool_size=2)(features)

    features = keras.layers.Flatten()(features)
    # NOTE(review): no activation is passed here, so this Dense layer and the
    # output layer are both linear -- confirm this is intended.
    features = keras.layers.Dense(100)(features)
    outputs = keras.layers.Dense(n_outputs)(features)

    return keras.Model(inputs=inputs, outputs=outputs)


get_model().summary()

Model: "model_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_3 (InputLayer)         [(None, 501, 42)]         0         
_________________________________________________________________
conv1d_8 (Conv1D)            (None, 499, 32)           4064      
_________________________________________________________________
conv1d_9 (Conv1D)            (None, 497, 32)           3104      
_________________________________________________________________
max_pooling1d_4 (MaxPooling1 (None, 248, 32)           0         
_________________________________________________________________
conv1d_10 (Conv1D)           (None, 246, 64)           6208      
_________________________________________________________________
conv1d_11 (Conv1D)           (None, 244, 64)           12352     
_________________________________________________________________
max_pooling1d_5 (MaxPooling1 (None, 122, 64)           0   

## Train network

### Training, cross-validation and evaluation helpers

In [108]:
def train_model(train_generator, val_generator, epochs=20):
    """Build, compile and fit a fresh model from `get_model`.

    Args:
        train_generator: Batch source passed to `model.fit` for training.
        val_generator: Batch source passed to `model.fit` as `validation_data`.
        epochs: Number of training epochs. Defaults to 20, the value that was
            previously hard-coded, so existing two-argument calls behave the same.

    Returns:
        The fitted keras model (MSE loss, Adam optimizer with default settings).
    """
    model = get_model()
    model.compile(loss='mse', optimizer=keras.optimizers.Adam())
    model.fit(train_generator, epochs=epochs, validation_data=val_generator)
    return model

def train_cross_val(cross_val, videos_folder=None, random_state=None):
    """Train one model per cross-validation split and collect test predictions.

    For every (train, test) pair yielded by `cross_val`, 20% of the training
    rows are held out for validation, a fresh model is trained with
    `train_model`, and predictions are made for the test rows.

    Args:
        cross_val: Iterable of (train_scores, test_scores) pairs.
        videos_folder: Folder handed to every `RawDataGenerator`. Defaults to
            `LYING_VIDEOS_DATA_FOLDER` (the previous hard-coded value); pass
            `SITTING_VIDEOS_DATA_FOLDER` to run the sitting-video experiment.
        random_state: Forwarded to `train_test_split` so the train/validation
            split can be made reproducible. `None` keeps the previous
            unseeded behaviour.

    Returns:
        Tuple `(y_test, y_pred)`: the concatenated test score frames and the
        vertically stacked model predictions, in split order.

    Raises:
        ValueError: If `cross_val` yields no splits.
    """
    # Resolved at call time rather than as a default argument value.
    if videos_folder is None:
        videos_folder = LYING_VIDEOS_DATA_FOLDER

    y_pred = []
    y_test = []
    for i_split, (train_scores, test_scores) in enumerate(cross_val):
        print(f'Fitting for 5-fold split {i_split}')
        train_scores, val_scores = train_test_split(
            train_scores, train_size=0.8, random_state=random_state
        )
        train_generator = RawDataGenerator(train_scores, videos_folder=videos_folder)
        val_generator = RawDataGenerator(val_scores, videos_folder=videos_folder)
        test_generator = RawDataGenerator(test_scores, videos_folder=videos_folder)
        model = train_model(train_generator, val_generator)
        # NOTE(review): pairing predictions with `test_scores` row by row assumes
        # the generator yields every test row exactly once, in order -- confirm
        # RawDataGenerator neither shuffles nor drops a partial last batch.
        y_pred.append(model.predict(test_generator))
        y_test.append(test_scores)

    if not y_pred:
        # np.vstack / pd.concat would fail with a less helpful message.
        raise ValueError('cross_val yielded no splits; nothing to train on')

    y_pred = np.vstack(y_pred)
    y_test = pd.concat(y_test)
    return y_test, y_pred

def evaluate(y_test, y_pred):
    """Compute the mean absolute error separately for every score column.

    Args:
        y_test: DataFrame of true scores, one column per score.
        y_pred: 2-D array of predictions; column i is compared against the
            i-th column of `y_test`.

    Returns:
        DataFrame with one row per score column and the columns
        'score' (column label) and 'mae'.
    """
    per_score_mae = [
        {
            'score': score_name,
            'mae': mean_absolute_error(y_test.iloc[:, position], y_pred[:, position]),
        }
        # Iterating a DataFrame yields its column labels.
        for position, score_name in enumerate(y_test)
    ]
    return pd.DataFrame(per_score_mae)

In [None]:
# Train and cross-validate one model per scorer. `results[i]` holds the
# (y_test, y_pred) pair for `SCORERS[i]`; the evaluation cell zips the two
# lists together, so the order must be kept.
results = []
for scorer in SCORERS:
    print(f'Training model for scorer {scorer}')
    # Project helper: selects this scorer's values for SCORES_TO_USE.
    # (The exact filtering is not visible in this notebook.)
    selector = MultipleScoreSelector(scores_to_use=SCORES_TO_USE, scorer_to_use=scorer)
    selected_data = selector.transform(scores_df)
    # Yields the (train, test) pairs consumed by `train_cross_val`.
    cross_val = cross_validation_generator(selected_data)
    y_test, y_pred = train_cross_val(cross_val)
    results.append((y_test, y_pred))

Training model for scorer 1




Fitting for 5-fold split 0
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Fitting for 5-fold split 1
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Fitting for 5-fold split 2
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Fitting for 5-fold split 3
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoc



Fitting for 5-fold split 0
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Fitting for 5-fold split 1
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Fitting for 5-fold split 2
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Fitting for 5-fold split 3
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoc



Fitting for 5-fold split 0
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
 5/28 [====>.........................] - ETA: 0s - loss: 7.7218

In [107]:
# Report the per-score mean absolute error of the cross-validated predictions
# for each scorer; `results` is in the same order as `SCORERS`.
for scorer, (y_test, y_pred) in zip(SCORERS, results):
    print(f'results for scorer {scorer}')
    print(evaluate(y_test, y_pred))

results for scorer 1
               score       mae
0  D_RLP_R_tD_pscore  7.678542
1  D_LLP_R_tD_pscore  6.814481
2  D_RLP_R_tA_pscore  3.441207
3  D_LLP_R_tA_pscore  5.150673
results for scorer 2
               score       mae
0  D_RLP_R_tD_pscore  4.864221
1  D_LLP_R_tD_pscore  3.773288
2  D_RLP_R_tA_pscore  2.378368
3  D_LLP_R_tA_pscore  2.286805
results for scorer 3
               score       mae
0  D_RLP_R_tD_pscore  3.947798
1  D_LLP_R_tD_pscore  3.033897
2  D_RLP_R_tA_pscore  6.401334
3  D_LLP_R_tA_pscore  5.162821
