# Illustrate generator based pipeline

In [1]:
cd ..

/Users/svenvanderburg/projects/modys-video


In [2]:
import numpy as np
import tensorflow.keras as keras

from helpers import read_scores
from src.data_generators import RawDataGenerator
from src.data_selection import MultipleScoreSelector

### Read metadata

In [3]:
path_data = "data/data_lying_052929"
path_metadata = "data/data_Scoring_DIS_proximal_trunk_V1.0.xlsx"

scores_df = read_scores(path_metadata)

In [4]:
scores_df.tail()

Unnamed: 0,video,ID,group,time,scorer,T0_DIS_D_TRUNK_R_tD,T0_DIS_D_TRUNK_R_tD_max,T0_DIS_D_TRUNK_R_tD_pscore,T0_DIS_CA_TRUNK_R_tD,T0_DIS_CA_TRUNK_R_tD_max,...,Left_arm_proximal_CA,T0_DIS_CA_RLP_R_tD_pscore.1,T0_DIS_CA_RLP_R_tA_pscore.1,Right_leg_proximal_CA,T0_DIS_CA_LLP_R_tD_pscore.1,T0_DIS_CA_LLP_R_tA_pscore.1,Left_leg_proximal_CA,Dystonia_mean,CA,video_id
156,12,1034,A,T0,CO,4.0,4.0,1.0,0.0,4.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.65,0.0,12
157,102,1034,A,T12,CO,0.0,4.0,0.0,0.0,4.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.65,0.0,102
158,12,1034,A,T0,SFL,0.0,4.0,0.0,0.0,4.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.45,0.0,12
159,63,1034,A,T3,SFL,0.0,4.0,0.0,0.0,4.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.45,0.0,63
160,102,1034,A,T12,SFL,0.0,4.0,0.0,0.0,4.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.4,0.0,102


In [5]:
scores_df["video_id"][0]

'001'

In [7]:
scores_selected = MultipleScoreSelector(scores_to_use = ['T0_DIS_D_RLP_R_tA_pscore'],
                                scorer_to_use='CO',
                                videos_folder=path_data).transform(scores_df)
scores_selected.head()



Unnamed: 0_level_0,T0_DIS_D_RLP_R_tA_pscore
video_id,Unnamed: 1_level_1
1,0.75
31,0.5
48,
64,0.5
112,0.5


## Creat data generator

In [19]:
test_generator = RawDataGenerator(scores_selected, videos_folder=path_data)
X, y = test_generator.__getitem__(0)

In [20]:
X.shape, y.shape

((1, 501, 42), (1, 1))

In [21]:
y

array([[0.75]])

In [22]:
X.max(), X.min()

(510.4855487942696, 0.0002015531063079)

## Pipeline for training a deep neural network

### Define model architecture (here: simple CNN)

In [23]:

n_timesteps, n_features = (X.shape[1], X.shape[2])
n_outputs = 1

# simple CNN
input_layer = keras.layers.Input(shape=(n_timesteps,n_features))
cnn_layer = keras.layers.Conv1D(filters=32, kernel_size=3, activation='relu')(input_layer)
cnn_layer = keras.layers.Conv1D(filters=32, kernel_size=3, activation='relu')(cnn_layer)
cnn_layer = keras.layers.MaxPooling1D(pool_size=2)(cnn_layer)
cnn_layer = keras.layers.Conv1D(filters=64, kernel_size=3, activation='relu')(cnn_layer)
cnn_layer = keras.layers.Conv1D(filters=64, kernel_size=3, activation='relu')(cnn_layer)
cnn_layer = keras.layers.MaxPooling1D(pool_size=2)(cnn_layer)
cnn_layer = keras.layers.Flatten()(cnn_layer)
cnn_layer = keras.layers.Dense(100)(cnn_layer)
output_layer = keras.layers.Dense(n_outputs)(cnn_layer)

model = keras.Model(inputs=input_layer, outputs=output_layer)
model.summary()

Model: "model_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         [(None, 501, 42)]         0         
_________________________________________________________________
conv1d_4 (Conv1D)            (None, 499, 32)           4064      
_________________________________________________________________
conv1d_5 (Conv1D)            (None, 497, 32)           3104      
_________________________________________________________________
max_pooling1d_2 (MaxPooling1 (None, 248, 32)           0         
_________________________________________________________________
conv1d_6 (Conv1D)            (None, 246, 64)           6208      
_________________________________________________________________
conv1d_7 (Conv1D)            (None, 244, 64)           12352     
_________________________________________________________________
max_pooling1d_3 (MaxPooling1 (None, 122, 64)           0   

### Train/Test split

In [24]:
unique_video_ids = list(set(scores_selected.index))
len(unique_video_ids)

65

In [14]:
# here just hacky way to quickly do a split. Should later be done in generator?
n_train = 41
n_val = 10
n_test = len(unique_video_ids) - n_train - n_val

np.random.seed(0)
video_ids_train = np.random.choice(unique_video_ids, n_train)
video_ids_val = np.random.choice(list(set(unique_video_ids).difference(set(video_ids_train))), n_val)
video_ids_test = [x for x in unique_video_ids if not x in set(video_ids_train).union(set(video_ids_val))]

In [15]:
set(video_ids_val)

{'004', '007', '039', '043', '076', '078', '082', '087', '097'}

In [16]:
scores_df[scores_df["video_id"].isin(video_ids_val)]

Unnamed: 0,video,ID,group,time,scorer,T0_DIS_D_TRUNK_R_tD,T0_DIS_D_TRUNK_R_tD_max,T0_DIS_D_TRUNK_R_tD_pscore,T0_DIS_CA_TRUNK_R_tD,T0_DIS_CA_TRUNK_R_tD_max,...,Left_arm_proximal_CA,T0_DIS_CA_RLP_R_tD_pscore.1,T0_DIS_CA_RLP_R_tA_pscore.1,Right_leg_proximal_CA,T0_DIS_CA_LLP_R_tD_pscore.1,T0_DIS_CA_LLP_R_tA_pscore.1,Left_leg_proximal_CA,Dystonia_mean,CA,video_id
38,76,1009,B,T12,CO,4.0,4.0,1.0,0.0,4.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.625,0.0,76
41,76,1009,B,T12,SFL,0.0,4.0,0.0,0.0,4.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.175,0.0,76
50,78,1012,A,T12,CO,3.0,4.0,0.75,0.0,4.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7,0.0,78
53,78,1012,A,T12,SFL,1.0,4.0,0.25,0.0,4.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.4,0.0,78
59,4,1014,B,T0,CO,4.0,4.0,1.0,0.0,4.0,...,0.375,0.0,0.0,0.0,0.0,0.0,0.0,0.725,0.125,4
61,4,1014,B,T0,SFL,1.0,4.0,0.25,1.0,4.0,...,0.5,0.5,0.5,0.5,0.5,0.75,0.625,0.45,0.425,4
70,82,1016,A,T12,CO,3.0,4.0,0.75,0.0,4.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.725,0.0,82
73,82,1016,A,T12,SFL,2.0,4.0,0.5,0.0,4.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.4,0.0,82
94,43,1021,A,T0,CO,4.0,4.0,1.0,2.0,4.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.85,0.075,43
95,87,1021,A,T12,CO,0.0,4.0,0.0,0.0,4.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.425,0.0,87


### Select scores for train, val, and test set

In [26]:
selector = MultipleScoreSelector(scores_to_use = ['T0_DIS_D_RLP_R_tA_pscore'], scorer_to_use='CO', videos_folder=path_data)

scores_train = selector.transform(scores_df[scores_df["video_id"].isin(video_ids_train)])
scores_val = selector.transform(scores_df[scores_df["video_id"].isin(video_ids_val)])
scores_test = selector.transform(scores_df[scores_df["video_id"].isin(video_ids_test)])

In [27]:
train_generator = RawDataGenerator(scores_train, videos_folder=videos_folder)
val_generator = RawDataGenerator(scores_val, videos_folder=videos_folder)

In [28]:
model.compile(loss='mse', optimizer=keras.optimizers.Adam())

### Train neural network

In [29]:
train_generator.batch_size

1

In [30]:
len(train_generator)

32

In [31]:
train_generator.indexes

Index(['001', '031', '064', '105', '002', '047', '111', '110', '109', '077',
       '053', '080', '054', '005', '044', '085', '006', '086', '088', '042',
       '057', '090', '041', '058', '009', '094', '040', '095', '059', '010',
       '012', '102'],
      dtype='object', name='video_id')

In [32]:
train_generator.__getitem__(1)

(array([[[2.89615275e+02, 4.83716557e+02, 9.90902245e-01, ...,
          3.25787453e+02, 1.18806110e+02, 2.92700529e-03],
         [2.89888890e+02, 4.93060113e+02, 8.28785300e-01, ...,
          4.87645841e+02, 1.18806110e+02, 5.45266271e-03],
         [2.90458459e+02, 4.93060113e+02, 9.71991181e-01, ...,
          4.87645841e+02, 1.18806110e+02, 7.66080618e-03],
         ...,
         [3.34432155e+02, 5.02294505e+02, 5.61785161e-01, ...,
          3.34226272e+02, 2.42063637e+02, 1.44988298e-03],
         [3.34221808e+02, 5.01288193e+02, 9.53767359e-01, ...,
          2.79338076e+02, 2.42063637e+02, 3.09085846e-03],
         [3.32907221e+02, 4.86291516e+02, 7.99552679e-01, ...,
          2.79247801e+02, 1.48820802e+02, 2.85795331e-03]]]),
 array([[0.5]]))

In [33]:
model.fit(train_generator, epochs=10, validation_data=val_generator)

2021-11-01 11:49:50.344200: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:185] None of the MLIR Optimization Passes are enabled (registered 2)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7ff733dd7c10>