# Illustrate generator based pipeline

In [1]:
cd ..

/Users/svenvanderburg/projects/modys-video


In [2]:
import numpy as np
import tensorflow.keras as keras

from helpers import read_scores
from src.data_generators import RawDataGenerator
from src.data_selection import MultipleScoreSelector

### Read metadata

In [3]:
path_data = "data/data_lying_052929"
path_metadata = "data/data_Scoring_DIS_proximal_trunk_V1.0.xlsx"

scores_df = read_scores(path_metadata)

In [4]:
scores_df.tail()

Unnamed: 0,video,ID,group,time,scorer,T0_DIS_D_TRUNK_R_tD,T0_DIS_D_TRUNK_R_tD_max,T0_DIS_D_TRUNK_R_tD_pscore,T0_DIS_CA_TRUNK_R_tD,T0_DIS_CA_TRUNK_R_tD_max,...,Left_arm_proximal_CA,T0_DIS_CA_RLP_R_tD_pscore.1,T0_DIS_CA_RLP_R_tA_pscore.1,Right_leg_proximal_CA,T0_DIS_CA_LLP_R_tD_pscore.1,T0_DIS_CA_LLP_R_tA_pscore.1,Left_leg_proximal_CA,Dystonia_mean,CA,video_id
156,12,1034,A,T0,CO,4.0,4.0,1.0,0.0,4.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.65,0.0,12
157,102,1034,A,T12,CO,0.0,4.0,0.0,0.0,4.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.65,0.0,102
158,12,1034,A,T0,SFL,0.0,4.0,0.0,0.0,4.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.45,0.0,12
159,63,1034,A,T3,SFL,0.0,4.0,0.0,0.0,4.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.45,0.0,63
160,102,1034,A,T12,SFL,0.0,4.0,0.0,0.0,4.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.4,0.0,102


In [5]:
scores_df["video_id"][0]

'001'

In [6]:
scores_selected = MultipleScoreSelector(scores_to_use = ['T0_DIS_D_RLP_R_tA_pscore'],
                                scorer_to_use='CO').transform(scores_df)
scores_selected.head()

Unnamed: 0_level_0,T0_DIS_D_RLP_R_tA_pscore
video_id,Unnamed: 1_level_1
1,0.75
31,0.5
48,
64,0.5
112,0.5


## Creat data generator

In [7]:
videos_folder = ("data/data_lying_052929")

test_generator = RawDataGenerator(scores_selected, videos_folder=videos_folder)
X, y = test_generator.__getitem__(0)

In [8]:
X.shape, y.shape

((1, 501, 42), (1, 1))

In [9]:
y

array([[0.75]])

In [10]:
X.max(), X.min()

(510.4855487942696, 0.0002015531063079)

## Pipeline for training a deep neural network

### Define model architecture (here: simple CNN)

In [11]:

n_timesteps, n_features = (X.shape[1], X.shape[2])
n_outputs = 1

# simple CNN
input_layer = keras.layers.Input(shape=(n_timesteps,n_features))
cnn_layer = keras.layers.Conv1D(filters=32, kernel_size=3, activation='relu')(input_layer)
cnn_layer = keras.layers.Conv1D(filters=32, kernel_size=3, activation='relu')(cnn_layer)
cnn_layer = keras.layers.MaxPooling1D(pool_size=2)(cnn_layer)
cnn_layer = keras.layers.Conv1D(filters=64, kernel_size=3, activation='relu')(cnn_layer)
cnn_layer = keras.layers.Conv1D(filters=64, kernel_size=3, activation='relu')(cnn_layer)
cnn_layer = keras.layers.MaxPooling1D(pool_size=2)(cnn_layer)
cnn_layer = keras.layers.Flatten()(cnn_layer)
cnn_layer = keras.layers.Dense(100)(cnn_layer)
output_layer = keras.layers.Dense(n_outputs)(cnn_layer)

model = keras.Model(inputs=input_layer, outputs=output_layer)
model.summary()

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 501, 42)]         0         
_________________________________________________________________
conv1d (Conv1D)              (None, 499, 32)           4064      
_________________________________________________________________
conv1d_1 (Conv1D)            (None, 497, 32)           3104      
_________________________________________________________________
max_pooling1d (MaxPooling1D) (None, 248, 32)           0         
_________________________________________________________________
conv1d_2 (Conv1D)            (None, 246, 64)           6208      
_________________________________________________________________
conv1d_3 (Conv1D)            (None, 244, 64)           12352     
_________________________________________________________________
max_pooling1d_1 (MaxPooling1 (None, 122, 64)           0     

2021-09-20 10:36:57.615255: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


### Train/Test split

In [12]:
unique_video_ids = list(set(scores_selected.index))
len(unique_video_ids)

66

In [13]:
# here just hacky way to quickly do a split. Should later be done in generator?
n_train = 41
n_val = 10
n_test = len(unique_video_ids) - n_train - n_val

np.random.seed(0)
video_ids_train = np.random.choice(unique_video_ids, n_train)
video_ids_val = np.random.choice(list(set(unique_video_ids).difference(set(video_ids_train))), n_val)
video_ids_test = [x for x in unique_video_ids if not x in set(video_ids_train).union(set(video_ids_val))]

In [14]:
set(video_ids_val)

{'031', '042', '054', '056', '077', '084', '086', '087', '094'}

In [15]:
scores_df[scores_df["video_id"].isin(video_ids_val)]

Unnamed: 0,video,ID,group,time,scorer,T0_DIS_D_TRUNK_R_tD,T0_DIS_D_TRUNK_R_tD_max,T0_DIS_D_TRUNK_R_tD_pscore,T0_DIS_CA_TRUNK_R_tD,T0_DIS_CA_TRUNK_R_tD_max,...,Left_arm_proximal_CA,T0_DIS_CA_RLP_R_tD_pscore.1,T0_DIS_CA_RLP_R_tA_pscore.1,Right_leg_proximal_CA,T0_DIS_CA_LLP_R_tD_pscore.1,T0_DIS_CA_LLP_R_tA_pscore.1,Left_leg_proximal_CA,Dystonia_mean,CA,video_id
1,31,1001,A,T12,CO,0.0,4.0,0.0,0.0,4.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.625,0.0,31
4,31,1001,A,T12,KBO,0.0,4.0,0.0,0.0,4.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.325,0.0,31
45,77,1011,A,T12,CO,0.0,4.0,0.0,0.0,4.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7,0.0,77
48,77,1011,A,T12,KBO,0.0,4.0,0.0,0.0,4.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.65,0.0,77
69,54,1016,A,T0,CO,4.0,4.0,1.0,0.0,4.0,...,0.0,0.25,0.25,0.25,0.25,0.25,0.25,0.725,0.1,54
71,54,1016,A,T0,SFL,0.0,4.0,0.0,0.0,4.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.375,0.0,54
80,84,1018,B,T12,CO,4.0,4.0,1.0,0.0,4.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.775,0.0,84
83,84,1018,B,T12,SFL,0.0,4.0,0.0,0.0,4.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,84
90,86,1020,B,T12,CO,2.0,4.0,0.5,0.0,4.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.625,0.0,86
93,86,1020,B,T12,SFL,0.0,4.0,0.0,0.0,4.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.4,0.0,86


### Select scores for train, val, and test set

In [16]:
selector = MultipleScoreSelector(scores_to_use = ['T0_DIS_D_RLP_R_tA_pscore'], scorer_to_use='CO')

scores_train = selector.transform(scores_df[scores_df["video_id"].isin(video_ids_train)])
scores_val = selector.transform(scores_df[scores_df["video_id"].isin(video_ids_val)])
scores_test = selector.transform(scores_df[scores_df["video_id"].isin(video_ids_test)])

In [17]:
train_generator = RawDataGenerator(scores_train, videos_folder=videos_folder)
val_generator = RawDataGenerator(scores_val, videos_folder=videos_folder)

In [18]:
model.compile(loss='mse', optimizer=keras.optimizers.Adam())

### Train neural network

In [19]:
train_generator.batch_size

1

In [20]:
len(train_generator)

31

In [21]:
train_generator.indexes

Index(['001', '112', '105', '073', '074', '075', '111', '003', '046', '078',
       '053', '004', '080', '045', '083', '055', '085', '088', '007', '090',
       '008', '091', '092', '093', '009', '040', '095', '059', '096', '039',
       '098'],
      dtype='object', name='video_id')

In [23]:
train_generator.__getitem__(1)

(array([[[2.81831064e+02, 1.01761291e+02, 2.03192234e-04, ...,
          0.00000000e+00, 0.00000000e+00, 5.60283661e-06],
         [2.81831064e+02, 4.47730581e+02, 9.97238636e-01, ...,
          2.38452076e+02, 7.72632854e+01, 7.99715519e-04],
         [2.82381213e+02, 4.48443518e+02, 9.95797396e-01, ...,
          2.45238032e+02, 1.35245957e+02, 1.47625804e-03],
         ...,
         [3.19379115e+02, 5.00892663e+02, 9.65971649e-01, ...,
          2.44880940e+02, 1.27505754e+02, 4.70587611e-03],
         [3.19283831e+02, 5.00892663e+02, 9.68499780e-01, ...,
          2.40023229e+02, 1.27505754e+02, 7.49447942e-03],
         [3.18971776e+02, 5.00892663e+02, 9.92583632e-01, ...,
          2.40023229e+02, 1.17591729e+02, 6.36622310e-03]]]),
 array([[0.5]]))

In [28]:
model.fit(train_generator, epochs=10, validation_data=val_generator)

2021-09-20 10:37:09.165634: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:185] None of the MLIR Optimization Passes are enabled (registered 2)


Epoch 1/10
 5/31 [===>..........................] - ETA: 0s - loss: nan 

2021-09-20 10:37:10.635177: W tensorflow/core/framework/op_kernel.cc:1680] Unknown: FileNotFoundError: Video with video_id: 096 not found
Traceback (most recent call last):

  File "/Users/svenvanderburg/opt/anaconda3/envs/ssi-ml-video-dys/lib/python3.8/site-packages/tensorflow/python/ops/script_ops.py", line 249, in __call__
    ret = func(*args)

  File "/Users/svenvanderburg/opt/anaconda3/envs/ssi-ml-video-dys/lib/python3.8/site-packages/tensorflow/python/autograph/impl/api.py", line 645, in wrapper
    return func(*args, **kwargs)

  File "/Users/svenvanderburg/opt/anaconda3/envs/ssi-ml-video-dys/lib/python3.8/site-packages/tensorflow/python/data/ops/dataset_ops.py", line 892, in generator_py_func
    values = next(generator_state.get_iterator(iterator_id))

  File "/Users/svenvanderburg/opt/anaconda3/envs/ssi-ml-video-dys/lib/python3.8/site-packages/keras/engine/data_adapter.py", line 822, in wrapped_generator
    for data in generator_fn():

  File "/Users/svenvanderburg/opt/anac

UnknownError:  FileNotFoundError: Video with video_id: 096 not found
Traceback (most recent call last):

  File "/Users/svenvanderburg/opt/anaconda3/envs/ssi-ml-video-dys/lib/python3.8/site-packages/tensorflow/python/ops/script_ops.py", line 249, in __call__
    ret = func(*args)

  File "/Users/svenvanderburg/opt/anaconda3/envs/ssi-ml-video-dys/lib/python3.8/site-packages/tensorflow/python/autograph/impl/api.py", line 645, in wrapper
    return func(*args, **kwargs)

  File "/Users/svenvanderburg/opt/anaconda3/envs/ssi-ml-video-dys/lib/python3.8/site-packages/tensorflow/python/data/ops/dataset_ops.py", line 892, in generator_py_func
    values = next(generator_state.get_iterator(iterator_id))

  File "/Users/svenvanderburg/opt/anaconda3/envs/ssi-ml-video-dys/lib/python3.8/site-packages/keras/engine/data_adapter.py", line 822, in wrapped_generator
    for data in generator_fn():

  File "/Users/svenvanderburg/opt/anaconda3/envs/ssi-ml-video-dys/lib/python3.8/site-packages/keras/engine/data_adapter.py", line 948, in generator_fn
    yield x[i]

  File "/Users/svenvanderburg/projects/modys-video/src/data_generators.py", line 85, in __getitem__
    X = self._generate_X(indexes)

  File "/Users/svenvanderburg/projects/modys-video/src/data_generators.py", line 99, in _generate_X
    df_video = read_video(video_id, self.videos_folder)

  File "/Users/svenvanderburg/projects/modys-video/helpers.py", line 170, in read_video
    raise FileNotFoundError(f'Video with video_id: {video_id} not found')

FileNotFoundError: Video with video_id: 096 not found


	 [[{{node PyFunc}}]]
	 [[IteratorGetNext]] [Op:__inference_train_function_981]

Function call stack:
train_function
