# Illustrate generator based pipeline

In [1]:
import os, sys
path_root = os.path.dirname(os.getcwd())
sys.path.insert(1, path_root)

from helpers import read_scores
from src.data_generators import RawDataGenerator
from src.data_selection import ScoreSelector

In [56]:
path_data = os.path.join(path_root, "modys-video_data")
path_metadata = os.path.join(path_data, "data_Scoring_DIS_proximal_trunk_V1.0.xlsx")

scores_df = read_scores(path_metadata)

In [57]:
scores_df.tail()

Unnamed: 0,video,ID,group,time,scorer,T0_DIS_D_TRUNK_R_tD,T0_DIS_D_TRUNK_R_tD_max,T0_DIS_D_TRUNK_R_tD_pscore,T0_DIS_CA_TRUNK_R_tD,T0_DIS_CA_TRUNK_R_tD_max,...,Left_arm_proximal_CA,T0_DIS_CA_RLP_R_tD_pscore.1,T0_DIS_CA_RLP_R_tA_pscore.1,Right_leg_proximal_CA,T0_DIS_CA_LLP_R_tD_pscore.1,T0_DIS_CA_LLP_R_tA_pscore.1,Left_leg_proximal_CA,Dystonia_mean,CA,video_id
156,12,1034.0,A,T0,CO,4.0,4.0,1.0,0.0,4.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.65,0.0,12
157,102,1034.0,A,T12,CO,0.0,4.0,0.0,0.0,4.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.65,0.0,102
158,12,1034.0,A,T0,SFL,0.0,4.0,0.0,0.0,4.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.45,0.0,12
159,63,1034.0,A,T3,SFL,0.0,4.0,0.0,0.0,4.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.45,0.0,63
160,102,1034.0,A,T12,SFL,0.0,4.0,0.0,0.0,4.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.4,0.0,102


In [58]:
scores_df["video_id"][0]

'001'

In [59]:
scores_selected = ScoreSelector(scores_to_use = ['T0_DIS_D_RLP_R_tA_pscore'],
                                scorer_to_use='CO').transform(scores_df)
scores_selected.head()

Unnamed: 0_level_0,T0_DIS_D_RLP_R_tA_pscore
video_id,Unnamed: 1_level_1
1,0.75
31,0.5
48,999.0
64,0.5
112,0.5


## Creat data generator

In [60]:
videos_folder = os.path.join(path_data, "data_stickfigure_coordinates_lying_V1.0", "data_lying_052929")

test_generator = RawDataGenerator(scores_selected, videos_folder=videos_folder)
X, y = test_generator.__getitem__(0)

In [62]:
X.shape, y.shape

((1, 501, 42), (1, 1))

In [63]:
y

array([[0.5]])

In [66]:
X.max(), X.min()

(510.4855487942696, 0.0002015531063079)

## Pipeline for training a deep neural network

### Define model architecture (here: simple CNN)

In [71]:
import tensorflow.keras as keras

n_timesteps, n_features = (X.shape[1], X.shape[0])
n_outputs = 1

# simple CNN
input_layer = keras.layers.Input(shape=(n_timesteps,n_features))
cnn_layer = keras.layers.Conv1D(filters=32, kernel_size=3, activation='relu')(input_layer)
cnn_layer = keras.layers.Conv1D(filters=32, kernel_size=3, activation='relu')(cnn_layer)
cnn_layer = keras.layers.MaxPooling1D(pool_size=2)(cnn_layer)
cnn_layer = keras.layers.Conv1D(filters=64, kernel_size=3, activation='relu')(cnn_layer)
cnn_layer = keras.layers.Conv1D(filters=64, kernel_size=3, activation='relu')(cnn_layer)
cnn_layer = keras.layers.MaxPooling1D(pool_size=2)(cnn_layer)
cnn_layer = keras.layers.Flatten()(cnn_layer)
cnn_layer = keras.layers.Dense(100)(cnn_layer)
output_layer = keras.layers.Dense(n_outputs)(cnn_layer)

model = Model(inputs=input_layer, outputs=output_layer)
model.summary()

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         [(None, 501, 1)]          0         
_________________________________________________________________
conv1d_1 (Conv1D)            (None, 499, 32)           128       
_________________________________________________________________
conv1d_2 (Conv1D)            (None, 497, 32)           3104      
_________________________________________________________________
max_pooling1d (MaxPooling1D) (None, 248, 32)           0         
_________________________________________________________________
conv1d_3 (Conv1D)            (None, 246, 64)           6208      
_________________________________________________________________
conv1d_4 (Conv1D)            (None, 244, 64)           12352     
_________________________________________________________________
max_pooling1d_1 (MaxPooling1 (None, 122, 64)           0     

### Train/Test split

In [75]:
unique_video_ids = list(set(scores_selected.index))
len(unique_video_ids)

66

In [97]:
# here just hacky way to quickly do a split. Should later be done in generator?
n_train = 41
n_val = 10
n_test = len(unique_video_ids) - n_train - n_val

np.random.seed(0)
video_ids_train = np.random.choice(unique_video_ids, n_train)
video_ids_val = np.random.choice(list(set(unique_video_ids).difference(set(video_ids_train))), n_val)
video_ids_test = [x for x in unique_video_ids if not x in set(video_ids_train).union(set(video_ids_val))]

In [98]:
set(video_ids_val)

{'010', '012', '059', '076', '079', '084', '088', '096', '106'}

In [99]:
scores_df[scores_df["video_id"].isin(video_ids_val)]

Unnamed: 0,video,ID,group,time,scorer,T0_DIS_D_TRUNK_R_tD,T0_DIS_D_TRUNK_R_tD_max,T0_DIS_D_TRUNK_R_tD_pscore,T0_DIS_CA_TRUNK_R_tD,T0_DIS_CA_TRUNK_R_tD_max,...,Left_arm_proximal_CA,T0_DIS_CA_RLP_R_tD_pscore.1,T0_DIS_CA_RLP_R_tA_pscore.1,Right_leg_proximal_CA,T0_DIS_CA_LLP_R_tD_pscore.1,T0_DIS_CA_LLP_R_tA_pscore.1,Left_leg_proximal_CA,Dystonia_mean,CA,video_id
29,106,1007.0,C,T0,CO,4.0,4.0,1.0,0.0,4.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.75,0.0,106
31,106,1007.0,C,T0,SFL,0.0,4.0,0.0,0.0,4.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.55,0.0,106
38,76,1009.0,B,T12,CO,4.0,4.0,1.0,0.0,4.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.625,0.0,76
41,76,1009.0,B,T12,SFL,0.0,4.0,0.0,0.0,4.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.175,0.0,76
55,79,1013.0,A,T12,CO,4.0,4.0,1.0,0.0,4.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.725,0.0,79
58,79,1013.0,A,T12,KBO,4.0,4.0,1.0,0.0,4.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.65,0.0,79
80,84,1018.0,B,T12,CO,4.0,4.0,1.0,0.0,4.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.775,0.0,84
83,84,1018.0,B,T12,SFL,0.0,4.0,0.0,0.0,4.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,84
100,88,1022.0,A,T12,CO,0.0,4.0,0.0,0.0,4.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.375,0.0,88
103,88,1022.0,A,T12,SFL,0.0,4.0,0.0,0.0,4.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,88


In [101]:
selector = ScoreSelector(scores_to_use = ['T0_DIS_D_RLP_R_tA_pscore'], scorer_to_use='CO')

scores_train = selector.transform(scores_df[scores_df["video_id"].isin(video_ids_train)])
scores_val = selector.transform(scores_df[scores_df["video_id"].isin(video_ids_val)])
scores_test = selector.transform(scores_df[scores_df["video_id"].isin(video_ids_test)])

In [103]:
train_generator = RawDataGenerator(scores_train, videos_folder=videos_folder)
val_generator = RawDataGenerator(scores_val, videos_folder=videos_folder)

In [104]:
model.compile(loss='mse', optimizer=keras.optimizers.Adam())

In [105]:
model.fit(train_generator, epochs=10, validation_data=val_generator)

Epoch 1/10


UnknownError:  IndexError: positional indexers are out-of-bounds
Traceback (most recent call last):

  File "c:\users\florianhuber\anaconda3\envs\ms2deepscore\lib\site-packages\pandas\core\indexing.py", line 1474, in _get_list_axis
    return self.obj._take_with_is_copy(key, axis=axis)

  File "c:\users\florianhuber\anaconda3\envs\ms2deepscore\lib\site-packages\pandas\core\generic.py", line 3599, in _take_with_is_copy
    result = self.take(indices=indices, axis=axis)

  File "c:\users\florianhuber\anaconda3\envs\ms2deepscore\lib\site-packages\pandas\core\generic.py", line 3585, in take
    new_data = self._mgr.take(

  File "c:\users\florianhuber\anaconda3\envs\ms2deepscore\lib\site-packages\pandas\core\internals\managers.py", line 1467, in take
    indexer = maybe_convert_indices(indexer, n)

  File "c:\users\florianhuber\anaconda3\envs\ms2deepscore\lib\site-packages\pandas\core\indexers.py", line 265, in maybe_convert_indices
    raise IndexError("indices are out-of-bounds")

IndexError: indices are out-of-bounds


The above exception was the direct cause of the following exception:


Traceback (most recent call last):

  File "c:\users\florianhuber\anaconda3\envs\ms2deepscore\lib\site-packages\tensorflow\python\ops\script_ops.py", line 249, in __call__
    ret = func(*args)

  File "c:\users\florianhuber\anaconda3\envs\ms2deepscore\lib\site-packages\tensorflow\python\autograph\impl\api.py", line 645, in wrapper
    return func(*args, **kwargs)

  File "c:\users\florianhuber\anaconda3\envs\ms2deepscore\lib\site-packages\tensorflow\python\data\ops\dataset_ops.py", line 892, in generator_py_func
    values = next(generator_state.get_iterator(iterator_id))

  File "c:\users\florianhuber\anaconda3\envs\ms2deepscore\lib\site-packages\keras\engine\data_adapter.py", line 822, in wrapped_generator
    for data in generator_fn():

  File "c:\users\florianhuber\anaconda3\envs\ms2deepscore\lib\site-packages\keras\engine\data_adapter.py", line 948, in generator_fn
    yield x[i]

  File "C:\HSD\OneDrive - Hochschule Düsseldorf\coding_projects\modys-video\src\data_generators.py", line 43, in __getitem__
    y = self._generate_y(video_ids)

  File "C:\HSD\OneDrive - Hochschule Düsseldorf\coding_projects\modys-video\src\data_generators.py", line 25, in _generate_y
    return self.scores_df.iloc[video_ids].values

  File "c:\users\florianhuber\anaconda3\envs\ms2deepscore\lib\site-packages\pandas\core\indexing.py", line 895, in __getitem__
    return self._getitem_axis(maybe_callable, axis=axis)

  File "c:\users\florianhuber\anaconda3\envs\ms2deepscore\lib\site-packages\pandas\core\indexing.py", line 1492, in _getitem_axis
    return self._get_list_axis(key, axis=axis)

  File "c:\users\florianhuber\anaconda3\envs\ms2deepscore\lib\site-packages\pandas\core\indexing.py", line 1477, in _get_list_axis
    raise IndexError("positional indexers are out-of-bounds") from err

IndexError: positional indexers are out-of-bounds


	 [[{{node PyFunc}}]]
	 [[IteratorGetNext]] [Op:__inference_train_function_1009]

Function call stack:
train_function
