# Illustrate generator-based pipeline

In [1]:
import os

# Run from the project root so that the `src.*` imports in the next cell resolve.
# A bare `cd ..` depends on IPython automagic and climbs one more directory each
# time the cell is re-run; guarding on the presence of `src/` makes this cell
# idempotent (a second run is a no-op).
if not os.path.isdir('src'):
    os.chdir('..')

/Users/svenvanderburg/projects/modys-video


In [2]:
import numpy as np
import tensorflow.keras as keras

from src.helpers import read_scores
from src.data_generators import RawDataGenerator
from src.data_selection import MultipleScoreSelector
from src.ai_func import cross_validation_generator

### Read metadata

In [4]:
# Load the scores metadata through the project helper; `scores_df` is the input
# handed to the score selectors' `transform` calls in the cells below.
scores_df = read_scores()

## Pipeline for training a deep neural network

### Define model architecture (here: simple CNN)

In [8]:
# Build a throw-away generator purely to discover the input shape for the network.
# NOTE(review): `path_data` is not defined in any cell visible in this notebook;
# confirm it is set before this cell runs on a fresh kernel.
selector = MultipleScoreSelector(
    scorer_to_use=1,
    scores_to_use=['D_LLP_R_tA_pscore'],
)
dev_selection = selector.transform(scores_df)
dev_generator = RawDataGenerator(dev_selection, videos_folder=path_data)

# Only the first batch is needed: axes 1 and 2 of X become the model's Input shape.
X, y = dev_generator[0]
n_timesteps = X.shape[1]
n_features = X.shape[2]



In [9]:
n_outputs = 1  # one output unit; the model is compiled with an MSE loss below

# Simple 1D CNN: two (conv, conv, max-pool) stages followed by a dense head.
input_layer = keras.layers.Input(shape=(n_timesteps, n_features))

# Stage 1: 32 filters
cnn_layer = keras.layers.Conv1D(filters=32, kernel_size=3, activation='relu')(input_layer)
cnn_layer = keras.layers.Conv1D(filters=32, kernel_size=3, activation='relu')(cnn_layer)
cnn_layer = keras.layers.MaxPooling1D(pool_size=2)(cnn_layer)

# Stage 2: 64 filters
cnn_layer = keras.layers.Conv1D(filters=64, kernel_size=3, activation='relu')(cnn_layer)
cnn_layer = keras.layers.Conv1D(filters=64, kernel_size=3, activation='relu')(cnn_layer)
cnn_layer = keras.layers.MaxPooling1D(pool_size=2)(cnn_layer)

# Dense head
cnn_layer = keras.layers.Flatten()(cnn_layer)
# Without a non-linearity this hidden layer and the linear output layer collapse
# into a single linear map, so the 100 units would add no capacity. Use the same
# ReLU activation as the convolutional layers.
cnn_layer = keras.layers.Dense(100, activation='relu')(cnn_layer)
# Output layer is left linear (no activation).
output_layer = keras.layers.Dense(n_outputs)(cnn_layer)

model = keras.Model(inputs=input_layer, outputs=output_layer)
model.summary()

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 501, 42)]         0         
_________________________________________________________________
conv1d (Conv1D)              (None, 499, 32)           4064      
_________________________________________________________________
conv1d_1 (Conv1D)            (None, 497, 32)           3104      
_________________________________________________________________
max_pooling1d (MaxPooling1D) (None, 248, 32)           0         
_________________________________________________________________
conv1d_2 (Conv1D)            (None, 246, 64)           6208      
_________________________________________________________________
conv1d_3 (Conv1D)            (None, 244, 64)           12352     
_________________________________________________________________
max_pooling1d_1 (MaxPooling1 (None, 122, 64)           0     

2021-11-02 10:29:45.336552: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [10]:
# Mean-squared-error loss with a default-configured Adam optimizer.
model.compile(
    optimizer=keras.optimizers.Adam(),
    loss='mse',
)

### Data selection + generation

In [11]:
# Select the data to train on: the 'D_LLP_R_tA_pscore' score with scorer_to_use=1.
selector = MultipleScoreSelector(
    scorer_to_use=1,
    scores_to_use=['D_LLP_R_tA_pscore'],
)
selected_data = selector.transform(scores_df)

# Iterable of (train_scores, test_scores) pairs consumed by the training loop.
# NOTE(review): presumably a one-shot generator; if so, re-run this cell before
# re-running the training loop. Confirm in src.ai_func.
cross_val = cross_validation_generator(selected_data)


## Train network

In [12]:
def train_network(train_generator, val_generator, epochs=2):
    """Fit the notebook-level `model` on one train/validation pair.

    Args:
        train_generator: Training batches, passed to `model.fit` unchanged.
        val_generator: Batches passed to `model.fit` as `validation_data`.
        epochs: Number of training epochs. Defaults to 2, the value that was
            previously hard-coded.

    Returns:
        Whatever `model.fit` returns (for Keras models, the training history),
        so the per-epoch losses are available to the caller instead of being
        discarded.
    """
    # `model` is looked up in the notebook namespace at call time.
    return model.fit(train_generator, epochs=epochs, validation_data=val_generator)

### Run cross-validation

In [14]:
# NOTE(review): `path_data` is not defined in any cell visible in this notebook;
# confirm it is set before this cell runs on a fresh kernel.
for i_split, (train_scores, test_scores) in enumerate(cross_val):
    print(f'Fitting for 5-fold split {i_split}')
    # Start every fold from freshly initialised weights. Re-using one fitted model
    # across folds means later folds are validated on samples the network was
    # already trained on in earlier folds, which invalidates the cross-validation.
    # `clone_model` builds new layers (new weights) with the same architecture;
    # the clone has to be compiled again before it can be fitted.
    model = keras.models.clone_model(model)
    model.compile(loss='mse', optimizer=keras.optimizers.Adam())

    train_generator = RawDataGenerator(train_scores, videos_folder=path_data)
    # We use testset for validation here just for demo purposes
    val_generator = RawDataGenerator(test_scores, videos_folder=path_data)
    train_network(train_generator, val_generator)

Fitting for 5-fold split 0
Epoch 1/2
Epoch 2/2
Fitting for 5-fold split 1
Epoch 1/2
Epoch 2/2
