# Model training lab

This is the notebook for loading and training models.
Furthermore it provides simple documentation for different approaches used for training a model.

Run the command below to see command-completion on pressing `TAB`.

## Prerequisits

In [19]:
# Imports
import os
import warnings
import tools
import models as c_models
import pandas as pd
import numpy as np
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing import sequence
from tensorflow.keras import layers, models
from tensorflow.keras.models import Sequential
from tensorflow.keras import layers
from tensorflow.keras import optimizers
from tensorflow.keras.layers import SimpleRNN, Dense
from tensorflow.keras.layers import Bidirectional

# Ignore future warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

# Root CSV files directory
dirname = "./data/"

# Constant frame count.
frames = 100

## Preparation Stage
### Load data and normalize
For training it's required to extend/reduce every dataset to n frames, where n is `frames`.

In [2]:
listfile = os.listdir(dirname)
contents = []
for wordname in listfile:
    if wordname == ".DS_Store":
        continue
    for csv in os.listdir(dirname + wordname):
        filepath = os.path.join(dirname, wordname, csv)
        content = pd.read_csv(filepath, sep=';')
        content = content.reindex(list(range(0, frames)), fill_value=0.5)
        content.fillna(0.5)
        contents.append((wordname, content))
data = contents

### Split data
Split the dataset up into the following segments:
1. Training Data: 66%
2. Test Data: 33%
3. Validation Data: None

In [30]:
features = [n[1] for n in data]
features = [f.to_numpy() for f in features]
labels = [n[0] for n in data]
x_train, x_test, y_train, y_test = train_test_split(features, labels, test_size=0.33, random_state=42)

In [31]:
# Display train data
print("Total:", len(labels))
print("Training:", len(y_train), len(y_train) / len(labels) * 100)
print("Test:", len(y_test), len(y_test) / len(labels) * 100)
#print("Validation:", len(y_val), len(y_val) / len(labels) * 100)

Total: 347
Training: 232 66.85878962536023
Test: 115 33.14121037463977


### Tokenize (One Hot)

In [32]:
import tools
tokenizer = tools.tokenize(dirname)
print(tokenizer.word_index)
encoded_train=tokenizer.texts_to_sequences([y_train])[0]
encoded_test=tokenizer.texts_to_sequences([y_test])[0]
y_train = to_categorical(encoded_train)
y_test = to_categorical(encoded_test)
print(y_train)

{'welt': 1, 'deutschland': 2, 'hallo': 3, 'computer': 4}
[[0. 0. 0. 1. 0.]
 [0. 0. 0. 1. 0.]
 [0. 0. 0. 0. 1.]
 ...
 [0. 0. 1. 0. 0.]
 [0. 0. 0. 0. 1.]
 [0. 0. 1. 0. 0.]]


## Training Stage

In [34]:
model = Sequential()
model.add(layers.LSTM(64, return_sequences=True,
               input_shape=(100, 86)))  # returns a sequence of vectors of dimension 32
model.add(layers.LSTM(32, return_sequences=True))  # returns a sequence of vectors of dimension 32
model.add(layers.LSTM(32))  # return a single vector of dimension 32
model.add(layers.Dense(5, activation='softmax'))
model.compile(loss='categorical_crossentropy',
              optimizer='rmsprop',
              metrics=['accuracy'])

In [16]:
history=model.fit(x_train,y_train,epochs=100,batch_size=32,validation_data=(x_test,y_test))



ValueError: Error when checking input: expected lstm_9_input to have 3 dimensions, but got array with shape (100, 86)

In [10]:
print(x_train)

[      face_x    face_y  landmark_x_1  landmark_y_1  landmark_x_2  \
0   0.431214  0.323807      0.211799      0.685963      0.252615   
1   0.430695  0.327800      0.206106      0.642059      0.247807   
2   0.430177  0.329015      0.206230      0.601509      0.260793   
3   0.431656  0.331294      0.216902      0.565174      0.264502   
4   0.433426  0.331446      0.217643      0.538221      0.268412   
..       ...       ...           ...           ...           ...   
95  0.500000  0.500000      0.500000      0.500000      0.500000   
96  0.500000  0.500000      0.500000      0.500000      0.500000   
97  0.500000  0.500000      0.500000      0.500000      0.500000   
98  0.500000  0.500000      0.500000      0.500000      0.500000   
99  0.500000  0.500000      0.500000      0.500000      0.500000   

    landmark_y_2  landmark_x_3  landmark_y_3  landmark_x_4  landmark_y_4  ...  \
0       0.631678      0.263571      0.556937      0.265001      0.499218  ...   
1       0.589775    