# Model training lab

This is the notebook for loading and training models.
Furthermore it provides simple documentation for different approaches used for training a model.

Run the command below to see command-completion on pressing `TAB`.

## Prerequisites

In [None]:
# Imports
import os
import warnings
import tools
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing import sequence
from tensorflow.keras import layers, models
from tensorflow.keras.models import Sequential
from tensorflow.keras import layers
from tensorflow.keras import optimizers
from tensorflow.keras.layers import SimpleRNN, Dense
from tensorflow.keras.layers import Bidirectional
from matplotlib import pyplot



# Ignore future warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

# Root CSV files directory. Each sub-directory is named after the word (label)
# whose recordings it contains.
dirname = "./data/"

# Constant frame count: every recording is brought to exactly this many rows.
frames = 100


# Preparation stage - load data and normalize the length of every recording.
# Directory listings are sorted because os.listdir() returns entries in an
# arbitrary, filesystem-dependent order; without sorting, the seeded
# train/validation/test split would differ from machine to machine.
listfile = sorted(os.listdir(dirname))
data = []
for wordname in listfile:
    word_dir = os.path.join(dirname, wordname)
    # Only directories hold recordings; this skips stray files such as
    # ".DS_Store" instead of crashing on them.
    if not os.path.isdir(word_dir):
        continue
    for csv in sorted(os.listdir(word_dir)):
        # macOS may drop a ".DS_Store" into the word directories as well.
        if csv == ".DS_Store":
            continue
        filepath = os.path.join(word_dir, csv)
        content = pd.read_csv(filepath, sep=';')
        # Keep row labels 0..frames-1: longer recordings are cut off,
        # shorter ones are padded with rows of 0.0.
        content = content.reindex(list(range(0, frames)), fill_value=0.0)
        # Replace values that were already missing inside the CSV itself.
        content = content.fillna(0.0)
        data.append((wordname, content))
        
# Split data 60-20-20 (training / validation / test)

# `data` holds (word, frame table) pairs: tables become the inputs, words the targets.
labels = [word for word, _ in data]
features = [table.to_numpy() for _, table in data]

# First cut: 60 % training; the remaining 40 % is parked in the *_val names ...
x_train, x_val, y_train, y_val = train_test_split(
    features,
    labels,
    test_size=0.40,
    random_state=42,
)
# ... and then halved into the final validation and test sets (20 % each).
x_val, x_test, y_val, y_test = train_test_split(
    x_val,
    y_val,
    test_size=0.50,
    random_state=42,
)

#Enumerate
def printCountDataSets(dataset):
    """Print how often each label occurs in `dataset`, one entry per label.

    Labels are listed in sorted (alphabetical) order on a single line in the
    form ``<label> :  <count>;  `` followed by a line break.

    Only exact matches are counted. A prefix comparison would be wrong here:
    samples of a word such as "go" must not be added to the count of "good".

    Args:
        dataset: iterable of hashable, mutually sortable labels (e.g. a list
            of words).

    Returns:
        None. The result is written to standard output.
    """
    # Function-scope import so the cell does not depend on another import cell.
    from collections import Counter

    counts = Counter(dataset)
    for label in sorted(counts):
        print(label, ': ', counts[label], end=";  ")
    print(' ')
    
# Per-word sample counts for the complete data set and for each split.
for caption, subset in (
    ('Amount Datasets by word total:', labels),
    ('Amount Datasets by word training:', y_train),
    ('Amount Datasets by word validiation:', y_val),
    ('Amount Datasets by word test:', y_test),
):
    print(caption)
    printCountDataSets(subset)
    print('')


# Display data distribution: absolute size of the data set and of each split.
print('Distribution of data:')
for caption, subset in (
    ("Amount total:", labels),
    ("Amount training:", y_train),
    ("Amount validiation:", y_val),
    ("Amount test:", y_test),
):
    print(caption, len(subset))
print('')

# Tokenize (one-hot)
# tools.tokenize is a project helper; its result is used below through
# word_index, to_json() and texts_to_sequences().
# NOTE(review): presumably a Keras Tokenizer fitted on the word directories - confirm in tools.py.
tokenizer = tools.tokenize(dirname)
print('Tokens:')
print(tokenizer.word_index)
print('')
# Persist the word -> id mapping so that predictions can be decoded elsewhere.
with open('tokens_json.txt', 'w') as outfile:
    outfile.write(tokenizer.to_json())

# Each split is passed as ONE "text" made of many words, hence the [ ... ][0].
encoded_train = tokenizer.texts_to_sequences([y_train])[0]
encoded_val = tokenizer.texts_to_sequences([y_val])[0]
encoded_test = tokenizer.texts_to_sequences([y_test])[0]

# Every label must have received an id; a shorter result would shift the
# targets against their samples and only fail later, inside model.fit().
assert len(encoded_train) == len(y_train), "training labels missing from tokenizer"
assert len(encoded_val) == len(y_val), "validation labels missing from tokenizer"
assert len(encoded_test) == len(y_test), "test labels missing from tokenizer"

# Use one common width for all three one-hot matrices. Letting to_categorical
# infer it per split yields a narrower matrix whenever a split happens not to
# contain the word with the highest id.
num_classes = max(max(encoded_train), max(encoded_val), max(encoded_test)) + 1

y_train = to_categorical(encoded_train, num_classes=num_classes)
y_val = to_categorical(encoded_val, num_classes=num_classes)
y_test = to_categorical(encoded_test, num_classes=num_classes)

# Show the one-hot encoded training targets.
print('Categories in OneHot anotation:')
print(y_train)
print('')
# Making numpy arrays: inputs and targets of each split, pair by pair.
x_train, y_train = np.array(x_train), np.array(y_train)
x_val, y_val = np.array(x_val), np.array(y_val)
x_test, y_test = np.array(x_test), np.array(y_test)

# Show the training inputs (the coordinate values read from the CSV files).
print('Dataset coordinate Values:')
print(x_train)
print('')

def finished(num):
    """Signal the end of a long-running cell with `num` short beeps.

    Uses the Windows-only standard-library module ``winsound`` when it is
    available; on other platforms it falls back to writing the terminal bell
    character, which may or may not be audible depending on the front end.

    Args:
        num: number of beeps to emit.
    """
    try:
        import winsound  # only exists on Windows
    except ImportError:
        winsound = None

    frequency = 2000  # beep frequency in Hertz
    duration = 500  # beep length in milliseconds
    for _ in range(0, num):
        if winsound is not None:
            winsound.Beep(frequency, duration)
        else:
            # Best-effort fallback so the tuning cells do not fail off Windows.
            print('\a', end='', flush=True)
    

## Training Stage
Configure the model and train it.

Metrics:
<div float="right">
    <img src="assets/accuracy.png" width="400"> 
    <img src="assets/precision_recall_formula.png" width="400">
</div>
<img src="assets/precision_recall.png" width="1000">


### <span style="color:blue"> Hyperparameter-tuned LSTM </span>
##### Here it is necessary to install the Keras-Tuner Module by executing:
#####  <span style="color:green"> via Conda:</span>
conda install -c conda-forge keras-tuner
#####  <span style="color:green"> for pip:</span>
pip install keras-tuner

Right now there are three different builds we are testing:
- classic LSTM
- CuDNNLSTM
- bidirectional LSTM


In [None]:
from kerastuner.tuners import RandomSearch
from kerastuner.tuners import Hyperband
from kerastuner.engine.hyperparameters import HyperParameters
from time import time, strftime


# Timestamp of this run; used to keep log directories and exported files apart.
starttime = strftime("%Y_%m_%d_%H%M%S")
# LOG_DIR holds json files with information and a model of each single trial.
# On Windows a log path below the notebook directory may become too long to
# handle, so a short absolute path is used here.
# The backslashes are escaped explicitly: "\M" and "\O" are invalid escape
# sequences, which Python accepts only with a warning.
LOG_DIR = f"C:\\ML\\Optimization_{starttime}"
#LOG_DIR = "./Optimization_"f"{starttime}" # portable alternative, relative to the notebook

def build_model_lstm(hp):
    """Build and compile a stacked LSTM classifier for one Keras Tuner trial.

    Hyperparameters requested from `hp`, in this order:
        LSTM_input      units of the first LSTM layer (64..256, step 64)
        n_layers        number of additional sequence-returning LSTM layers (1..3)
        LSTM_<i>_units  units of additional layer i (64..256, step 64)
        LSTM_End        units of the last LSTM layer (32..128, step 32)
        optimizer       one of 'Adagrad', 'Adamax', 'Adam', 'RMSprop'

    The input shape is taken from the global `x_train`. The output layer has
    12 softmax units.
    NOTE(review): 12 presumably equals the number of one-hot columns - confirm.

    Args:
        hp: hyperparameter container handed in by the tuner.

    Returns:
        The compiled model. Its summary and optimizer name are printed as a
        side effect.
    """
    net = Sequential()

    # First layer: the tuner picks the number of units between 64 and 256 in steps of 64.
    input_units = hp.Int("LSTM_input", min_value=64, max_value=256, step=64, default=64)
    net.add(layers.LSTM(input_units,
                        return_sequences=True,
                        input_shape=(x_train.shape[1], x_train.shape[2])))

    # Between 1 and 3 further layers that still hand on the full sequence.
    extra_layer_count = hp.Int("n_layers", 1, 3)
    for layer_no in range(extra_layer_count):
        layer_units = hp.Int(f"LSTM_{layer_no}_units", min_value=64, max_value=256, step=64, default=64)
        net.add(layers.LSTM(layer_units, return_sequences=True))

    # Last recurrent layer returns only its final output, which feeds the classifier.
    last_units = hp.Int("LSTM_End", min_value=32, max_value=128, step=32, default=32)
    net.add(layers.LSTM(last_units))
    net.add(layers.Dense(12, activation='softmax'))

    optimizer_name = hp.Choice('optimizer', values=['Adagrad', 'Adamax', 'Adam', 'RMSprop'])
    net.compile(loss='categorical_crossentropy',
                optimizer=optimizer_name,
                metrics=['accuracy', tf.keras.metrics.Precision(), tf.keras.metrics.Recall()])

    net.summary()
    print(net.optimizer.get_config()["name"])
    print('')
    return net



def build_model_CuDNNLSTM(hp):
    """Build and compile a stacked CuDNNLSTM classifier for one Keras Tuner trial.

    Same layout as the plain LSTM builder, but every recurrent layer is a
    `tf.compat.v1.keras.layers.CuDNNLSTM`.

    Hyperparameters requested from `hp`, in this order:
        LSTM_input      units of the first layer (64..256, step 64)
        n_layers        number of additional sequence-returning layers (1..3)
        LSTM_<i>_units  units of additional layer i (64..256, step 64)
        LSTM_End        units of the last recurrent layer (32..128, step 32)
        optimizer       one of 'Adagrad', 'Adamax', 'Adam', 'RMSprop'

    The input shape is taken from the global `x_train`. The output layer has
    12 softmax units.

    Args:
        hp: hyperparameter container handed in by the tuner.

    Returns:
        The compiled model. Its summary and optimizer name are printed as a
        side effect.
    """
    cudnn_lstm = tf.compat.v1.keras.layers.CuDNNLSTM
    net = Sequential()

    # First layer: the tuner picks the number of units between 64 and 256 in steps of 64.
    input_units = hp.Int("LSTM_input", min_value=64, max_value=256, step=64, default=64)
    net.add(cudnn_lstm(input_units,
                       return_sequences=True,
                       input_shape=(x_train.shape[1], x_train.shape[2])))

    # Between 1 and 3 further layers that still hand on the full sequence.
    extra_layer_count = hp.Int("n_layers", 1, 3)
    for layer_no in range(extra_layer_count):
        layer_units = hp.Int(f"LSTM_{layer_no}_units", min_value=64, max_value=256, step=64, default=64)
        net.add(cudnn_lstm(layer_units, return_sequences=True))

    # Last recurrent layer returns only its final output, which feeds the classifier.
    last_units = hp.Int("LSTM_End", min_value=32, max_value=128, step=32, default=32)
    net.add(cudnn_lstm(last_units))
    net.add(layers.Dense(12, activation='softmax'))

    optimizer_name = hp.Choice('optimizer', values=['Adagrad', 'Adamax', 'Adam', 'RMSprop'])
    net.compile(loss='categorical_crossentropy',
                optimizer=optimizer_name,
                metrics=['accuracy', tf.keras.metrics.Precision(), tf.keras.metrics.Recall()])

    net.summary()
    print(net.optimizer.get_config()["name"])
    print('')
    return net



def build_model_bdlstm(hp):
    """Build and compile a stacked bidirectional LSTM classifier for one tuner trial.

    Hyperparameters requested from `hp`, in this order:
        LSTM_input      units of the first LSTM (64..256, step 64)
        n_layers        number of additional sequence-returning layers (1..3)
        LSTM_<i>_units  units of additional layer i (64..256, step 64)
        LSTM_End        units of the last LSTM (32..128, step 32)
        optimizer       one of 'Adagrad', 'Adamax', 'Adam', 'RMSprop'

    The input shape is read from the global `x_train`, like in the other two
    builders, instead of the fixed (100, 86) used before. The function
    therefore keeps working when the frame count or the number of CSV columns
    changes. Only 'accuracy' is tracked as a metric here.

    Args:
        hp: hyperparameter container handed in by the tuner.

    Returns:
        The compiled model. Its summary and optimizer name are printed as a
        side effect.
    """
    model = Sequential()
    model.add(layers.Bidirectional(
        layers.LSTM(hp.Int("LSTM_input", min_value=64, max_value=256, step=64, default=64),
                    return_sequences=True),
        input_shape=(x_train.shape[1], x_train.shape[2])))

    # Between 1 and 3 further bidirectional layers that hand on the full sequence.
    for i in range(hp.Int("n_layers", 1, 3)):
        model.add(layers.Bidirectional(
            layers.LSTM(hp.Int(f"LSTM_{i}_units", min_value=64, max_value=256, step=64, default=64),
                        return_sequences=True)))

    # Last recurrent layer returns only its final output, which feeds the classifier.
    model.add(layers.Bidirectional(
        layers.LSTM(hp.Int("LSTM_End", min_value=32, max_value=128, step=32, default=32))))
    model.add(layers.Dense(12, activation='softmax'))
    model.compile(loss='categorical_crossentropy',
                  optimizer=hp.Choice('optimizer', values=['Adagrad', 'Adamax', 'Adam', 'RMSprop']),
                  metrics=['accuracy'])
    model.summary()
    print(model.optimizer.get_config()["name"])
    print('')
    return model






###   <span style="color:red">Necessary only when using an Nvidia GPU  </span>

In [None]:
# List the GPUs TensorFlow can see and report how many there are.
physical_devices = tf.config.list_physical_devices('GPU') 
print("Num GPUs:", len(physical_devices)) 

from tensorflow.compat.v1 import ConfigProto
from tensorflow.compat.v1 import InteractiveSession

# TF1-style session configuration for GPU memory handling.
config = ConfigProto()
# NOTE(review): going by the option names, this limits the process to half of
# the GPU memory and lets the allocation grow on demand - confirm against the
# TensorFlow documentation for the installed version.
config.gpu_options.per_process_gpu_memory_fraction = 0.5
config.gpu_options.allow_growth = True
# Creating the session applies the configuration; keep the reference in `session`.
session = InteractiveSession(config=config)

# Different Keras-Tuner Approaches
### 1 - RandomSearch
Hyperparameter values (number of layers, number of nodes) are chosen randomly and the "best" model is selected.

In [12]:
tuner = RandomSearch(
    build_model_CuDNNLSTM,       # build function to search in - see the different builds above
    objective="val_accuracy",    # the "best" model is the one with the highest val_accuracy
    max_trials=30,               # number of hyperparameter combinations (nodes and layers) to try
    executions_per_trial=1,
    directory=LOG_DIR,
    project_name='SignLagnuageModelOptimization'
    )

#tuner.search_space_summary()

# search() takes the same arguments as model.fit().
tuner.search(x=x_train,
             y=y_train,
             epochs=200,
             batch_size=32,
             validation_data=(x_val, y_val),
             verbose=2
             )

print(tuner.get_best_hyperparameters()[0].values)
# results_summary() prints the report itself and returns None, so wrapping it
# in print() only appended a stray "None" to the output.
tuner.results_summary()

# NOTE(review): finished() has to be defined in an earlier cell, otherwise this
# line raises a NameError after the whole search has completed.
finished(8)

### 2 - Hyperband
Variation of RandomSearch http://jmlr.org/papers/volume18/16-558/16-558.pdf

In [None]:
# No function called `build_model` exists in this notebook; the available
# builders are build_model_lstm, build_model_CuDNNLSTM and build_model_bdlstm.
# The plain LSTM builder is used here - swap in one of the others as needed.
tuner = Hyperband(
    build_model_lstm,
    objective="val_accuracy",
    hyperband_iterations=2,
    max_epochs=150,
    directory=LOG_DIR,
    # NOTE(review): same directory and project name as the RandomSearch run;
    # if both cells run in one session the tuners may pick up each other's
    # saved state - consider a distinct project_name.
    project_name='SignLagnuageModelOptimization'
    )

#tuner.search_space_summary()

tuner.search(x=x_train,
             y=y_train,
             batch_size=32,
             validation_data=(x_val, y_val))

print(tuner.get_best_hyperparameters()[0].values)
# results_summary() prints the report itself and returns None, so wrapping it
# in print() only appended a stray "None" to the output.
tuner.results_summary()

# NOTE(review): finished() has to be defined in an earlier cell, otherwise this
# line raises a NameError after the whole search has completed.
finished(8)

In [15]:
# Best model according to RandomSearch as of 23.06.2020

# Three stacked LSTM layers (128 -> 64 -> 96 units) followed by a 12-way softmax.
model = Sequential([
    layers.LSTM(128,
                return_sequences=True,
                input_shape=(x_train.shape[1], x_train.shape[2])),
    layers.LSTM(64, return_sequences=True),
    layers.LSTM(96),
    layers.Dense(12, activation='softmax'),
])
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy', tf.keras.metrics.Precision(), tf.keras.metrics.Recall()],
)
model.summary()

# Train for 170 epochs without shuffling, validating on the validation split.
history = model.fit(
    x_train,
    y_train,
    epochs=170,
    validation_data=(x_val, y_val),
    shuffle=False,
    verbose=2,
)

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_7 (LSTM)                (None, 100, 128)          110080    
_________________________________________________________________
lstm_8 (LSTM)                (None, 100, 64)           49408     
_________________________________________________________________
lstm_9 (LSTM)                (None, 96)                61824     
_________________________________________________________________
dense_2 (Dense)              (None, 12)                1164      
Total params: 222,476
Trainable params: 222,476
Non-trainable params: 0
_________________________________________________________________


In [None]:
# Loss per epoch: training vs. validation.
pyplot.plot(history.history['loss'])
pyplot.plot(history.history['val_loss'])
pyplot.title('model train vs validation loss')
pyplot.ylabel('loss')
pyplot.xlabel('epoch')
pyplot.legend(['train', 'validation'], loc='upper right')
pyplot.show()

# Accuracy per epoch: training vs. validation.
pyplot.plot(history.history['accuracy'])
pyplot.plot(history.history['val_accuracy'])
pyplot.title('model train vs validation accuracy')
# The y-axis shows accuracy here; it was mislabelled as 'loss'.
pyplot.ylabel('accuracy')
pyplot.xlabel('epoch')
pyplot.legend(['train', 'validation'], loc='lower right')
pyplot.show()

## Export tuner object into pickle file
so it can be used in other scripts

In [None]:
import pickle

# Write the tuner object to a pickle file named after this run's timestamp.
tuner_pickle_path = f"tuner_{starttime}.pkl"
with open(tuner_pickle_path, "wb") as f:
    pickle.dump(tuner, f)
    

## Get best Trial from Tuner Object

In [None]:
# Hyperparameter set of the best trial found by the tuner.
best_hp = tuner.get_best_hyperparameters(num_trials=1)[0]
# Run the build function again with those hyperparameters. This creates a new
# model object; it has to be trained before it is evaluated or saved.
bestmodel = tuner.hypermodel.build(best_hp)

bestmodel.summary()


In [16]:
# Checkpoint file name pattern; Keras fills in epoch, val_accuracy and val_loss.
#tmp_chekpoints= "tmp\epoch{epoch:02d}-{val_accuracy:.2f}-{val_loss:.2f}.hdf5"
tmp_chekpoints= "C:\\ML\\checkpoints\\tmp\\epoch{epoch:02d}-{val_accuracy:.2f}-{val_loss:.2f}.hdf5"

# Windows paths are written as raw strings below: "\M" and "\l" are invalid
# escape sequences, which Python accepts only with a warning. The resulting
# path values are unchanged.
#csv_log = tf.keras.callbacks.CSVLogger("log.csv", separator=',', append=False)
csv_log = tf.keras.callbacks.CSVLogger(r"C:\ML\logs\log.csv", separator=',', append=False)

#tb = tf.keras.callbacks.TensorBoard(log_dir='logs', histogram_freq=1, write_graph=False, write_images=False, update_freq='epoch', profile_batch=2, embeddings_freq=1, embeddings_metadata=None)
tb = tf.keras.callbacks.TensorBoard(log_dir=r'C:\ML\logs', histogram_freq=1, write_graph=False, write_images=False, update_freq='epoch', profile_batch=2, embeddings_freq=1, embeddings_metadata=None)
es = tf.keras.callbacks.EarlyStopping(monitor='val_accuracy', min_delta=0.001, patience=20, verbose=0, mode='max', baseline=None, restore_best_weights=True)
chk= tf.keras.callbacks.ModelCheckpoint(tmp_chekpoints, monitor='val_accuracy', verbose=0, save_best_only=False, save_weights_only=False, mode='max', save_freq='epoch')


# NOTE(review): `tb` and `es` are created but not passed to fit(), so neither
# TensorBoard logging nor early stopping is active - confirm this is intended.
history = model.fit(x_train,y_train,epochs=200,batch_size=32, validation_data=(x_val,y_val),shuffle=False, verbose=2, callbacks=[csv_log, chk])

Epoch 1/200
14/14 - 1s - loss: 2.4135 - accuracy: 0.1244 - precision_2: 0.5000 - recall_2: 0.0070 - val_loss: 2.2545 - val_accuracy: 0.1690 - val_precision_2: 0.2727 - val_recall_2: 0.0211
Epoch 2/200
14/14 - 0s - loss: 2.1473 - accuracy: 0.2042 - precision_2: 0.5385 - recall_2: 0.0329 - val_loss: 1.9987 - val_accuracy: 0.1620 - val_precision_2: 0.0000e+00 - val_recall_2: 0.0000e+00
Epoch 3/200
14/14 - 0s - loss: 1.9705 - accuracy: 0.2254 - precision_2: 0.6667 - recall_2: 0.0047 - val_loss: 1.9349 - val_accuracy: 0.2324 - val_precision_2: 0.3333 - val_recall_2: 0.0211
Epoch 4/200
14/14 - 0s - loss: 1.8903 - accuracy: 0.2324 - precision_2: 0.4762 - recall_2: 0.0235 - val_loss: 1.9650 - val_accuracy: 0.2254 - val_precision_2: 0.0000e+00 - val_recall_2: 0.0000e+00
Epoch 5/200
14/14 - 0s - loss: 1.8208 - accuracy: 0.2559 - precision_2: 0.5682 - recall_2: 0.0587 - val_loss: 1.8764 - val_accuracy: 0.2324 - val_precision_2: 0.3750 - val_recall_2: 0.0211
Epoch 6/200
14/14 - 0s - loss: 1.7582 -

Epoch 45/200
14/14 - 0s - loss: 0.9526 - accuracy: 0.6362 - precision_2: 0.8059 - recall_2: 0.4484 - val_loss: 1.6407 - val_accuracy: 0.3803 - val_precision_2: 0.5116 - val_recall_2: 0.3099
Epoch 46/200
14/14 - 0s - loss: 0.9579 - accuracy: 0.6338 - precision_2: 0.7581 - recall_2: 0.4413 - val_loss: 1.6465 - val_accuracy: 0.4507 - val_precision_2: 0.6067 - val_recall_2: 0.3803
Epoch 47/200
14/14 - 0s - loss: 0.9195 - accuracy: 0.6362 - precision_2: 0.7595 - recall_2: 0.5188 - val_loss: 1.3503 - val_accuracy: 0.5493 - val_precision_2: 0.6800 - val_recall_2: 0.3592
Epoch 48/200
14/14 - 0s - loss: 1.0755 - accuracy: 0.6009 - precision_2: 0.7235 - recall_2: 0.4484 - val_loss: 1.5986 - val_accuracy: 0.4085 - val_precision_2: 0.5479 - val_recall_2: 0.2817
Epoch 49/200
14/14 - 0s - loss: 1.1109 - accuracy: 0.5399 - precision_2: 0.7897 - recall_2: 0.3615 - val_loss: 1.5237 - val_accuracy: 0.4155 - val_precision_2: 0.5789 - val_recall_2: 0.3099
Epoch 50/200
14/14 - 0s - loss: 0.9767 - accuracy:

Epoch 89/200
14/14 - 0s - loss: 0.4413 - accuracy: 0.8333 - precision_2: 0.8601 - recall_2: 0.7934 - val_loss: 1.5056 - val_accuracy: 0.5704 - val_precision_2: 0.5821 - val_recall_2: 0.5493
Epoch 90/200
14/14 - 0s - loss: 0.3482 - accuracy: 0.8615 - precision_2: 0.8797 - recall_2: 0.8239 - val_loss: 1.5086 - val_accuracy: 0.5775 - val_precision_2: 0.5906 - val_recall_2: 0.5282
Epoch 91/200
14/14 - 0s - loss: 0.5213 - accuracy: 0.8146 - precision_2: 0.8471 - recall_2: 0.7934 - val_loss: 1.4205 - val_accuracy: 0.5845 - val_precision_2: 0.6116 - val_recall_2: 0.5211
Epoch 92/200
14/14 - 0s - loss: 0.4446 - accuracy: 0.8451 - precision_2: 0.8699 - recall_2: 0.8005 - val_loss: 1.4344 - val_accuracy: 0.5704 - val_precision_2: 0.6094 - val_recall_2: 0.5493
Epoch 93/200
14/14 - 0s - loss: 0.4056 - accuracy: 0.8474 - precision_2: 0.8990 - recall_2: 0.8146 - val_loss: 1.3958 - val_accuracy: 0.6408 - val_precision_2: 0.6508 - val_recall_2: 0.5775
Epoch 94/200
14/14 - 0s - loss: 0.3327 - accuracy:

Epoch 132/200
14/14 - 0s - loss: 0.1130 - accuracy: 0.9601 - precision_2: 0.9600 - recall_2: 0.9577 - val_loss: 1.6942 - val_accuracy: 0.6127 - val_precision_2: 0.6397 - val_recall_2: 0.6127
Epoch 133/200
14/14 - 0s - loss: 0.1011 - accuracy: 0.9577 - precision_2: 0.9645 - recall_2: 0.9554 - val_loss: 1.5623 - val_accuracy: 0.6479 - val_precision_2: 0.6667 - val_recall_2: 0.6338
Epoch 134/200
14/14 - 0s - loss: 0.0858 - accuracy: 0.9695 - precision_2: 0.9717 - recall_2: 0.9671 - val_loss: 1.8197 - val_accuracy: 0.6479 - val_precision_2: 0.6475 - val_recall_2: 0.6338
Epoch 135/200
14/14 - 0s - loss: 0.0838 - accuracy: 0.9718 - precision_2: 0.9718 - recall_2: 0.9718 - val_loss: 1.5597 - val_accuracy: 0.6549 - val_precision_2: 0.6643 - val_recall_2: 0.6549
Epoch 136/200
14/14 - 0s - loss: 0.0740 - accuracy: 0.9789 - precision_2: 0.9811 - recall_2: 0.9765 - val_loss: 1.5722 - val_accuracy: 0.6690 - val_precision_2: 0.6835 - val_recall_2: 0.6690
Epoch 137/200
14/14 - 0s - loss: 0.0678 - acc

Epoch 175/200
14/14 - 0s - loss: 0.0873 - accuracy: 0.9742 - precision_2: 0.9787 - recall_2: 0.9718 - val_loss: 1.3901 - val_accuracy: 0.6901 - val_precision_2: 0.6906 - val_recall_2: 0.6761
Epoch 176/200
14/14 - 0s - loss: 0.0720 - accuracy: 0.9789 - precision_2: 0.9788 - recall_2: 0.9765 - val_loss: 1.4882 - val_accuracy: 0.6831 - val_precision_2: 0.6861 - val_recall_2: 0.6620
Epoch 177/200
14/14 - 0s - loss: 0.0778 - accuracy: 0.9765 - precision_2: 0.9810 - recall_2: 0.9718 - val_loss: 1.5805 - val_accuracy: 0.6761 - val_precision_2: 0.6857 - val_recall_2: 0.6761
Epoch 178/200
14/14 - 0s - loss: 0.0580 - accuracy: 0.9883 - precision_2: 0.9882 - recall_2: 0.9859 - val_loss: 1.5455 - val_accuracy: 0.6549 - val_precision_2: 0.6838 - val_recall_2: 0.6549
Epoch 179/200
14/14 - 0s - loss: 0.0882 - accuracy: 0.9695 - precision_2: 0.9718 - recall_2: 0.9695 - val_loss: 1.5596 - val_accuracy: 0.6549 - val_precision_2: 0.6739 - val_recall_2: 0.6549
Epoch 180/200
14/14 - 0s - loss: 0.1710 - acc

### Diagnostic Plots

The training history of your LSTM models can be used to diagnose the behavior of your model.

You can plot the performance of your model using the Matplotlib library. For example, you can plot training loss vs. validation loss as follows:

In [None]:
# One figure per metric: (history key, legend position, image file to write).
plot_specs = (
    ('loss', 'upper right', f"C:/ML/loss{starttime}.png"),
    ('accuracy', 'lower right', f"C:/ML/accuracy_{starttime}.png"),
)
for metric, legend_loc, image_path in plot_specs:
    # Training curve first, validation curve second - matches the legend order.
    pyplot.plot(history.history[metric])
    pyplot.plot(history.history['val_' + metric])
    pyplot.title('model train vs validation ' + metric)
    pyplot.ylabel(metric)
    pyplot.xlabel('epoch')
    pyplot.legend(['train', 'validation'], loc=legend_loc)
    # Save before show().
    pyplot.savefig(image_path)
    pyplot.show()

#### Underfit Example
Running this example produces a plot of train and validation loss showing the characteristic of an underfit model. In this case, performance may be improved by increasing the number of training epochs.


<img src="assets/Diagnostic-Line-Plot-Showing-an-Underfit-Model.png" width="400">


Running this example shows the characteristic of an underfit model that appears under-provisioned.
In this case, performance may be improved by increasing the capacity of the model, such as the number of memory cells in a hidden layer or number of hidden layers.

<img src="assets/Diagnostic-Line-Plot-Showing-an-Underfit-Model-via-Status.png" width="400">

#### Good Fit Example
Running the example creates a line plot showing the train and validation loss meeting.
Ideally, we would like to see model performance like this if possible, although this may not be possible on challenging problems with a lot of data.

<img src="assets/Diagnostic-Line-Plot-Showing-a-Good-Fit-for-a-Model.png" width="400">

#### Overfit Example
Running this example creates a plot showing the characteristic inflection point in validation loss of an overfit model.
This may be a sign of too many training epochs.
In this case, the model training could be stopped at the inflection point. Alternately, the number of training examples could be increased.

<img src="assets/Diagnostic-Line-Plot-Showing-an-Overfit-Model.png" width="400">

### Evaluate

In [None]:
# To score a saved checkpoint instead of the in-memory model, load it first:
#model = tf.keras.models.load_model('./tmp/epoch49-0.90-0.39.hdf5')


# Score the model on the held-out test split.
# (Use bestmodel.evaluate(...) instead to score the model built from the tuner.)
model.evaluate(x_test, y_test, verbose=2)


### Save model

In [None]:
# Save the model that was actually trained and evaluated above.
# `bestmodel` is only built from the best hyperparameters and is never fitted
# in this notebook, so saving it would store a network with untrained weights.
model.save("sign_lang_recognition_tuned.h5")