# Predict

This is the notebook for loading and training models.
Furthermore it provides simple documentation for different approaches used for training a model.

Run the command below to see command-completion on pressing `TAB`.

### Imports

In [1]:
# Imports
import os
import warnings
import tools
import tensorflow as tf
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing import sequence
from tensorflow.keras import layers, models
from matplotlib import pyplot
import numpy as np
from sklearn.metrics import confusion_matrix
import sklearn
import wandb

# Ignore future warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

### Load Data

In [2]:
# Root CSV files directory
dirname = "./data/absolute/3D/"  


# Load data and print summary, if desired
x_train, x_val, x_test, y_train, y_val, y_test, labels = tools.load_from(dirname, verbose=True) 

Amount Datasets by word total:
Computer :  58;  Deutschland :  67;  Haben :  71;  Hallo :  62;  Mainz :  67;  Software :  68;  Welt :  68;  du :  70;  ich :  38;  unser :  67;  zeigen :  70;   
Amount Datasets by word training:
Computer :  35;  Deutschland :  40;  Haben :  42;  Hallo :  37;  Mainz :  40;  Software :  41;  Welt :  41;  du :  42;  ich :  23;  unser :  40;  zeigen :  42;   
Amount Datasets by word validiation:
Computer :  12;  Deutschland :  13;  Haben :  14;  Hallo :  13;  Mainz :  14;  Software :  13;  Welt :  13;  du :  14;  ich :  8;  unser :  13;  zeigen :  14;   
Amount Datasets by word test:
Computer :  11;  Deutschland :  14;  Haben :  15;  Hallo :  12;  Mainz :  13;  Software :  14;  Welt :  14;  du :  14;  ich :  7;  unser :  14;  zeigen :  14;   
Distribution of data:
Amount total: 706
Amount training: 423
Amount validiation: 141
Amount test: 142
Tokens:
{'computer': 1, 'deutschland': 2, 'du': 3, 'haben': 4, 'hallo': 5, 'ich': 6, 'mainz': 7, 'software': 8, 'uns

In [3]:
with open('tokens_json.txt', 'r') as outfile:
    json_ex = outfile.read()
    

tokenizer = tf.keras.preprocessing.text.tokenizer_from_json(json_ex)
print(tokenizer.word_index)
token_labels = {y:x for x,y in tokenizer.word_index.items()}

{'computer': 1, 'deutschland': 2, 'du': 3, 'haben': 4, 'hallo': 5, 'ich': 6, 'mainz': 7, 'software': 8, 'unser': 9, 'welt': 10, 'zeigen': 11}


In [4]:
physical_devices = tf.config.list_physical_devices('GPU') 
print("Num GPUs:", len(physical_devices)) 

from tensorflow.compat.v1 import ConfigProto
from tensorflow.compat.v1 import InteractiveSession

config = ConfigProto()
config.gpu_options.per_process_gpu_memory_fraction = 0.3
config.gpu_options.allow_growth = True
session = InteractiveSession(config=config)

Num GPUs: 0


In [5]:
model = tf.keras.models.load_model('model-best_3d.h5')
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm (LSTM)                  (None, 100, 128)          131584    
_________________________________________________________________
lstm_1 (LSTM)                (None, 100, 256)          394240    
_________________________________________________________________
lstm_2 (LSTM)                (None, 100, 128)          197120    
_________________________________________________________________
lstm_3 (LSTM)                (None, 128)               131584    
_________________________________________________________________
dense (Dense)                (None, 12)                1548      
Total params: 856,076
Trainable params: 856,076
Non-trainable params: 0
_________________________________________________________________


### Evaluate

In [7]:
eval = model.evaluate(x_test, y_test, verbose=2)

142/142 - 1s - loss: 0.9388 - accuracy: 0.7324 - precision: 0.7603 - recall: 0.6479


In [8]:
y_eval[0]
y_eval[1]
y_eval[2]
y_eval[3]

[0.9387986122722357, 0.73239434, 0.76033056, 0.64788735]

In [7]:
predict = model.predict(x_test)
# y_pred = np.array([np.argmax(pred) for pred in predict])
y_pred_integer = np.argmax(predict, axis=1)
y_test_integer = np.argmax(y_test, axis=1)

y_pred_name = ([token_labels[p] for p in y_pred_integer])
y_test_name = ([token_labels[p] for p in y_test_integer])


In [8]:
y_pred_name

['haben',
 'software',
 'computer',
 'software',
 'deutschland',
 'ich',
 'hallo',
 'deutschland',
 'zeigen',
 'computer',
 'zeigen',
 'haben',
 'mainz',
 'hallo',
 'du',
 'zeigen',
 'du',
 'du',
 'welt',
 'mainz',
 'mainz',
 'du',
 'software',
 'zeigen',
 'du',
 'deutschland',
 'haben',
 'deutschland',
 'mainz',
 'computer',
 'mainz',
 'mainz',
 'computer',
 'software',
 'haben',
 'ich',
 'software',
 'unser',
 'unser',
 'deutschland',
 'zeigen',
 'welt',
 'welt',
 'zeigen',
 'computer',
 'mainz',
 'mainz',
 'mainz',
 'deutschland',
 'deutschland',
 'haben',
 'welt',
 'computer',
 'hallo',
 'welt',
 'mainz',
 'du',
 'unser',
 'hallo',
 'welt',
 'ich',
 'unser',
 'software',
 'du',
 'haben',
 'computer',
 'unser',
 'unser',
 'hallo',
 'du',
 'deutschland',
 'unser',
 'hallo',
 'welt',
 'software',
 'hallo',
 'ich',
 'software',
 'software',
 'welt',
 'software',
 'hallo',
 'mainz',
 'zeigen',
 'ich',
 'haben',
 'hallo',
 'deutschland',
 'haben',
 'welt',
 'welt',
 'haben',
 'welt',
 'c

In [9]:
y_test_name 

['haben',
 'software',
 'computer',
 'software',
 'deutschland',
 'ich',
 'hallo',
 'deutschland',
 'mainz',
 'computer',
 'zeigen',
 'haben',
 'mainz',
 'hallo',
 'du',
 'zeigen',
 'du',
 'du',
 'welt',
 'mainz',
 'haben',
 'du',
 'software',
 'zeigen',
 'du',
 'deutschland',
 'haben',
 'deutschland',
 'mainz',
 'computer',
 'mainz',
 'mainz',
 'software',
 'software',
 'haben',
 'ich',
 'software',
 'unser',
 'unser',
 'deutschland',
 'zeigen',
 'software',
 'welt',
 'zeigen',
 'computer',
 'haben',
 'mainz',
 'mainz',
 'deutschland',
 'deutschland',
 'haben',
 'software',
 'computer',
 'hallo',
 'welt',
 'mainz',
 'du',
 'computer',
 'hallo',
 'welt',
 'ich',
 'unser',
 'software',
 'du',
 'haben',
 'computer',
 'unser',
 'unser',
 'hallo',
 'du',
 'deutschland',
 'unser',
 'hallo',
 'welt',
 'software',
 'hallo',
 'ich',
 'software',
 'software',
 'welt',
 'hallo',
 'hallo',
 'mainz',
 'zeigen',
 'ich',
 'haben',
 'hallo',
 'deutschland',
 'ich',
 'welt',
 'welt',
 'haben',
 'welt'

In [10]:
wandb.init(project="Test", name="confusion")

wandb: Wandb version 0.9.4 is available!  To upgrade, please run:
wandb:  $ pip install wandb --upgrade


W&B Run: https://app.wandb.ai/slr-hs-mainz/Test/runs/c861vtsp

In [11]:
# Confusion Matrix
#sklearn.metrics.plot_confusion_matrix(y_test_name, y_pred_name)
wandb.sklearn.plot_confusion_matrix(y_test_name, y_pred_name)

wandb: Wandb version 0.9.4 is available!  To upgrade, please run:
wandb:  $ pip install wandb --upgrade
