# Predict

This notebook loads a trained model and uses it to make predictions on the recorded data.
Furthermore, it provides simple documentation for the different approaches used for training a model.

Run the command below to see command-completion on pressing `TAB`.

### Imports

In [12]:
# Imports
import os
import warnings
import tools
import tensorflow as tf
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing import sequence
from tensorflow.keras import layers, models
from matplotlib import pyplot
import numpy as np
from sklearn.metrics import confusion_matrix
import sklearn
import wandb

# Ignore future warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

In [13]:
# TF1-compatibility session API, used below to configure GPU memory for this process
from tensorflow.compat.v1 import ConfigProto, InteractiveSession

# Report how many GPUs TensorFlow can see
physical_devices = tf.config.list_physical_devices('GPU')
print("Num GPUs:", len(physical_devices))

# Cap the per-process GPU memory fraction at 0.3 and let the allocation grow on demand,
# then install the session that carries these options
config = ConfigProto()
config.gpu_options.allow_growth = True
config.gpu_options.per_process_gpu_memory_fraction = 0.3
session = InteractiveSession(config=config)

Num GPUs: 1




### Load Data

In [18]:
# Directory that holds the CSV files to load
dirname = "./data/live/"

# Load the training / validation / test splits together with the labels.
# verbose=True makes the loader print a summary of the data.
(x_train, x_val, x_test,
 y_train, y_val, y_test,
 labels) = tools.load_from(dirname, verbose=True)

Amount Datasets by word total:
Computer :  5;  Deutschland :  7;  Haben :  7;  Hallo :  5;  Welt :  4;  du :  6;  ich :  9;   
Amount Datasets by word training:
Computer :  3;  Deutschland :  4;  Haben :  4;  Hallo :  3;  Welt :  2;  du :  4;  ich :  5;   
Amount Datasets by word validiation:
Computer :  1;  Deutschland :  1;  Haben :  2;  Hallo :  1;  Welt :  1;  du :  1;  ich :  2;   
Amount Datasets by word test:
Computer :  1;  Deutschland :  2;  Haben :  1;  Hallo :  1;  Welt :  1;  du :  1;  ich :  2;   
Distribution of data:
Amount total: 43
Amount training: 25
Amount validiation: 9
Amount test: 9
Tokens:
{'computer': 1, 'deutschland': 2, 'du': 3, 'haben': 4, 'hallo': 5, 'ich': 6, 'mainz': 7, 'software': 8, 'unser': 9, 'welt': 10, 'zeigen': 11}


In [19]:
# Read the JSON-serialized Keras tokenizer from disk
with open('tokens_json.txt', 'r') as token_file:
    json_ex = token_file.read()

# Rebuild the tokenizer and show its word -> index vocabulary
tokenizer = tf.keras.preprocessing.text.tokenizer_from_json(json_ex)
print(tokenizer.word_index)

# Invert the vocabulary (index -> word) so class indices can be decoded back into words
token_labels = {index: word for word, index in tokenizer.word_index.items()}

{'computer': 1, 'deutschland': 2, 'du': 3, 'haben': 4, 'hallo': 5, 'ich': 6, 'mainz': 7, 'software': 8, 'unser': 9, 'welt': 10, 'zeigen': 11}


In [20]:
# Restore the saved Keras model from its HDF5 file and print its layer overview
model = models.load_model('model-best_2d.h5')
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm (LSTM)                  (None, 100, 64)           38656     
_________________________________________________________________
lstm_1 (LSTM)                (None, 100, 128)          98816     
_________________________________________________________________
lstm_2 (LSTM)                (None, 100, 128)          131584    
_________________________________________________________________
lstm_3 (LSTM)                (None, 100, 256)          394240    
_________________________________________________________________
lstm_4 (LSTM)                (None, 64)                82176     
_________________________________________________________________
dense (Dense)                (None, 12)                780       
Total params: 746,252
Trainable params: 746,252
Non-trainable params: 0
__________________________________________________

### Evaluate

In [21]:
# Sanity check: print the shape of the test inputs and the one-hot test labels,
# then display the raw model output for every test sample.
# NOTE(review): the printed y_test rows have 11 columns while the model's final
# Dense layer has 12 units — confirm the label encoding matches the model.
print(x_test.shape)
print(y_test)
model.predict(x_test)

(9, 100, 86)
[[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1.]
 [0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]
 [0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]]


array([[1.63407519e-03, 6.43871538e-03, 8.17047775e-01, 8.50409851e-04,
        3.61898611e-03, 4.74120751e-02, 2.85321032e-03, 4.94563812e-03,
        6.85858028e-03, 5.93768083e-04, 9.42774713e-02, 1.34694064e-02],
       [1.60587626e-03, 7.15340534e-03, 7.69231498e-01, 8.35481507e-04,
        5.54986577e-03, 4.63290140e-02, 3.64793697e-03, 5.05892141e-03,
        9.33421310e-03, 7.02275662e-04, 1.33600727e-01, 1.69507861e-02],
       [4.90017142e-03, 1.14521161e-01, 4.01987672e-01, 9.57930926e-03,
        2.65656207e-02, 1.15411185e-01, 1.05595127e-01, 3.45639102e-02,
        1.39404777e-02, 2.87839933e-03, 1.59305260e-01, 1.07517419e-02],
       [4.92769934e-04, 4.43604290e-02, 6.09214008e-01, 1.33818034e-02,
        1.32494292e-03, 2.98599720e-01, 4.74327616e-03, 2.09063222e-03,
        2.18361593e-03, 1.81152287e-03, 1.21977208e-02, 9.59949475e-03],
       [8.93571647e-04, 6.83925599e-02, 1.65459096e-01, 2.87110778e-03,
        1.91122517e-02, 4.35957983e-02, 2.24333778e-02, 1.58

In [None]:
# NOTE(review): `y_eval` is not defined in any visible cell of this notebook, so this
# cell raises NameError on a fresh kernel — define it first or remove the cell.
# Only the last bare expression of a cell is displayed; the first three lines
# produce no output.
y_eval[0]
y_eval[1]
y_eval[2]
y_eval[3]

In [28]:
# Run the model and decode both the predicted and the true class indices into words.
# NOTE(review): despite the `y_test_*` names, this cell uses the TRAINING split
# (x_train / y_train). The names are kept because the following cells display
# `y_pred_name` and `y_test_name`; confirm which split is intended.
predict = model.predict(x_train)
y_pred_integer = np.argmax(predict, axis=1)
y_test_integer = np.argmax(y_train, axis=1)

# The tokenizer's word_index starts at 1, so class index 0 has no word attached.
# Fall back to a placeholder instead of raising KeyError when the model (which has
# one more output unit than there are vocabulary entries) predicts such an index.
UNKNOWN_LABEL = '<unknown>'
y_pred_name = [token_labels.get(p, UNKNOWN_LABEL) for p in y_pred_integer]
y_test_name = [token_labels.get(p, UNKNOWN_LABEL) for p in y_test_integer]


In [29]:
y_pred_name

['welt',
 'deutschland',
 'deutschland',
 'welt',
 'haben',
 'haben',
 'deutschland',
 'deutschland',
 'hallo',
 'welt',
 'deutschland',
 'deutschland',
 'deutschland',
 'welt',
 'haben',
 'deutschland',
 'deutschland',
 'welt',
 'deutschland',
 'deutschland',
 'welt',
 'deutschland',
 'computer',
 'deutschland',
 'deutschland']

In [30]:
y_test_name 

['computer',
 'deutschland',
 'ich',
 'welt',
 'hallo',
 'haben',
 'ich',
 'haben',
 'hallo',
 'haben',
 'ich',
 'ich',
 'du',
 'deutschland',
 'computer',
 'du',
 'haben',
 'deutschland',
 'ich',
 'deutschland',
 'computer',
 'du',
 'hallo',
 'du',
 'welt']

In [None]:
# Print each decoded label next to the corresponding entry of `predictions`.
# NOTE(review): `predictions` is not defined in any visible cell (the earlier cell
# names its model output `predict`) — confirm which variable is meant.
for i in range(0, len(predictions)):
    print('keras: ',y_pred_name[i],  predictions[i])

In [None]:
# Report the predictions and a simple throughput measurement.
# NOTE(review): `time` is not imported and `time1` / `predictions` are not defined in
# any visible cell, so this cell fails on a fresh kernel — the start timestamp has to
# be taken right before the prediction call.
print("Predictions:", predictions)
time2= time()
# `dauer` is the elapsed duration between the two timestamps
dauer = time2 - time1
# Printed labels: number of samples, prediction duration, samples per second
print("Anzahl Samples: ", len(predictions))
print("Predictiondauer: ", dauer)
print("Samples pro Sekunde:", (len(predictions)/dauer))

In [None]:
# NOTE(review): `input_data` is not defined in any visible cell — leftover debug output?
print(input_data)