# Predict

This is the notebook for loading and training models.
Furthermore it provides simple documentation for different approaches used for training a model.

Run the command below to see command-completion on pressing `TAB`.

### Imports

In [11]:
# Imports
import os
import warnings
import tools
import tensorflow as tf
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing import sequence
from tensorflow.keras import layers, models
from matplotlib import pyplot
import numpy as np
from sklearn.metrics import confusion_matrix
import sklearn
import wandb

# Ignore future warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

In [12]:
physical_devices = tf.config.list_physical_devices('GPU') 
print("Num GPUs:", len(physical_devices)) 

from tensorflow.compat.v1 import ConfigProto
from tensorflow.compat.v1 import InteractiveSession

config = ConfigProto()
config.gpu_options.per_process_gpu_memory_fraction = 0.3
config.gpu_options.allow_growth = True
session = InteractiveSession(config=config)

Num GPUs: 1




### Load Data

In [13]:
# Root CSV files directory
#dirname = "./data/live/"
dirname = "./data/absolute/modified/"


# Load data and print summary, if desired
x_train, x_val, x_test, y_train, y_val, y_test, labels = tools.load_from(dirname, verbose=True) 

Amount Datasets by word total:
Computer :  58;  Deutschland :  67;  Haben :  71;  Hallo :  62;  Mainz :  67;  Software :  68;  Welt :  68;  du :  70;  ich :  69;  unser :  67;  zeigen :  70;   
Amount Datasets by word training:
Computer :  35;  Deutschland :  40;  Haben :  43;  Hallo :  37;  Mainz :  40;  Software :  41;  Welt :  41;  du :  42;  ich :  41;  unser :  40;  zeigen :  42;   
Amount Datasets by word validiation:
Computer :  12;  Deutschland :  13;  Haben :  14;  Hallo :  13;  Mainz :  14;  Software :  13;  Welt :  13;  du :  14;  ich :  14;  unser :  13;  zeigen :  14;   
Amount Datasets by word test:
Computer :  11;  Deutschland :  14;  Haben :  14;  Hallo :  12;  Mainz :  13;  Software :  14;  Welt :  14;  du :  14;  ich :  14;  unser :  14;  zeigen :  14;   
Distribution of data:
Amount total: 737
Amount training: 442
Amount validiation: 147
Amount test: 148
Tokens:
{'computer': 1, 'deutschland': 2, 'du': 3, 'haben': 4, 'hallo': 5, 'ich': 6, 'mainz': 7, 'software': 8, 'u

In [14]:
with open('tokens_json.txt', 'r') as outfile:
    json_ex = outfile.read()
    

tokenizer = tf.keras.preprocessing.text.tokenizer_from_json(json_ex)
print(tokenizer.word_index)
token_labels = {y:x for x,y in tokenizer.word_index.items()}

{'computer': 1, 'deutschland': 2, 'du': 3, 'haben': 4, 'hallo': 5, 'ich': 6, 'mainz': 7, 'software': 8, 'unser': 9, 'welt': 10, 'zeigen': 11}


In [15]:
model = tf.keras.models.load_model('model-best_2d.h5')
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm (LSTM)                  (None, 100, 64)           38656     
_________________________________________________________________
lstm_1 (LSTM)                (None, 100, 128)          98816     
_________________________________________________________________
lstm_2 (LSTM)                (None, 100, 128)          131584    
_________________________________________________________________
lstm_3 (LSTM)                (None, 100, 256)          394240    
_________________________________________________________________
lstm_4 (LSTM)                (None, 64)                82176     
_________________________________________________________________
dense (Dense)                (None, 12)                780       
Total params: 746,252
Trainable params: 746,252
Non-trainable params: 0
__________________________________________________

### Evaluate

In [16]:
predict = model.predict(x_train)
# y_pred = np.array([np.argmax(pred) for pred in predict])
y_pred_integer = np.argmax(predict, axis=1)
y_test_integer = np.argmax(y_train, axis=1)

y_pred_name = ([token_labels[p] for p in y_pred_integer])
y_test_name = ([token_labels[p] for p in y_test_integer])

In [8]:
y_pred_name

['software',
 'du',
 'mainz',
 'haben',
 'hallo',
 'zeigen',
 'mainz',
 'software',
 'du',
 'welt',
 'computer',
 'welt',
 'du',
 'software',
 'computer',
 'du',
 'zeigen',
 'du',
 'hallo',
 'zeigen',
 'ich',
 'unser',
 'haben',
 'mainz',
 'mainz',
 'haben',
 'welt',
 'ich',
 'welt',
 'haben',
 'unser',
 'deutschland',
 'du',
 'zeigen',
 'haben',
 'mainz',
 'hallo',
 'haben',
 'haben',
 'deutschland',
 'du',
 'deutschland',
 'deutschland',
 'computer',
 'du',
 'welt',
 'haben',
 'mainz',
 'mainz',
 'welt',
 'deutschland',
 'welt',
 'deutschland',
 'computer',
 'deutschland',
 'software',
 'zeigen',
 'software',
 'mainz',
 'unser',
 'unser',
 'mainz',
 'mainz',
 'welt',
 'welt',
 'mainz',
 'hallo',
 'software',
 'zeigen',
 'welt',
 'mainz',
 'deutschland',
 'ich',
 'mainz',
 'software',
 'mainz',
 'zeigen',
 'computer',
 'ich',
 'mainz',
 'mainz',
 'ich',
 'haben',
 'welt',
 'du',
 'welt',
 'ich',
 'mainz',
 'welt',
 'du',
 'haben',
 'haben',
 'software',
 'hallo',
 'welt',
 'zeigen',
 

In [9]:
y_test_name 

['software',
 'unser',
 'mainz',
 'haben',
 'hallo',
 'mainz',
 'mainz',
 'software',
 'du',
 'welt',
 'hallo',
 'welt',
 'du',
 'zeigen',
 'computer',
 'du',
 'zeigen',
 'du',
 'hallo',
 'zeigen',
 'ich',
 'unser',
 'haben',
 'haben',
 'haben',
 'haben',
 'software',
 'unser',
 'welt',
 'haben',
 'unser',
 'deutschland',
 'unser',
 'zeigen',
 'ich',
 'mainz',
 'hallo',
 'ich',
 'haben',
 'deutschland',
 'du',
 'deutschland',
 'deutschland',
 'computer',
 'du',
 'zeigen',
 'haben',
 'mainz',
 'hallo',
 'welt',
 'deutschland',
 'welt',
 'deutschland',
 'computer',
 'deutschland',
 'software',
 'mainz',
 'software',
 'mainz',
 'unser',
 'unser',
 'mainz',
 'mainz',
 'welt',
 'software',
 'mainz',
 'hallo',
 'software',
 'zeigen',
 'software',
 'mainz',
 'deutschland',
 'ich',
 'du',
 'software',
 'computer',
 'zeigen',
 'computer',
 'haben',
 'ich',
 'haben',
 'ich',
 'haben',
 'welt',
 'unser',
 'welt',
 'ich',
 'mainz',
 'welt',
 'hallo',
 'ich',
 'haben',
 'software',
 'hallo',
 'welt

In [10]:
for i in range(0, len(predictions)):
    print('keras: ',y_pred_name[i],  predictions[i])

NameError: name 'predictions' is not defined

In [None]:
print("Predictions:", predictions)
time2= time()
dauer = time2 - time1
print("Anzahl Samples: ", len(predictions))
print("Predictiondauer: ", dauer)
print("Samples pro Sekunde:", (len(predictions)/dauer))

In [None]:
print(input_data)