In [1]:
# Imports
import os
import warnings
import tensorflow as tf
import sklearn
import numpy as np
from sklearn.metrics import confusion_matrix
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing import sequence
from tensorflow.keras import layers, models
from tensorflow.keras.models import Sequential
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras import optimizers
from tensorflow.keras.layers import SimpleRNN, Dense
from tensorflow.keras.layers import Bidirectional
from tensorflow.compat.v1 import ConfigProto
from tensorflow.compat.v1 import InteractiveSession
from matplotlib import pyplot
from data_repository import DataRepository
import sys
import tensorflow.keras as K


# Suppress FutureWarning messages for the rest of the notebook
warnings.simplefilter('ignore', FutureWarning)

# Data locations: one directory for the known classes, one for the unknown samples
unseen_dirname = './data/absolute/2D_unknown/'
dirname = './data/absolute/2D/'



In [2]:
# GPU initialization
# Close the session created by a previous run of this cell, if there is one.
# The name is looked up through globals() because it does not exist yet on a
# fresh kernel; resetting it to None first (as before) meant the close() call
# could never run.
previous_session = globals().get('session')
if previous_session is not None:
    previous_session.close()

physical_devices = tf.config.list_physical_devices('GPU')
print("Num GPUs:", len(physical_devices))

# Session config: memory fraction set to 0.3 and allow_growth enabled
# (see the TensorFlow GPUOptions documentation for how the two interact).
config = ConfigProto()
config.gpu_options.per_process_gpu_memory_fraction = 0.3
config.gpu_options.allow_growth = True
session = InteractiveSession(config=config)

Num GPUs: 1


In [3]:
# Load the train/validation/test splits and the samples of the unknown class
repo = DataRepository(dirname)
(X_train, X_val, X_test,
 y_train, y_val, y_test,
 labels) = repo.getForTraining()
X_unseen = repo.getUnseenX(unseen_dirname)

num_classes = repo.numClasses
print('num_classes', num_classes)

['Computer', 'Deutschland', 'du', 'Haben', 'Hallo', 'ich', 'Mainz', 'Software', 'unser', 'Welt', 'zeigen']
['unknown']
num_classes 11


In [4]:
# Map class index -> token name, using the case-insensitively sorted
# entries of the data directory
tokens = sorted(os.listdir(dirname), key=str.casefold)
token_labels = dict(enumerate(tokens))
print(token_labels)

{0: 'Computer', 1: 'Deutschland', 2: 'du', 3: 'Haben', 4: 'Hallo', 5: 'ich', 6: 'Mainz', 7: 'Software', 8: 'unser', 9: 'Welt', 10: 'zeigen'}


In [5]:
# Model: stacked bidirectional LSTM layers feeding two dense output heads
dropout = 0.2
nodesizes = [256, 256, 128]

inputs = keras.Input(shape=(X_train.shape[1], X_train.shape[2]))

# Entry layer; it returns the full sequence so more recurrent layers can follow
lstm = Bidirectional(layers.LSTM(128, return_sequences=True))(inputs)
lstm = layers.Dropout(rate=dropout)(lstm)

# One sequence-returning bidirectional LSTM + dropout per entry of nodesizes
for units in nodesizes:
    recurrent = Bidirectional(layers.LSTM(units, return_sequences=True))
    lstm = recurrent(lstm)
    lstm = layers.Dropout(rate=dropout)(lstm)

# Final recurrent layer returns only its last output
lstm = Bidirectional(layers.LSTM(256))(lstm)
lstm = layers.Dropout(rate=dropout)(lstm)

# Two heads on the same features: softmax over the classes, and one sigmoid per class
class_output = layers.Dense(num_classes, activation='softmax', name='class_output')(lstm)
reject_output = layers.Dense(num_classes, activation='sigmoid', name='reject_output')(lstm)

model = keras.models.Model(inputs=inputs, outputs=[class_output, reject_output])
# Plot the model graph
#keras.utils.plot_model(model, 'nn_graph.png', show_shapes=True)

model.compile(
    optimizer='Adamax',
    loss={
        'class_output': 'categorical_crossentropy',
        'reject_output': 'binary_crossentropy',
    },
    metrics=['accuracy', tf.keras.metrics.Precision(), tf.keras.metrics.Recall()],
)

model.summary()


Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 100, 86)]    0                                            
__________________________________________________________________________________________________
bidirectional (Bidirectional)   (None, 100, 256)     220160      input_1[0][0]                    
__________________________________________________________________________________________________
dropout (Dropout)               (None, 100, 256)     0           bidirectional[0][0]              
__________________________________________________________________________________________________
bidirectional_1 (Bidirectional) (None, 100, 512)     1050624     dropout[0][0]                    
______________________________________________________________________________________________

In [6]:
# Train both heads against the same targets
history = model.fit(
    x=X_train,
    y=[y_train, y_train],
    validation_data=(X_val, [y_val, y_val]),
    batch_size=30,
    epochs=10,
    shuffle=False,
)

Epoch 1/10
Instructions for updating:
If using Keras pass *_constraint arguments to layers.
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [7]:
# Evaluate both heads on the held-out test split
y_eval = model.evaluate(x=X_test, y=[y_test, y_test], verbose=2)

6/6 - 0s - loss: 1.0418 - class_output_loss: 0.8877 - reject_output_loss: 0.1541 - class_output_accuracy: 0.6774 - class_output_precision: 0.8143 - class_output_recall: 0.6129 - reject_output_accuracy: 0.6774 - reject_output_precision: 0.8571 - reject_output_recall: 0.4194


In [8]:
# Output of the first head (class_output) for the test split
y_pred = model.predict(X_test)[0]

# Reduce predictions and targets to integer class ids
y_test_integer = np.argmax(y_test, axis=1)
y_pred_integer = np.argmax(y_pred, axis=1)

# Translate the class ids to token names
y_test_name = [token_labels[class_id] for class_id in y_test_integer]
y_pred_name = [token_labels[class_id] for class_id in y_pred_integer]

In [9]:
# Decode the one-hot training targets into an array of integer class ids
y_train_dec = np.array([np.argmax(row) for row in y_train])

In [10]:
# Output of the second head (reject_output) on the training examples;
# the per-class Gaussian fit further down is computed from these values.
seen_train_X_pred = np.array(model.predict(X_train)[1])

(557, 11)
0


In [11]:
# Fit gaussian model
from scipy.stats import norm as dist_model
def fit(prob_pos_X):
    """Fit a normal distribution to the given scores mirrored around 1.

    Each score p in prob_pos_X is complemented by its reflection 2 - p and
    dist_model.fit is run on the combined sample.

    Returns:
        The (mean, standard deviation) pair produced by dist_model.fit.
    """
    mirrored = [*prob_pos_X, *(2 - score for score in prob_pos_X)]
    mean, std = dist_model.fit(mirrored)
    return mean, std

In [45]:
# For every class: fit the mirrored Gaussian to the reject-head score of that
# class, restricted to the training examples labelled with that class.
mu_stds = [
    list(fit(seen_train_X_pred[y_train_dec == class_idx, class_idx]))
    for class_idx in range(num_classes)
]

In [46]:
# Show the fitted [mean, std] pair of every class
print(mu_stds)

[[1.0, 0.49104826562208564], [1.0, 0.5066123236926862], [1.0, 0.5085201890274934], [1.0, 0.4987507999851693], [1.0, 0.5009499893339773], [1.0, 0.5079735590743689], [1.0, 0.49575559993669993], [1.0, 0.48907559402482914], [1.0, 0.49499169912961605], [1.0, 0.49519171160294567], [1.0, 0.4873148220049561]]


In [12]:
# Reject-head output for the test split followed by the unseen samples
X_test_and_unseen = np.concatenate([X_test, X_unseen], axis=0)
test_X_pred = model.predict(X_test_and_unseen)[1]

# Ground truth: decoded test labels, then the extra id num_classes for every unseen sample
seen_ground_truth = [np.argmax(encoded) for encoded in y_test]
unseen_ground_truth = [num_classes] * len(X_unseen)
test_y_gt = np.concatenate([seen_ground_truth, unseen_ground_truth], axis=0)

In [48]:
# Accept the top-scoring class only when its score exceeds the class threshold
# max(0.5, 1 - scale * std); otherwise predict the extra id num_classes.
scale = 1.
test_y_pred = []
for scores in test_X_pred:
    best_class = np.argmax(scores)
    class_threshold = max(0.5, 1. - scale * mu_stds[best_class][1])
    accepted = np.max(scores) > class_threshold
    test_y_pred.append(best_class if accepted else num_classes)

In [25]:
from sklearn.metrics import precision_recall_fscore_support

In [26]:
# Per-class precision / recall / F-score; the macro F-score is their unweighted mean
scores_per_class = precision_recall_fscore_support(test_y_gt, test_y_pred)
precision, recall, fscore, _ = scores_per_class
print('macro fscore:', np.mean(fscore))

macro fscore: 0.1976192520606971


In [13]:
# Same evaluation as the inline cells above, routed through the project module `tools`.
# NOTE(review): the recorded run of this cell ends in
# "IndexError: too many indices for array".
import tools
# Reject-head output on the training set, and on test + unseen samples
seen_train_X_pred = np.array(model.predict(X_train)[1])
test_X_pred = model.predict(np.concatenate([X_test, X_unseen], axis=0))[1]
# Ground truth: decoded test labels, then num_classes for every unseen sample
test_y_gt = np.concatenate([[np.argmax(encoded) for encoded in y_test], [num_classes for _ in X_unseen]], axis=0)
print(seen_train_X_pred.shape, y_train_dec.shape)
print(num_classes)
# NOTE(review): seen_train_X_pred and y_train_dec are computed/printed here but
# not passed; the call receives test_y_gt and the undecoded y_train instead.
# Presumably the helper expects the training predictions and decoded labels,
# which would explain the IndexError - confirm against the signature of
# tools.getRejectPredictions before changing the arguments.
test_y_pred = tools.getRejectPredictions(num_classes, test_X_pred, test_y_gt, y_train)
precision, recall, fscore, _ = precision_recall_fscore_support(test_y_gt, test_y_pred)

(557, 11) (557,)
11


IndexError: too many indices for array