# Evaluate on Test Data

In [None]:
import pandas as pd
import numpy as np
import pickle
from pathlib import Path

from sklearn.preprocessing import LabelBinarizer
import keras.models as models
import sklearn.datasets as skds
from sklearn import metrics

## Load Model and Tokenizer

Load the tokenizer and the neural network that produced the best results on the training data.

In [None]:
# Evaluation settings -- adjust these three values to evaluate a different run.
model_file = 'resources/models/smallModel.h5'  # saved neural network to evaluate
tokenizer_file = 'resources/tokenizer/tokenizer.pickle'  # pickled tokenizer to evaluate
mode = 'binary'  # weighting mode that was used for vectorization

In [None]:
# NOTE: unpickling can execute arbitrary code -- only load tokenizer files you trust.
tokenizer = pickle.loads(Path(tokenizer_file).read_bytes())

# Restore the saved network from the configured model file.
classifier = models.load_model(model_file)

Load the test data and preprocess it in the same way as the training data.

In [None]:
path_test = "./resources/aclImdb/test"
labels = ["pos", "neg"]  # contains all category labels that we want to classify
num_labels = len(labels)  # derived from `labels` so the count cannot drift from the list

# Only index the files here (load_content=False); the review texts are read
# later. Per the scikit-learn docs, `encoding` is only consulted when
# load_content=True, so it has no effect on this call.
files_test = skds.load_files(
    path_test,
    load_content=False,
    categories=labels,
    encoding="UTF-8",
)

file_paths = files_test.filenames  # path of every indexed test file
label_names = files_test.target_names  # category names, looked up by target id
labelled_files_index = files_test.target  # target id of each file, aligned with file_paths

In [None]:
# Build one (filename, category, review text) record per test file.
data_list = [
    (review_path, label_names[label_idx], Path(review_path).read_text(encoding="UTF-8"))
    for review_path, label_idx in zip(file_paths, labelled_files_index)
]

In [None]:
# Column order matches the (filename, category, review) tuples in data_list.
data_tags = ["filename", "category", "review"]
data = pd.DataFrame.from_records(data=data_list, columns=data_tags)
data.head()

In [None]:
# Pull out the three columns used below.
test_reviews, test_tags, test_file_names = (
    data[column] for column in ('review', 'category', 'filename')
)

## take the same tokenizer (and weighting mode) as for training
x_test = tokenizer.texts_to_matrix(test_reviews, mode=mode)

# Learn the label encoding from the test tags and encode them in one step.
encoder = LabelBinarizer()
y_test = encoder.fit_transform(test_tags)

***

Now we [predict](https://keras.io/models/model/#predict) labels for all test reviews. If the predicted probability of a positive review is greater than 0.5, the label "pos" is assigned; otherwise the label "neg" is assigned.

In [None]:
# Predicted probability of the "pos" class for every test review.
probs = classifier.predict(x_test)

# Flatten to one probability per review. Checking the shape explicitly avoids
# relying on the implicit truth value of 1-element arrays and fails loudly if
# the network emits more than one value per review.
pos_probs = np.asarray(probs)
if pos_probs.size != len(pos_probs):
    raise ValueError(
        "expected exactly one probability per review, got output of shape %s"
        % (pos_probs.shape,)
    )
pos_probs = pos_probs.reshape(-1)

threshold = 0.5  # probabilities strictly above this are labelled "pos"
y_classified = np.where(pos_probs > threshold, 'pos', 'neg').tolist()

# transform true encoded categories (0 and 1) to labels (neg and pos);
# .tolist() yields plain Python strings
y_true = encoder.inverse_transform(y_test).tolist()

print(y_classified[:10])  # print first 10 predictions and true labels
print(y_true[:10])

We create a [confusion matrix](https://scikit-learn.org/stable/modules/generated/sklearn.metrics.confusion_matrix.html) to compare all predicted labels (`y_classified`) with the true labels (`y_true`).

In [None]:
# Rows are true labels and columns are predicted labels, both ordered neg, pos.
cm = metrics.confusion_matrix(
    y_true=y_true,
    y_pred=y_classified,
    labels=["neg", "pos"],
)
cm

The confusion matrix provides all the values needed to compute further evaluation metrics, such as precision and recall.

In [None]:
# The matrix was built with labels ordered ["neg", "pos"], so flattening it
# row by row yields TN, FP, FN, TP.
tn, fp, fn, tp = cm.ravel()

# Precision and recall, treating 'pos' as the positive class.
pre, rec = (
    score(y_true, y_classified, pos_label='pos')
    for score in (metrics.precision_score, metrics.recall_score)
)

print("TN, FP, FN, TP ", (tn, fp, fn, tp))
print("Precision ", pre)
print("Recall ", rec)

You can also use [Keras](https://keras.io/models/model/#evaluate) to compute the model's accuracy.

In [None]:
# Score the model on the vectorized test set in batches of 100 samples.
result = classifier.evaluate(x=x_test, y=y_test, batch_size=100, verbose=1)

# Print the metric names next to the values returned by evaluate().
print(classifier.metrics_names)
print(result)