In [1]:
import os
import numpy as np
from tensorflow.keras.models import load_model
import tensorflow as tf

In [3]:
from own.loading import load_reviews_and_rids
from own.loading import load_train_test_rid_lists
from own.loading import load_RID_and_rating

from own.functions import get_matching_reviews

from own.vocab import load_vocab

from own.classification_preparation import reviews_to_string

from own.classification_pretrained import encode_and_pad_seqs
from own.classification_glove import create_embedding_layer
from own.classification_pretrained import create_model
from own.classification_pretrained import calc_metrics

# Loading and Preparing the Training and Test Sets

In [4]:
# Vocabulary file kept under data/vocabs; the loaded vocab is later handed to
# create_embedding_layer together with the tokenizer index.
vocab_path = os.path.join("data", "vocabs", "train_vocab.txt")
vocab = load_vocab(vocab_path)

In [5]:
# The pre-processed train and test review files sit side by side in data/reviews.
directory = os.path.join("data", "reviews")
train_path, test_path = (
    os.path.join(directory, file_name)
    for file_name in ("processed_trainset.txt", "processed_testset.txt")
)

# Load the train file first, then the test file; each loader result is unpacked
# into a (texts, rids) pair -- presumably review texts and their review IDs.
(texts_trainset, rids_trainset), (texts_testset, rids_testset) = (
    load_reviews_and_rids(review_file) for review_file in (train_path, test_path)
)

File loaded successfully
File loaded successfully


In [6]:
# Run both review collections through the same string conversion so the
# train and test documents are prepared identically.
train_docs, test_docs = (
    reviews_to_string(review_texts)
    for review_texts in (texts_trainset, texts_testset)
)

In [7]:
# Rating table: one RID column and one rating column, copied into plain arrays.
df_rating = load_RID_and_rating()
rid_values = np.array(df_rating.RID.values)
rating_values = np.array(df_rating.rating.values)

# get_matching_reviews yields two parts, unpacked here as (labels, matched RIDs).
# Each part is converted to an array on its own. Wrapping the whole return value
# in a single np.array() would force labels and RIDs into one common dtype and
# raises on recent NumPy versions if the two parts ever differ in length.
ytrain, train_matching_RIDs = (
    np.array(part)
    for part in get_matching_reviews(rid_values, rating_values, rids_trainset)
)
ytest, test_matching_RIDs = (
    np.array(part)
    for part in get_matching_reviews(rid_values, rating_values, rids_testset)
)

File loaded successfully
Found 1800 of 1800 seached results
Found 200 of 200 seached results


# Tokenizer

In [8]:
# Encode and pad the train/test documents in one call. The helper returns five
# values in this order: tokenizer index, train matrix, test matrix, vocabulary
# size and maximum sequence length (meaning taken from the target names).
encoded = encode_and_pad_seqs(train_docs, test_docs)
tokenizer_index, Xtrain, Xtest, vocab_size, max_length = encoded

# Creating the Embedding Layer

In [9]:
# Embedding vectors are read from the GloVe-format text file under data/models.
file_path = os.path.join("data", "models", "own.glove.6B.100d.txt")

# The layer builder receives the embedding file plus the tokenizer/vocabulary
# information produced by the earlier cells.
embedding_layer = create_embedding_layer(
    file_path,
    vocab_size,
    max_length,
    tokenizer_index,
    vocab,
)

# Model

## Creating the Model

In [11]:
# Build the classifier on top of the prepared embedding layer.
# create_model also prints the layer summary shown in the cell output.
model = create_model(embedding_layer)

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (None, 804, 100)          1301100   
_________________________________________________________________
conv1d (Conv1D)              (None, 800, 128)          64128     
_________________________________________________________________
max_pooling1d (MaxPooling1D) (None, 400, 128)          0         
_________________________________________________________________
flatten (Flatten)            (None, 51200)             0         
_________________________________________________________________
dense (Dense)                (None, 1)                 51201     
Total params: 1,416,429
Trainable params: 115,329
Non-trainable params: 1,301,100
_________________________________________________________________
None


## Fitting the Model to training data

In [13]:
# Train the classifier for 15 epochs; verbose=2 prints one summary line per epoch.
# The returned History object is kept so the per-epoch loss and metrics can be
# inspected afterwards (history.history) instead of being echoed as a bare repr.
# NOTE(review): the recorded log already starts at loss 0.0104 in epoch 1, which
# suggests this cell was run on an already-trained model. Re-create the model
# (Restart Kernel -> Run All) before trusting the logged numbers.
history = model.fit(Xtrain, ytrain, epochs=15, verbose=2)

Train on 1800 samples
Epoch 1/15
1800/1800 - 5s - loss: 0.0104 - true_positives: 1656.0000 - true_negatives: 141.0000 - false_positives: 1.0000 - false_negatives: 2.0000
Epoch 2/15
1800/1800 - 5s - loss: 0.0085 - true_positives: 1657.0000 - true_negatives: 142.0000 - false_positives: 0.0000e+00 - false_negatives: 1.0000
Epoch 3/15
1800/1800 - 5s - loss: 0.0062 - true_positives: 1658.0000 - true_negatives: 141.0000 - false_positives: 1.0000 - false_negatives: 0.0000e+00
Epoch 4/15
1800/1800 - 5s - loss: 0.0049 - true_positives: 1657.0000 - true_negatives: 142.0000 - false_positives: 0.0000e+00 - false_negatives: 1.0000
Epoch 5/15
1800/1800 - 5s - loss: 0.0038 - true_positives: 1658.0000 - true_negatives: 142.0000 - false_positives: 0.0000e+00 - false_negatives: 0.0000e+00
Epoch 6/15
1800/1800 - 5s - loss: 0.0028 - true_positives: 1658.0000 - true_negatives: 142.0000 - false_positives: 0.0000e+00 - false_negatives: 0.0000e+00
Epoch 7/15
1800/1800 - 5s - loss: 0.0024 - true_positives: 165

<tensorflow.python.keras.callbacks.History at 0x1c5479a1088>

## Saving the Model

In [14]:
# Persist the trained classifier as an .h5 file under data/models/classifier.
# The target folder is created on demand so saving also works on a fresh
# checkout, and the path variable no longer shadows the built-in dir().
classifier_dir = os.path.join("data", "models", "classifier")
os.makedirs(classifier_dir, exist_ok=True)
model.save(os.path.join(classifier_dir, "glove_classifier.h5"))

## Evaluating the Model

In [15]:
# Score the held-out test set silently (verbose=0). The result is unpacked as
# loss followed by the confusion-matrix counts, in the same order the training
# log reports them: true/false positives and negatives.
(
    glove_test_loss,
    glove_test_tp,
    glove_test_tn,
    glove_test_fp,
    glove_test_fn,
) = model.evaluate(Xtest, ytest, verbose=0)

In [16]:
# Report the derived metrics for the test-set confusion counts.
# NOTE(review): in the recorded run specificity and negative prediction value
# are both 0.0, i.e. no negative test review was classified as negative.
# Worth checking against the strong class imbalance visible in the training log.
calc_metrics(
    "glove_test",
    glove_test_tp,
    glove_test_tn,
    glove_test_fp,
    glove_test_fn,
)

glove_test
 Precision: 0.9242424368858337
 Recall: 0.9891892075538635
 Negative Prediction Value: 0.0
 Specificity: 0.0
 Error Rate: 0.08500000089406967
 F1-Score: 0.9556136217568121
