In [1]:
import pandas as pd
import tensorflow as tf
from tensorflow import keras
import pickle
import numpy as np
import collections
from sklearn import metrics
from sklearn.metrics import confusion_matrix, classification_report

In [2]:
with open(r'..\Dataset1\Dataset1Splits\X_train.pkl', 'rb') as f:
    X_train = pickle.load(f)

with open(r'..\Dataset1\Dataset1Splits\X_val.pkl', 'rb') as f:
    X_val = pickle.load(f)

with open(r'..\Dataset1\Dataset1Splits\X_test.pkl', 'rb') as f:
    X_test = pickle.load(f)

with open(r'..\Dataset1\Dataset1Splits\y_train.pkl', 'rb') as f:
    y_train = pickle.load(f)

with open(r'..\Dataset1\Dataset1Splits\y_val.pkl', 'rb') as f:
    y_val = pickle.load(f)

with open(r'..\Dataset1\Dataset1Splits\y_test.pkl', 'rb') as f:
    y_test = pickle.load(f)

In [3]:
print(collections.Counter(y_train))
print(collections.Counter(y_val))
print(collections.Counter(y_test))

Counter({5: 20278, 4: 20276, 2: 20215, 1: 20215, 3: 20165})
Counter({2: 5129, 3: 5074, 4: 5061, 1: 5058, 5: 4966})
Counter({3: 4511, 5: 4506, 1: 4477, 4: 4413, 2: 4406})


In [4]:
y_train = keras.utils.to_categorical(y_train-1, num_classes=5)
y_val = keras.utils.to_categorical(y_val-1, num_classes=5)
y_test = keras.utils.to_categorical(y_test-1, num_classes=5)

In [5]:
with open(r'..\Dataset1\embeddingMatrixDS1.pkl', 'rb') as f:
    embedding_matrix = pickle.load(f)

In [6]:
num_tokens = 58810 # total vocabulary +1
embedding_dim = 300 # dimension of the vector of a single word
MAX_REVIEW_LEN = 2000 # maximum words in a review

In [7]:
embedding_layer = keras.layers.Embedding(
    num_tokens,
    embedding_dim,
    embeddings_initializer=keras.initializers.Constant(embedding_matrix),
    input_length=MAX_REVIEW_LEN,
    trainable=True
)

In [8]:
tf.random.set_seed(0)
model = keras.Sequential()
model.add(embedding_layer)
# This layer will calculate an average of those vectors.
model.add(keras.layers.GlobalAveragePooling1D())
model.add(keras.layers.Dense(128, activation='relu', kernel_initializer=tf.keras.initializers.random_normal(seed=1)))
model.add(keras.layers.Dense(64, activation='relu', kernel_initializer=tf.keras.initializers.random_normal(seed=1)))
model.add(keras.layers.Dense(5, activation='softmax', kernel_initializer=tf.keras.initializers.random_normal(seed=1)))


model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

In [9]:
NUM_EPOCHS = 25
BATCH_SIZE = 128

callback_trigger = keras.callbacks.EarlyStopping(monitor='val_loss', patience=2)
history = model.fit(X_train, y_train, epochs=NUM_EPOCHS, batch_size=BATCH_SIZE, validation_data=(X_val, y_val), callbacks=[callback_trigger])

Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25


In [10]:
score = model.evaluate(X_test, y_test, verbose=1)
print("Test score: ", score[0])
print("Test accuracy: ", score[1])

Test score:  1.1705772876739502
Test accuracy:  0.5220723152160645


In [11]:
y_prediction = model.predict(X_test)
y_prediction = np.argmax(y_prediction, axis=1)
y_actuals = np.argmax(y_test, axis=1)



In [12]:
accuracy_score = metrics.accuracy_score(y_prediction, y_actuals)

print('SNN accuracy is',str('{:04.2f}'.format(accuracy_score*100))+'%')
print('------------------------------------------------')
print('Confusion Matrix:')
print(pd.DataFrame(confusion_matrix(y_actuals, y_prediction)))
print('------------------------------------------------')
print('Classification Report:')
print(classification_report(y_actuals, y_prediction))

SNN accuracy is 52.21%
------------------------------------------------
Confusion Matrix:
      0     1     2     3     4
0  2333  1279   395   174   296
1   540  1912  1183   520   251
2   147   763  1965  1266   370
3    58   173   683  2287  1212
4    62    74   188  1030  3152
------------------------------------------------
Classification Report:
              precision    recall  f1-score   support

           0       0.74      0.52      0.61      4477
           1       0.46      0.43      0.44      4406
           2       0.45      0.44      0.44      4511
           3       0.43      0.52      0.47      4413
           4       0.60      0.70      0.64      4506

    accuracy                           0.52     22313
   macro avg       0.53      0.52      0.52     22313
weighted avg       0.54      0.52      0.52     22313

