In [8]:
!pip install gensim==3.8.3
!pip install keras --upgrade
!pip install pandas --upgrade
!pip install tensorflow --upgrade

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [9]:
import pandas as pd

# Matplot
import matplotlib.pyplot as plt
%matplotlib inline

# Scikit-learn
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
from sklearn.manifold import TSNE
from sklearn.feature_extraction.text import TfidfVectorizer

# Keras
import tensorflow as tf
from keras.preprocessing.text import Tokenizer
#from keras.preprocessing.sequence import pad_sequences
from keras.utils import pad_sequences
from keras.models import Sequential
from keras.layers import Activation, Dense, Dropout, Embedding, Flatten, Conv1D, MaxPooling1D, LSTM
from keras import utils
from keras.callbacks import ReduceLROnPlateau, EarlyStopping

# nltk
import nltk
from nltk.corpus import stopwords
from  nltk.stem import SnowballStemmer

# Word2vec
import gensim

# Utility
import re
import numpy as np
import os
from collections import Counter
import logging
import time
import pickle
import itertools

In [10]:
vocab_size = 290419
# WORD2VEC 
W2V_SIZE = 300
SEQUENCE_LENGTH = 300
W2V_WINDOW = 7
W2V_EPOCH = 32
W2V_MIN_COUNT = 10

In [11]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [12]:
#LOADING
load_dir = '/content/drive/MyDrive/nns/'
train_test_dir = load_dir+'saved_train_test/'
embedding_matrix = np.load(load_dir+'embedding_matrix.npy')

x_train = np.load(train_test_dir+'x_train.npy')
y_train = np.load(train_test_dir+'y_train.npy')

x_test = np.load(train_test_dir+'x_test.npy')
y_test = np.load(train_test_dir+'y_test.npy')

###NN Model

In [38]:
#HYPER PARAMETERS
model_name = "RNN"
num_epochs = 15
batch_size = 1024
learning_rate = 1e-5
rnn_units = 1024
momentum=.9
sequence_length=300
activation="sigmoid"
optimizer='adam'

In [39]:
#MODEL
embedding_layer = Embedding(vocab_size, W2V_SIZE, weights=[embedding_matrix], 
                            input_length=SEQUENCE_LENGTH, 
                            #batch_input_shape=[batch_size, None], 
                            trainable=False)

model = Sequential()
model.add(embedding_layer)
model.add(tf.keras.layers.Dropout(0.5))
model.add(tf.keras.layers.GRU(rnn_units,
                        recurrent_initializer='glorot_uniform'))
model.add(Dense(1, activation='sigmoid'))


model.summary()

Model: "sequential_6"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_6 (Embedding)     (None, 300, 300)          87125700  
                                                                 
 dropout_6 (Dropout)         (None, 300, 300)          0         
                                                                 
 gru_7 (GRU)                 (None, 1024)              4073472   
                                                                 
 dense_5 (Dense)             (None, 1)                 1025      
                                                                 
Total params: 91,200,197
Trainable params: 4,074,497
Non-trainable params: 87,125,700
_________________________________________________________________


In [40]:
#OPTIMIZATION
callbacks = [ tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', patience=5, cooldown=0),
              tf.keras.callbacks.EarlyStopping(monitor='val_accuracy', min_delta=1e-4, patience=5)]
optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
model.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

In [41]:
#TRAINING
%%time
history = model.fit(x_train, y_train,
                    batch_size=batch_size,
                    epochs=num_epochs,
                    validation_split=0.1,
                    verbose=1,
                    callbacks=callbacks)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
CPU times: user 3min 23s, sys: 3.56 s, total: 3min 27s
Wall time: 4min 26s


In [43]:
#TESTING
score = model.evaluate(x_test, y_test, batch_size=batch_size)

print("ACCURACY:",score[1])
print("LOSS:",score[0])

acc = history.history['accuracy']
print("acc:", acc)
val_acc = history.history['val_accuracy']
print("val_acc:", val_acc)
loss = history.history['loss']
print("loss:", loss)
val_loss = history.history['val_loss']
print("val_loss:", val_loss)


ACCURACY: 0.7618749737739563
LOSS: 0.5059124231338501
acc: [0.6464305520057678, 0.7037152647972107, 0.7280277609825134, 0.741944432258606, 0.7517222166061401, 0.7261597514152527, 0.7418264150619507, 0.761020839214325, 0.7692638635635376, 0.7757083177566528, 0.7871041893959045, 0.7907639145851135, 0.7912847399711609, 0.7920764088630676, 0.7931110858917236]
val_acc: [0.7192500233650208, 0.726687490940094, 0.7419999837875366, 0.7509999871253967, 0.754687488079071, 0.7071874737739563, 0.7500625252723694, 0.7523124814033508, 0.757687509059906, 0.7582499980926514, 0.7631875276565552, 0.7628124952316284, 0.7625625133514404, 0.7631250023841858, 0.7634375095367432]
loss: [0.6203070878982544, 0.5843851566314697, 0.534212052822113, 0.5154542922973633, 0.5016673803329468, 0.5832963585853577, 0.5167282223701477, 0.48852208256721497, 0.4759618639945984, 0.46503710746765137, 0.4497710168361664, 0.44469568133354187, 0.44358956813812256, 0.4412883520126343, 0.4399559497833252]
val_loss: [0.542832970619

In [44]:
#SAVING 
save_dir = '/content/drive/MyDrive/nns/saved_nn_models/'
model_name = "RNN"
model.save(save_dir+model_name+".h5")


In [45]:
!ls /content/drive/MyDrive/nns/saved_nn_models/

FFNN1.h5  FFNN3.h5  FFNN5.h5  LSTM.h5  RNN.h5
