In [32]:
import tensorflow as tf
import numpy as np
from keras.preprocessing.sequence import pad_sequences
import json
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Embedding
from keras.callbacks import EarlyStopping
import os
from keras.callbacks import ModelCheckpoint

In [11]:
# Seed TensorFlow's global random generator so repeated runs of the notebook
# start from the same random state.
SEED_NUM = 1234
# set_seed is a function and has to be *called*. The previous
# `tf.random.set_seed = (SEED_NUM)` rebound the attribute to an int, which
# set no seed at all and clobbered the function for the rest of the session.
tf.random.set_seed(SEED_NUM)

In [12]:
# Directories: inputs are read from DATA_IN_PATH, artefacts go under DATA_OUT_PATH.
DATA_IN_PATH, DATA_OUT_PATH = "./data_in/", "./data_out/"

# File names, joined onto DATA_IN_PATH when loading.
TRAIN_INPUT_DATA, TRAIN_LABEL_DATA = "train_input.npy", "train_label.npy"
DATA_CONFIGS = "data_configs.json"

In [13]:
# Load the training inputs, labels and the preprocessing config.
# np.load is given the path (not an already-open file object) so NumPy opens
# and closes the file itself; the original `np.load(open(...))` calls left
# the file handles open.
train_input = np.load(DATA_IN_PATH + TRAIN_INPUT_DATA)
# maxlen is the array's own second dimension, so the sequence length is kept.
train_input = pad_sequences(train_input, maxlen=train_input.shape[1])
train_label = np.load(DATA_IN_PATH + TRAIN_LABEL_DATA)

# Context manager guarantees the JSON file is closed even if parsing fails.
with open(DATA_IN_PATH + DATA_CONFIGS, 'r') as config_file:
    prepro_configs = json.load(config_file)

In [17]:
# Training hyper-parameters.
BATCH_SIZE = 128      # samples per gradient step
NUM_EPOCHS = 5        # upper bound on epochs passed to fit()
VALID_SPLIT = 0.1     # fraction passed as validation_split

# Sequence length is taken from the second axis of the loaded input array.
MAX_LEN = train_input.shape[1]

# Also used as the checkpoint sub-directory name under DATA_OUT_PATH.
model_name = 'rnn_classifier_en'

In [18]:
# Constructor arguments for RNNClassifier; the vocabulary size comes from the
# preprocessing config, everything else is fixed here.
kargs = dict(
    model_name=model_name,
    vocab_size=prepro_configs['vocab_size'],
    embedding_dimension=100,
    dropout_rate=0.2,
    lstm_dimension=150,
    dense_dimension=150,
    output_dimension=1,
)

In [34]:
class RNNClassifier(tf.keras.Model):
    """Stacked-LSTM classifier built from a dict of hyper-parameters.

    Forward pass: embedding -> dropout -> LSTM -> LSTM -> dropout ->
    tanh dense -> dropout -> sigmoid dense.

    Required keyword arguments: ``model_name``, ``vocab_size``,
    ``embedding_dimension``, ``lstm_dimension``, ``dropout_rate``,
    ``dense_dimension`` and ``output_dimension``.
    """

    def __init__(self, **kargs):
        """Create the layers; sizes and rates are read from ``kargs``."""
        super().__init__(name=kargs['model_name'])

        lstm_units = kargs['lstm_dimension']

        # Layers are assigned in the same order as before so that Keras
        # tracks them (and their weights) in an unchanged order.
        self.embedding = Embedding(
            input_dim=kargs['vocab_size'],
            output_dim=kargs['embedding_dimension'],
        )
        # First LSTM emits the whole sequence so the second LSTM can consume it.
        self.lstm_1_layer = tf.keras.layers.LSTM(lstm_units, return_sequences=True)
        self.lstm_2_layer = tf.keras.layers.LSTM(lstm_units)
        # A single Dropout layer is reused at three points of the forward pass.
        self.dropout = tf.keras.layers.Dropout(kargs['dropout_rate'])
        self.fc1 = Dense(
            units=kargs['dense_dimension'],
            activation=tf.keras.activations.tanh,
        )
        self.fc2 = Dense(
            units=kargs['output_dimension'],
            activation=tf.keras.activations.sigmoid,
        )

    def call(self, x):
        """Apply every stage in order and return the final layer's output."""
        stages = (
            self.embedding,
            self.dropout,
            self.lstm_1_layer,
            self.lstm_2_layer,
            self.dropout,
            self.fc1,
            self.dropout,
            self.fc2,
        )
        hidden = x
        for stage in stages:
            hidden = stage(hidden)
        return hidden

        

In [35]:
# Instantiate the classifier from the hyper-parameter dict, then configure
# training: Adam with a 1e-4 learning rate, binary cross-entropy loss, and
# binary accuracy reported under the name 'accuracy'.
model = RNNClassifier(**kargs)
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4),
    loss=tf.losses.BinaryCrossentropy(),
    metrics=[tf.keras.metrics.BinaryAccuracy(name='accuracy')],
)


In [None]:
# Weights are checkpointed to <DATA_OUT_PATH><model_name>/weights.h5.
checkpoint_path = DATA_OUT_PATH + model_name + '/weights.h5'
checkpoint_dir = os.path.dirname(checkpoint_path)

# Make sure the checkpoint directory exists and report which case applied.
if not os.path.exists(checkpoint_dir):
    os.makedirs(checkpoint_dir, exist_ok=True)
    print("{} -- Folder create complete \n".format(checkpoint_dir))
else:
    print("{} -- Folder already exists \n".format(checkpoint_dir))

# Both callbacks watch validation accuracy: one stops training after the
# configured patience without a min_delta improvement, the other writes the
# weights only when the monitored value improves.
earlystop_callback = EarlyStopping(
    monitor='val_accuracy',
    min_delta=0.0001,
    patience=2,
)
cp_callback = ModelCheckpoint(
    checkpoint_path,
    monitor='val_accuracy',
    verbose=1,
    save_best_only=True,
    save_weights_only=True,
)

history = model.fit(
    train_input,
    train_label,
    batch_size=BATCH_SIZE,
    epochs=NUM_EPOCHS,
    validation_split=VALID_SPLIT,
    callbacks=[earlystop_callback, cp_callback],
)

./data_out/rnn_classifier_en -- Folder already exists 

Epoch 1/5

Epoch 00001: val_accuracy improved from -inf to 0.51280, saving model to ./data_out/rnn_classifier_en/weights.h5
Epoch 2/5
  2/176 [..............................] - ETA: 24:09 - loss: 0.6919 - accuracy: 0.5586