In [None]:
########################################################################################################################
# Filename: RNN_Models.ipynb
#
# Purpose: Multi-label Text-categorization via recurrent neural networks

# Author(s): Bobby (Robert) Lumpkin
#
# Library Dependencies: numpy, pandas, scikit-learn, skmultilearn, joblib, os, sys, threshold_learning,
#                       tensorflow, tensorflow_addons, matplotlib, bpmll, sklearn_json
########################################################################################################################

# Multilabel Text Classification with Recurrent Neural Networks

In [1]:
import numpy as np
import pandas as pd
import math
import os
import json
import ast
import random
from joblib import dump, load
import tensorflow as tf
import tensorflow_addons as tfa
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Dropout
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import GridSearchCV
from bpmll import bp_mll_loss
import sklearn_json as skljson
from sklearn.model_selection import train_test_split
from sklearn import metrics
import matplotlib.pyplot as plt
import sys
## Set the working directory to the project's 'RNN Models' folder so that the relative paths used below resolve.
## NOTE(review): this is a hard-coded, machine-specific absolute path -- it must be edited before running elsewhere.
os.chdir('C:\\Users\\rober\\OneDrive\\Documents\\Multilabel-Text-Classification\\Deep Learning Models\\RNN Models')
## Append the ThresholdFunctionLearning directory to the interpreter's module search path
sys.path.append('../../ThresholdFunctionLearning')
from threshold_learning import predict_test_labels_binary    ## Import the 'predict_test_labels_binary()' and
from threshold_learning import predict_labels_binary         ## 'predict_labels_binary()' functions from threshold_learning
## Make the grid-search helper module importable (relative entry, so it depends on the working directory set above)
sys.path.append('GridSearchAid_FFNetworks')
from FFNN_gridSearch_aid import SizeLayersPows2, createModel

In [16]:
## Load the padded train/test inputs, the label matrices and the vocabulary size from the saved .npz archive.
## The archive is opened in a `with` block so its underlying file handle is closed as soon as the arrays
## have been read; each indexed entry is returned as an in-memory array, so the variables stay valid afterwards.
## NOTE: `npzfile` itself is closed after this block -- re-open the archive if further keys are needed.
with np.load("../../Data/seq_trainTest_data.npz") as npzfile:
    X_train_padded = npzfile['train_padded']
    X_test_padded = npzfile['test_padded']
    Y_train = npzfile['Y_train'].astype('float64')    ## labels are cast to float64
    Y_test = npzfile['Y_test'].astype('float64')
    num_unique_words = npzfile['num_unique_words']    ## used as the Embedding layer's input_dim

# Cross Entropy Models -- Traditional ("Naive") Approach

In [17]:
## Define the bidirectional LSTM architecture:
##   Embedding (32-dim) -> Bidirectional LSTM (16 units per direction, final state only) -> one sigmoid unit per label
tf.random.set_seed(123)    ## seed BEFORE the layers are created so the initial weights are reproducible
num_labels = Y_train.shape[1]

model_biLSTM = tf.keras.models.Sequential([
    tf.keras.layers.Embedding(num_unique_words, 32, input_length = X_train_padded.shape[1]),
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(16, return_sequences = False, return_state = False)),
    #tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(num_labels, activation = 'sigmoid')
])

## `learning_rate` is the supported keyword; `lr` is a deprecated alias
optim = tf.keras.optimizers.Adam(learning_rate = 0.01)
#optim = tf.keras.optimizers.Adagrad(
#    learning_rate = 0.001, initial_accumulator_value = 0.1, epsilon = 1e-07,
#    name = 'Adagrad')

#optim = tf.keras.optimizers.RMSprop(learning_rate=0.001, rho=0.9, momentum = 0.8, epsilon=1e-07,)

## Hamming loss on the sigmoid outputs thresholded at 0.5; binary cross-entropy treats each label independently
metric = tfa.metrics.HammingLoss(mode = 'multilabel', threshold = 0.5)
model_biLSTM.compile(loss = 'binary_crossentropy', optimizer = optim, metrics = [metric])

In [18]:
## Train for 30 epochs, using the test split as validation data.
## The History object returned by fit() is kept (rather than discarded) so that its per-epoch
## metrics (the `.history` dict) remain available for saving or plotting after training.
tf.random.set_seed(123)
history_biLSTM001 = model_biLSTM.fit(
    X_train_padded, Y_train,
    epochs = 30,
    validation_data = (X_test_padded, Y_test),
    verbose = 2
)

Epoch 1/30
243/243 - 102s - loss: 0.0701 - hamming_loss: 0.0176 - val_loss: 0.0459 - val_hamming_loss: 0.0129
Epoch 2/30
243/243 - 123s - loss: 0.0407 - hamming_loss: 0.0114 - val_loss: 0.0385 - val_hamming_loss: 0.0108
Epoch 3/30
243/243 - 159s - loss: 0.0305 - hamming_loss: 0.0080 - val_loss: 0.0316 - val_hamming_loss: 0.0082
Epoch 4/30
243/243 - 178s - loss: 0.0247 - hamming_loss: 0.0068 - val_loss: 0.0281 - val_hamming_loss: 0.0076
Epoch 5/30
243/243 - 180s - loss: 0.0211 - hamming_loss: 0.0060 - val_loss: 0.0257 - val_hamming_loss: 0.0070
Epoch 6/30
243/243 - 179s - loss: 0.0174 - hamming_loss: 0.0051 - val_loss: 0.0231 - val_hamming_loss: 0.0062
Epoch 7/30
243/243 - 154s - loss: 0.0146 - hamming_loss: 0.0044 - val_loss: 0.0221 - val_hamming_loss: 0.0061
Epoch 8/30
243/243 - 95s - loss: 0.0121 - hamming_loss: 0.0036 - val_loss: 0.0223 - val_hamming_loss: 0.0058
Epoch 9/30
243/243 - 90s - loss: 0.0104 - hamming_loss: 0.0031 - val_loss: 0.0228 - val_hamming_loss: 0.0058
Epoch 10/30


<tensorflow.python.keras.callbacks.History at 0x215e6859e20>

In [19]:
## (CAUTION: DO NOT OVERWRITE EXISTING FILES) -- Target path for saving the training history as a
## compressed .npz archive (the path below ends in .npz, not .json)
outfile = "Training Histories/history_ce_RNN_lr001.npz" 
## The save calls below are commented out -- presumably so that re-running the notebook cannot clobber
## previously saved results. Uncomment deliberately, and only when the target files do not exist yet.
#np.savez_compressed(outfile, history_biLSTM001 = history_biLSTM001)

#model_biLSTM.save_weights('Models/biLSTM_ce_001')