In [None]:
########################################################################################################################
# Filename: FF_Models_GenerateResults.ipynb
#
# Purpose: Multi-label Text-categorization via feed forward neural networks -- Train networks/generate results

# Author(s): Bobby (Robert) Lumpkin
#
# Library Dependencies: numpy, pandas, scikit-learn, skmultilearn, joblib, os, sys, threshold_learning
########################################################################################################################

# Multilabel Text Classification with Feed Forward Networks

In [1]:
import numpy as np
import pandas as pd
import math
import os
import json
import ast
import random
import tensorflow as tf
import tensorflow_addons as tfa
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Dropout
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import GridSearchCV
from bpmll import bp_mll_loss
import sklearn_json as skljson
from sklearn.model_selection import train_test_split
from sklearn import metrics
import matplotlib.pyplot as plt
import sys
os.chdir('C:\\Users\\rober\\OneDrive\\Documents\\Multilabel-Text-Classification\\Deep Learning Models\\FF Models')  ## Set working directory
                                                                                                                      ## to be 'ANN Results'
sys.path.append('../../ThresholdFunctionLearning')    ## Append path to the ThresholdFunctionLearning directory to the interpreters
                                                   ## search path
from threshold_learning import predict_test_labels_binary    ## Import the 'predict_test_labels_binary()' function from the 
from threshold_learning import predict_labels_binary         ## threshold_learning library
sys.path.append('GridSearch_FFNetworks')
from gridSearch_forFFNNs import SizeLayersPows2

## Load in Training/Test Data

In [2]:
## Load the seperabe PCs training and test data
npzfile = np.load("../../Data/tfidf_PC_separable.npz")
X_sepPCs_train = npzfile['X_sepPCs_train']
X_sepPCs_test = npzfile['X_sepPCs_test']
Y_train = npzfile['Y_train'].astype('float64')
Y_test = npzfile['Y_test'].astype('float64')

## Load the autoencoder train/test features
npzfile = np.load('../../Data/tfidf_encoded_data.npz')
encoded_train = npzfile['encoded_train']
encoded_test = npzfile['encoded_test']

## Load the tfidf training and test data
npzfile = np.load("../../Data/tfidf_trainTest_data.npz", allow_pickle = True)
X_tfidfTrain = npzfile['X_tfidfTrain']
X_tfidfTest = npzfile['X_tfidfTest']

## Build Functions for Network Hyperparameter Tuning

In [6]:
## Define a function that generates a list of layer sizes for a network
def SizeLayersPows2(n_layers, first_layer_nodes, last_layer_nodes):
    layers = []
    
    first_layer_exp = math.log(first_layer_nodes, 2)
    last_layer_exp = math.log(last_layer_nodes, 2)
    exp_increment = (last_layer_exp - first_layer_exp) / (n_layers - 1)
    nodes = first_layer_nodes
    nodes_exp = first_layer_exp
    for i in range(1, n_layers):
        layers.append(nodes)
        nodes_exp = nodes_exp + exp_increment
        nodes = 2 ** round(nodes_exp)
    layers.append(last_layer_nodes)
    
    return layers

In [3]:
SizeLayersPows2(3, 7000, 90)

[7000, 1024, 90]

In [51]:
def createModel(X_train, n_layers, first_layer_nodes, last_layer_nodes, activation_func, output_activation, loss_func, learning_rate = 0.01, Dropout_reg = True, drop_prob = 0.5):
    model = Sequential()
    n_nodes = SizeLayersPows2(n_layers, first_layer_nodes, last_layer_nodes)
    for i in range(1, n_layers):
        if i == 1:
            model.add(Dense(first_layer_nodes, input_dim = X_train.shape[1], activation=activation_func))
        else:
            if Dropout == True:
                model.add(Dropout(drop_prob))
            model.add(Dense(n_nodes[i - 1], activation = activation_func))
            
    #Finally, the output layer should have a single node in binary classification
    model.add(Dense(last_layer_nodes, activation = output_activation))
    optim_func = tf.keras.optimizers.Adam(lr = learning_rate)
    model.compile(optimizer = optim_func, loss = loss_func, metrics = tfa.metrics.HammingLoss(mode = 'multilabel', threshold = 0.5)) 
    
    return model

In [58]:
model = createModel(X_sepPCs_train, 3, X_sepPCs_train.shape[0], 90, 'relu', 'sigmoid', 'binary_crossentropy')
model.summary()

Model: "sequential_8"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_16 (Dense)             (None, 7769)              264146    
_________________________________________________________________
dense_17 (Dense)             (None, 1024)              7956480   
_________________________________________________________________
dense_18 (Dense)             (None, 90)                92250     
Total params: 8,312,876
Trainable params: 8,312,876
Non-trainable params: 0
_________________________________________________________________


In [45]:
## Wrap model into scikit-learn
model =  KerasClassifier(build_fn = createmodel, verbose = False)

In [None]:
param_grid = dict(X_train,
                  n_layers=[3,4,5], 
                  first_layer_nodes = [64,32,16], 
                  last_layer_nodes = [4],  
                  activation_func = 'relu', 
                  output_activation = 'sigmoid', 
                  loss_func = 'binary_crossentropy', 
                  #batch_size = [1], 
                  epochs = [30])
grid = GridSearchCV(estimator = model, param_grid = param_grid)

In [47]:
def createModel2(X_train, n_layers, first_layer_nodes, last_layer_nodes, activation_func, output_activation, loss_func, learning_rate = 0.01, Dropout_reg = True, drop_prob = 0.5):
    n_nodes = SizeLayersPows2(n_layers, first_layer_nodes, last_layer_nodes)
    layers_string = f"tf.keras.models.Sequential(["
    for layer in range(2, n_layers):
        layers_string = layers_string + f"tf.keras.layers.Dense({n_nodes[layer - 1]}, activation = \'" + activation_func + f"\'),"
    layers_string = layers_string + f"tf.keras.layers.Dense({last_layer_nodes}, activation = \'" + output_activation + "\')])"
    
    model = eval(layers_string)
    optim_func = tf.keras.optimizers.Adam(lr = learning_rate)
    model.compile(optimizer = optim_func, loss = loss_func, metrics = tfa.metrics.HammingLoss(mode = 'multilabel', threshold = 0.5)) 
    return model

In [59]:
model = createModel2(X_sepPCs_train, 3, X_sepPCs_train.shape[0], Y_train.shape[1], activation_func = 'relu', output_activation = 'sigmoid', loss_func = 'binary_crossentropy')
#model.fit(X_sepPCs_train, Y_train, epochs = 3,
#             validation_data = (X_sepPCs_test, Y_test), verbose = 2)

In [60]:
## Wrap model into scikit-learn
model =  KerasClassifier(build_fn = createModel2, verbose = False)

In [74]:
param_grid = dict(X_train = [X_sepPCs_train],
                  n_layers=[3,4,5], 
                  first_layer_nodes = [X_sepPCs_train.shape[0]], 
                  last_layer_nodes = [Y_train.shape[1]],  
                  activation_func = ['relu'], 
                  output_activation = ['sigmoid'], 
                  loss_func = ['binary_crossentropy'], 
                  #batch_size = [1], 
                  epochs = [30])

hamming_scorer = metrics.make_scorer(metrics.hamming_loss)
grid = GridSearchCV(estimator = model, param_grid = param_grid, scoring = hamming_scorer, cv = 5, verbose = 1)

In [75]:
grid.fit(X_sepPCs_train, Y_train)

Fitting 5 folds for each of 3 candidates, totalling 15 fits


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


ValueError: Classification metrics can't handle a mix of multilabel-indicator and multiclass targets