In [1]:
### Package Import
import vdcnn
import pandas as pd
from utils import CharEmbeddedEncoder
import numpy as np
from datetime import datetime as dt
import random
from sklearn.metrics import confusion_matrix
import gc
from os import makedirs
import matplotlib.pyplot as plt

import tensorflow as tf
from keras.models import Sequential
from keras.layers import Embedding, Conv1D, BatchNormalization, Activation, Dense, Lambda, Dropout
from keras.layers.pooling import MaxPooling1D
from keras.optimizers import SGD

  return f(*args, **kwds)
Using TensorFlow backend.


In [2]:
### Function Definition
def to_categorical(y, nb_classes=None):
    """One-hot encode a sequence of integer class labels.

    Parameters
    ----------
    y : array-like of int
        Class indices, one per sample; converted to an ``int32`` array.
    nb_classes : int, optional
        Number of columns in the encoding. When omitted (or otherwise falsy)
        it is derived from the data as ``max(y) + 1``.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(len(y), nb_classes)`` filled with zeros, where row
        ``i`` holds ``1.0`` in column ``y[i]``.
    """
    labels = np.asarray(y, dtype='int32')
    width = nb_classes if nb_classes else np.max(labels) + 1
    one_hot = np.zeros((len(labels), width))
    # Mark the column named by each label, one row per sample.
    for row, label in enumerate(labels):
        one_hot[row, label] = 1.
    return one_hot

def _top_k(x, top_k=3, num_channels=512):
    """k-max pooling: keep the ``top_k`` largest values of every channel.

    The original version read ``top_k`` and ``num_filters`` as free
    variables, but those names only exist as parameters of ``create_model``,
    so the function raised ``NameError`` when called. They are now explicit
    arguments (defaults match ``create_model``'s defaults).

    Parameters
    ----------
    x : tensor
        Rank-3 tensor; axes 1 and 2 are swapped so that ``tf.nn.top_k``
        (which works on the last axis) selects along the original axis 1.
    top_k : int
        Number of values kept per channel.
    num_channels : int
        Size of the last axis of ``x``; used to flatten the result.

    Returns
    -------
    tensor
        The selected values reshaped to ``(-1, num_channels * top_k)``.
    """
    x = tf.transpose(x, [0, 2, 1])
    k_max = tf.nn.top_k(x, k=top_k)
    # tf.nn.top_k returns (values, indices); only the values are kept.
    return tf.reshape(k_max[0], (-1, num_channels * top_k))

def create_model(num_classes, num_filters=[64, 128, 256, 512], top_k=3, learning_rate=0.01, input_dim=69):
    """
    Create VDCNN Model

    Parameters
    ----------
    num_classes : int
        Number of units in the softmax output layer.
    num_filters : sequence of int
        Filter count of each convolutional block, in order. The default list
        is never mutated here.
    top_k : int
        ``k`` of the k-max pooling applied after the last block.
    learning_rate : float
        Learning rate of the SGD optimizer.
    input_dim : int
        Vocabulary size of the embedding layer. It must be strictly greater
        than the largest character index fed to the model.

    Returns
    -------
    keras.models.Sequential
        The compiled model (its summary is printed as a side effect).
    """
    model = Sequential()
    # Use the caller-supplied vocabulary size. The original hard-coded 69 and
    # silently ignored `input_dim`, so any index >= 69 failed in the lookup.
    model.add(Embedding(input_dim=input_dim, output_dim=16, input_length=1014, name='input_embedding'))
    model.add(Conv1D(filters=64, kernel_size=3, strides=2, padding="same"))

    for conv_filter in num_filters:
        # Build Convolutional Block: two (Conv1D -> BatchNorm -> ReLU) stages.
        conv_block = Sequential()
        conv_block.add(Conv1D(filters=conv_filter,
                              kernel_size=3,
                              strides=1,
                              padding='same',
                              input_shape=list(model.get_output_shape_at(0))[1:]))
        conv_block.add(BatchNormalization())
        conv_block.add(Activation('relu'))
        conv_block.add(Conv1D(filters=conv_filter,
                              kernel_size=3,
                              strides=1,
                              padding='same'))
        conv_block.add(BatchNormalization())
        conv_block.add(Activation('relu'))

        model.add(conv_block)
        model.add(MaxPooling1D(pool_size=3, strides=2, padding="same"))

    # Hand k and the channel count to the pooling function explicitly via
    # `arguments`, so it does not depend on names outside its own scope.
    model.add(Lambda(_top_k,
                     output_shape=(num_filters[-1] * top_k,),
                     arguments={'top_k': top_k, 'num_channels': num_filters[-1]}))
    model.add(Dense(2048, activation='relu', kernel_initializer='he_normal'))
    model.add(Dropout(0.2, seed=23))
    model.add(Dense(2048, activation='relu', kernel_initializer='he_normal'))
    model.add(Dropout(0.2, seed=23))
    model.add(Dense(num_classes, activation='softmax', name='output_layer'))
    sgd = SGD(lr=learning_rate, decay=1e-6, momentum=0.9, nesterov=False)
    model.summary()
    # NOTE(review): mean squared error on a softmax output is unusual for
    # classification; 'categorical_crossentropy' is the conventional choice.
    # Left unchanged so reported loss values stay comparable - confirm intent.
    model.compile(optimizer=sgd, loss='mean_squared_error', metrics=['accuracy'])
    return model

In [3]:
### Class  Definition
class CharEmbeddedEncoder:
    """
    An encoder for character embedding based on "Text Understanding from Scratch"
        URL: https://arxiv.org/pdf/1502.01710.pdf

    Each document becomes a fixed-length vector of integer character indices:

    * characters of ``alphabet`` map to ``0 .. len(alphabet) - 1``;
    * any other character maps to ``unknown_index`` (``len(char_dict)``);
    * positions past the end of the text hold ``padding_index``
      (``len(char_dict) + 1``).

    ``char_dict_len`` is the total number of distinct indices (alphabet +
    unknown + padding), so every emitted index is strictly smaller than it
    and it can be used directly as the ``input_dim`` of an embedding layer.
    Previously the padding index was equal to ``char_dict_len`` and therefore
    out of range for such a layer. The emitted index values are unchanged.

    NOTE(review): the alphabet contains lowercase letters only and the text
    is not lowercased here, so uppercase letters are encoded as unknown -
    confirm whether callers are expected to lowercase first.
    """
    # Modules are bound as class attributes so methods reach them via self.
    np = __import__('numpy')
    mp = __import__('multiprocessing')
    def __init__(self, n_jobs=2, sequence_max_length=1014):
        """
        Parameters
        ----------
        n_jobs : int
            Stored on the instance; not used by the methods of this class.
        sequence_max_length : int
            Length of every encoded vector; longer texts are truncated.
        """
        self.alphabet =  'abcdefghijklmnopqrstuvwxyz0123456789-,;.!?:\'"/|_#$%^&*~`+=<>()[]{}\\ \n'
        self.sequence_max_length = sequence_max_length
        self.n_jobs = n_jobs
        self.char_dict = {c: i for i, c in enumerate(self.alphabet)}
        self.unknown_index = len(self.char_dict)
        self.padding_index = len(self.char_dict) + 1
        # Alphabet entries + one unknown slot + one padding slot.
        self.char_dict_len = len(self.char_dict) + 2

    def char2vec(self, text):
        """Encode one text as a float array of length ``sequence_max_length``.

        Text longer than ``sequence_max_length`` is truncated. The original
        loop wrote ``data[i]`` before checking the length, which raised
        ``IndexError`` for any longer text.
        """
        data = self.np.full(self.sequence_max_length, self.padding_index, dtype=float)
        for i, char in enumerate(text[:self.sequence_max_length]):
            data[i] = self.char_dict.get(char, self.unknown_index)
        return data

    def transform(self, documents):
        """Encode an iterable of texts.

        Returns
        -------
        numpy.ndarray
            Integer array of shape ``(len(documents), sequence_max_length)``;
            an empty input yields shape ``(0, sequence_max_length)``.
        """
        char_vecs = [self.char2vec(document) for document in documents]
        encoded = self.np.asarray(char_vecs, dtype=int)
        # Keeps the second axis even when `documents` is empty.
        return encoded.reshape(-1, self.sequence_max_length)

In [4]:
### Global Variable Declaration
# Number of samples handed to the model per training step.
batch_size = 128
# Base value that is combined with the epoch number for the per-epoch shuffle.
random_seed = 23

In [5]:
### Main Procedure
def save_curve(values, path, ylabel):
    """Plot `values` against their index as a line chart and save it to `path`.

    The figure is created and closed here so repeated calls do not leave
    open figures behind.
    """
    fig, ax = plt.subplots()
    ax.plot(values)
    ax.set_xlabel('epoch')
    ax.set_ylabel(ylabel)
    fig.savefig(path)
    plt.close(fig)


# ---- Load and encode the data -------------------------------------------
s = dt.now()
csv_columns = ['class', 'title', 'content']
train_data = pd.read_csv('data/ag_news_csv/train.csv', index_col=False, header=None, names=csv_columns)
test_data = pd.read_csv('data/ag_news_csv/test.csv', index_col=False, header=None, names=csv_columns)
# The context manager closes the file even if reading fails.
with open('data/ag_news_csv/classes.txt', 'r') as f:
    classes = [line.replace('\n', '') for line in f]
num_classes = len(classes)
encoder = CharEmbeddedEncoder(n_jobs=4)
# `.values` replaces the deprecated `Series.as_matrix()`.
train_X = encoder.transform(train_data['content'].values)
# Shift the labels down by one so they can index one-hot columns from 0.
train_data['class'] = train_data['class'] - 1
train_Y = train_data['class'].values
train_Y = to_categorical(train_Y, nb_classes=num_classes)

test_X = encoder.transform(test_data['content'].values)
test_data['class'] = test_data['class'] - 1
test_Y = test_data['class'].values
e = dt.now()
p = e - s
print('Prepare Data consume:{}'.format(p))

# ---- Build the model ----------------------------------------------------
# NOTE(review): the model is built by the imported `vdcnn` module, not by the
# `create_model` defined in this notebook - confirm which one is intended.
model = vdcnn.create_model(num_classes=num_classes, input_dim=encoder.char_dict_len)
model.summary()

makedirs('model', exist_ok=True)
makedirs('figure', exist_ok=True)
makedirs('performance', exist_ok=True)

# ---- Train --------------------------------------------------------------
losses = []
accuracies = []
threshold = 0.95
# The original loop ended with an unconditional `break` after the first
# epoch, which made the checkpointing code below unreachable. `max_epochs`
# keeps that single-epoch default explicit; raise it to train until
# `threshold` is reached.
max_epochs = 1
epoch = 0
acc_of_epoch = 0.0
while acc_of_epoch < threshold and epoch < max_epochs:
    epoch += 1

    # Seeded, per-epoch shuffle. The original assigned to `random.seed`
    # (replacing the function with an int) instead of calling it, so the
    # shuffle was never seeded. A local generator avoids global state.
    shuffle_index = list(range(len(train_X)))
    random.Random(random_seed + epoch).shuffle(shuffle_index)
    train_X = train_X[shuffle_index]
    train_Y = train_Y[shuffle_index]

    batch_losses = []
    batch_accs = []
    for start_index in range(0, len(train_X), batch_size):
        # Slicing clips at the end of the array, so the last batch may be short.
        end_index = start_index + batch_size
        rtn = model.train_on_batch(x=train_X[start_index:end_index],
                                   y=train_Y[start_index:end_index])
        batch_losses.append(rtn[0])
        batch_accs.append(rtn[1])
    loss_of_epoch = np.mean(batch_losses)
    acc_of_epoch = np.mean(batch_accs)
    losses.append(loss_of_epoch)
    accuracies.append(acc_of_epoch)
    print('Epoch {} completed, loss - {}, acc - {}'.format(epoch, loss_of_epoch, acc_of_epoch))

    # Periodic checkpoint with the learning curves so far.
    if epoch % 10 == 0:
        model.save('model/ag_news_shuffle_e{}.mdl'.format(epoch))
        save_curve(losses, 'figure/loss_e{}.png'.format(epoch), 'loss')
        save_curve(accuracies, 'figure/accuracy_e{}.png'.format(epoch), 'accuracy')
e = dt.now()
p = e - s
print('Training Model consume:{}'.format(p))

model.save('model/ag_news_shuffle_e{}.mdl'.format(epoch))

# ---- Evaluate -----------------------------------------------------------
s = dt.now()
# argmax over the softmax output gives the predicted class index
# (equivalent to Sequential.predict_classes for a softmax output layer).
predict_Y = np.argmax(model.predict(test_X), axis=1)
e = dt.now()
p = e - s
print('Predict consume:{}'.format(p))

test_confusion = confusion_matrix(test_Y, predict_Y)
print(test_confusion)
pd.DataFrame(test_confusion).to_csv('performance/ag_news_shuffle.csv')

save_curve(losses, 'figure/loss.png', 'loss')
save_curve(accuracies, 'figure/accuracy.png', 'accuracy')


Prepare Data consume:0:00:17.636716
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_embedding (Embedding)  (None, 1014, 16)          1104      
_________________________________________________________________
conv1d_1 (Conv1D)            (None, 507, 64)           3136      
_________________________________________________________________
sequential_2 (Sequential)    (None, 507, 64)           25216     
_________________________________________________________________
max_pooling1d_1 (MaxPooling1 (None, 254, 64)           0         
_________________________________________________________________
sequential_3 (Sequential)    (None, 254, 128)          75008     
_________________________________________________________________
max_pooling1d_2 (MaxPooling1 (None, 127, 128)          0         
_________________________________________________________________
sequential_4 (Sequential)    (None, 127,

InvalidArgumentError: indices[0,0] = 69 is not in [0, 69)
	 [[Node: input_embedding/Gather = Gather[Tindices=DT_INT32, Tparams=DT_FLOAT, validate_indices=true, _device="/job:localhost/replica:0/task:0/device:CPU:0"](input_embedding/embeddings/read, input_embedding/Cast)]]

Caused by op 'input_embedding/Gather', defined at:
  File "/Users/Julius/anaconda3/lib/python3.6/runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "/Users/Julius/anaconda3/lib/python3.6/runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "/Users/Julius/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/Users/Julius/anaconda3/lib/python3.6/site-packages/traitlets/config/application.py", line 658, in launch_instance
    app.start()
  File "/Users/Julius/anaconda3/lib/python3.6/site-packages/ipykernel/kernelapp.py", line 477, in start
    ioloop.IOLoop.instance().start()
  File "/Users/Julius/anaconda3/lib/python3.6/site-packages/zmq/eventloop/ioloop.py", line 177, in start
    super(ZMQIOLoop, self).start()
  File "/Users/Julius/anaconda3/lib/python3.6/site-packages/tornado/ioloop.py", line 888, in start
    handler_func(fd_obj, events)
  File "/Users/Julius/anaconda3/lib/python3.6/site-packages/tornado/stack_context.py", line 277, in null_wrapper
    return fn(*args, **kwargs)
  File "/Users/Julius/anaconda3/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 440, in _handle_events
    self._handle_recv()
  File "/Users/Julius/anaconda3/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 472, in _handle_recv
    self._run_callback(callback, msg)
  File "/Users/Julius/anaconda3/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 414, in _run_callback
    callback(*args, **kwargs)
  File "/Users/Julius/anaconda3/lib/python3.6/site-packages/tornado/stack_context.py", line 277, in null_wrapper
    return fn(*args, **kwargs)
  File "/Users/Julius/anaconda3/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 283, in dispatcher
    return self.dispatch_shell(stream, msg)
  File "/Users/Julius/anaconda3/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 235, in dispatch_shell
    handler(stream, idents, msg)
  File "/Users/Julius/anaconda3/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 399, in execute_request
    user_expressions, allow_stdin)
  File "/Users/Julius/anaconda3/lib/python3.6/site-packages/ipykernel/ipkernel.py", line 196, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "/Users/Julius/anaconda3/lib/python3.6/site-packages/ipykernel/zmqshell.py", line 533, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "/Users/Julius/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2698, in run_cell
    interactivity=interactivity, compiler=compiler, result=result)
  File "/Users/Julius/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2802, in run_ast_nodes
    if self.run_code(code, result):
  File "/Users/Julius/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2862, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-5-ba512874309d>", line 22, in <module>
    model = vdcnn.create_model(num_classes=num_classes, input_dim=encoder.char_dict_len)
  File "/Users/Julius/git_repo/vdcnn-keras/vdcnn.py", line 10, in create_model
    model.add(Embedding(input_dim=69, output_dim=16, input_length=1014, name='input_embedding'))
  File "/Users/Julius/anaconda3/lib/python3.6/site-packages/keras/models.py", line 467, in add
    layer(x)
  File "/Users/Julius/anaconda3/lib/python3.6/site-packages/keras/engine/topology.py", line 617, in __call__
    output = self.call(inputs, **kwargs)
  File "/Users/Julius/anaconda3/lib/python3.6/site-packages/keras/layers/embeddings.py", line 138, in call
    out = K.gather(self.embeddings, inputs)
  File "/Users/Julius/anaconda3/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py", line 1208, in gather
    return tf.gather(reference, indices)
  File "/Users/Julius/anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/array_ops.py", line 2486, in gather
    params, indices, validate_indices=validate_indices, name=name)
  File "/Users/Julius/anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/gen_array_ops.py", line 1834, in gather
    validate_indices=validate_indices, name=name)
  File "/Users/Julius/anaconda3/lib/python3.6/site-packages/tensorflow/python/framework/op_def_library.py", line 787, in _apply_op_helper
    op_def=op_def)
  File "/Users/Julius/anaconda3/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 2956, in create_op
    op_def=op_def)
  File "/Users/Julius/anaconda3/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 1470, in __init__
    self._traceback = self._graph._extract_stack()  # pylint: disable=protected-access

InvalidArgumentError (see above for traceback): indices[0,0] = 69 is not in [0, 69)
	 [[Node: input_embedding/Gather = Gather[Tindices=DT_INT32, Tparams=DT_FLOAT, validate_indices=true, _device="/job:localhost/replica:0/task:0/device:CPU:0"](input_embedding/embeddings/read, input_embedding/Cast)]]
