In [None]:
import numpy as np
import pandas as pd
import re
import json
import keras.backend as K
from keras.callbacks import ModelCheckpoint, TensorBoard
from keras.optimizers import SGD
import custom_callbacks
from vdcnn import *
from datetime import datetime
from sklearn.model_selection import train_test_split

In [None]:
# Directory and file names for the preprocessed inputs.
# DATA_OUT_PATH is defined here but not referenced in the cells shown.
DATA_IN_PATH = './data_in/'
DATA_OUT_PATH = './data_out/'
INPUT_TRAIN_DATA = 'train_input.npy'
INPUT_LABEL_DATA = 'train_label.npy'
DATA_CONFIGS = 'config_data.json'

# Each file is opened in a context manager so its handle is closed as soon as
# the contents have been read, rather than lingering until garbage collection.
with open(DATA_IN_PATH + INPUT_TRAIN_DATA, 'rb') as train_file:
    input_data = np.load(train_file)
with open(DATA_IN_PATH + INPUT_LABEL_DATA, 'rb') as label_file:
    input_label = np.load(label_file)
with open(DATA_IN_PATH + DATA_CONFIGS, 'r') as config_file:
    pre_config = json.load(config_file)



In [None]:
# --- Split settings ---------------------------------------------------------
TEST_SPLIT = 0.2   # fraction of the samples held out by train_test_split
SEED = 42          # random_state for the split, so it is repeatable

# --- Model / training hyperparameters ---------------------------------------
# One more than the vocabulary size stored in the preprocessing config
# (presumably to reserve an extra index, e.g. for padding -- TODO confirm).
VOCAB_SIZE = pre_config['vocab_size'] + 1
EMB_SIZE = 256
BATCH_SIZE = 16
NUM_EPOCH = 100

# X_* hold the inputs and Y_* the matching labels.
X_train, X_test, Y_train, Y_test = train_test_split(
    input_data,
    input_label,
    test_size=TEST_SPLIT,
    random_state=SEED,
)


In [None]:
def train(x_train, y_train, x_test, y_test):
    """Build, compile and fit a VDCNN model, checkpointing on validation accuracy.

    The model architecture is written to ``vdcnn_model.json`` in the current
    working directory before training starts. During training, weights are
    saved under ``./checkpoints/`` whenever ``val_acc`` improves, and
    TensorBoard logs are written to ``./logs``. The Keras session is cleared
    once fitting has finished.

    Args:
        x_train: Training inputs handed to ``model.fit``.
        y_train: Training targets. The model is compiled with
            ``categorical_crossentropy`` and ``num_classes=3``, so these are
            presumably one-hot vectors over 3 classes -- TODO confirm.
        x_test: Held-out inputs; used as validation data and also passed to
            the ``evaluate_step`` callback.
        y_test: Held-out targets matching ``x_test``.

    Returns:
        None.
    """
    # Imported locally so this function brings its own dependency into scope.
    import os

    checkpoint_path = "./checkpoints/vdcnn_weights_val_acc_{val_acc:.4f}.h5"
    # The checkpoint directory is not created anywhere else in this notebook;
    # without it the first weight save fails on a fresh checkout.
    os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)

    # Init Keras Model here
    # NOTE(review): sequence_length is fixed at 512 -- presumably it must match
    # the width of x_train; confirm against the preprocessing step.
    model = VDCNN(num_classes=3,
                  sequence_length=512,
                  shortcut=False,
                  pool_type='max',
                  sorted=False,
                  use_bias=False,
                  embedding_dim=EMB_SIZE)

    model.compile(optimizer=SGD(lr=0.01, momentum=0.9),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    # Save model architecture
    model_json = model.to_json()
    with open("vdcnn_model.json", "w") as json_file:
        json_file.write(model_json)
    time_str = datetime.now().isoformat()
    print("{}: Model saved as json.".format(time_str))
    print("")

    # Trainer
    # Tensorboard and extra callbacks to support steps history
    tensorboard = TensorBoard(log_dir='./logs', histogram_freq=50, batch_size=BATCH_SIZE,
                              write_graph=True, write_images=True)
    # Keep only weights that improve validation accuracy (mode='max').
    checkpointer = ModelCheckpoint(filepath=checkpoint_path, period=1,
                                   verbose=1, save_best_only=True, mode='max', monitor='val_acc')
    loss_history = custom_callbacks.loss_history(model, tensorboard)
    # The literal 100 is passed positionally to the project-local callback; its
    # meaning is not visible here (presumably an evaluation interval in steps
    # -- TODO confirm).
    evaluate_step = custom_callbacks.evaluate_step(model, checkpointer, tensorboard, 100,
                                                   BATCH_SIZE, x_test, y_test)

    # Fit model
    model.fit(x_train, y_train, batch_size=BATCH_SIZE, epochs=NUM_EPOCH,
              validation_data=(x_test, y_test), verbose=1,
              callbacks=[checkpointer, tensorboard, loss_history, evaluate_step])
    print('-'*30)
    time_str = datetime.now().isoformat()
    print("{}: Done training.".format(time_str))
    # Clear the Keras backend session now that training has finished.
    K.clear_session()
    print('-'*30)
    print()

# Start training on the split prepared above when this is run as the main module.
if __name__ == '__main__':
    train(
        x_train=X_train,
        y_train=Y_train,
        x_test=X_test,
        y_test=Y_test,
    )

