In [1]:
# write in train.py
# Add the parent directory to the import search path; presumably this is where
# the project modules imported in the next cell (data_utils, models) live.
import os
import sys

sys.path.append(os.pardir)

In [2]:
import tensorflow as tf
import json
import argparse

from data_utils import Data
from models.char_cnn_zhang import CharCNNZhang
from models.char_cnn_kim import CharCNNKim

Using TensorFlow backend.


In [3]:
# Parse the model option. parse_args is given an explicit argument list, so the
# values come from that list rather than from sys.argv.
# NOTE(review): FLAGS is not read anywhere in this cell; the active model is
# chosen by config['model'] below. Confirm whether FLAGS.model should drive it.
parser = argparse.ArgumentParser()
parser.add_argument('--model', type=str, default='char_cnn_zhang', help='Specifies which model to use: char_cnn_zhang or char_cnn_kim')
FLAGS = parser.parse_args(["--model", "char_cnn_zhang"])

# Load configurations. The context manager closes the file handle as soon as
# the JSON has been parsed instead of leaving it open.
with open('../config.json') as config_file:
    config = json.load(config_file)

# config['model'] initially holds the *name* of the model section
# (char_cnn_zhang); replace it with that section's settings so later code can
# read config['model'][...] directly.
model_name = config['model'] # char_cnn_zhang
config['model'] = config[model_name]

# Set the data path in order to run in the notebook
config['data']["training_data_source"] = '../data/ag_news_csv/train.csv'
config['data']["validation_data_source"] = '../data/ag_news_csv/test.csv'

# Load training data
# Data is the project class from data_utils; get_all_data() is unpacked into
# an (inputs, labels) pair.
training_data = Data(data_source=config["data"]["training_data_source"],
                     alphabet=config["data"]["alphabet"],
                     input_size=config["data"]["input_size"],
                     num_of_classes=config["data"]["num_of_classes"])
training_data.load_data()
training_inputs, training_labels = training_data.get_all_data()

# Load validation data (same settings, different CSV)
validation_data = Data(data_source=config["data"]["validation_data_source"],
                       alphabet=config["data"]["alphabet"],
                       input_size=config["data"]["input_size"],
                       num_of_classes=config["data"]["num_of_classes"])
validation_data.load_data()
validation_inputs, validation_labels = validation_data.get_all_data()


Data loaded from ../data/ag_news_csv/train.csv
Data loaded from ../data/ag_news_csv/test.csv


In [6]:
# Display the whole configuration dict to inspect every section at once.
config

{'char_cnn_kim': {'conv_layers': [[256, 10], [256, 7], [256, 5], [256, 3]],
  'dropout_p': 0.1,
  'embedding_size': 128,
  'fully_connected_layers': [1024, 1024],
  'loss': 'categorical_crossentropy',
  'optimizer': 'adam',
  'threshold': 1e-06},
 'char_cnn_zhang': {'conv_layers': [[256, 7, 3],
   [256, 7, 3],
   [256, 3, -1],
   [256, 3, -1],
   [256, 3, -1],
   [256, 3, 3]],
  'dropout_p': 0.5,
  'embedding_size': 128,
  'fully_connected_layers': [1024, 1024],
  'loss': 'categorical_crossentropy',
  'optimizer': 'adam',
  'threshold': 1e-06},
 'data': {'alphabet': 'abcdefghijklmnopqrstuvwxyz0123456789-,;.!?:\'"/\\|_@#$%^&*~`+-=<>()[]{}',
  'alphabet_size': 69,
  'input_size': 1014,
  'num_of_classes': 4,
  'training_data_source': '../data/ag_news_csv/train.csv',
  'validation_data_source': '../data/ag_news_csv/test.csv'},
 'model': {'conv_layers': [[256, 7, 3],
   [256, 7, 3],
   [256, 3, -1],
   [256, 3, -1],
   [256, 3, -1],
   [256, 3, 3]],
  'dropout_p': 0.5,
  'embedding_size': 

In [5]:
# Display the settings stored under config['model'] (the active model's section).
config['model']

{'conv_layers': [[256, 7, 3],
  [256, 7, 3],
  [256, 3, -1],
  [256, 3, -1],
  [256, 3, -1],
  [256, 3, 3]],
 'dropout_p': 0.5,
 'embedding_size': 128,
 'fully_connected_layers': [1024, 1024],
 'loss': 'categorical_crossentropy',
 'optimizer': 'adam',
 'threshold': 1e-06}

In [17]:
# Display the data settings: alphabet, sizes, class count and CSV paths.
config['data']

{'alphabet': 'abcdefghijklmnopqrstuvwxyz0123456789-,;.!?:\'"/\\|_@#$%^&*~`+-=<>()[]{}',
 'alphabet_size': 69,
 'input_size': 1014,
 'num_of_classes': 4,
 'training_data_source': '../data/ag_news_csv/train.csv',
 'validation_data_source': '../data/ag_news_csv/test.csv'}

# See the model at a glance

In [29]:
# Build the project's CharCNNZhang model from the loaded configuration.
# Data-related sizes come from config["data"]; architecture and training
# hyper-parameters come from config["model"].
data_cfg = config["data"]
model_cfg = config["model"]

model = CharCNNZhang(
    input_size=data_cfg["input_size"],
    alphabet_size=data_cfg["alphabet_size"],
    embedding_size=model_cfg["embedding_size"],
    conv_layers=model_cfg["conv_layers"],
    fully_connected_layers=model_cfg["fully_connected_layers"],
    num_of_classes=data_cfg["num_of_classes"],
    threshold=model_cfg["threshold"],
    dropout_p=model_cfg["dropout_p"],
    optimizer=model_cfg["optimizer"],
    loss=model_cfg["loss"],
)

CharCNNZhang model built: 
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
sent_input (InputLayer)      (None, 1014)              0         
_________________________________________________________________
embedding_4 (Embedding)      (None, 1014, 128)         8960      
_________________________________________________________________
conv1d_1 (Conv1D)            (None, 1008, 256)         229632    
_________________________________________________________________
thresholded_re_lu_1 (Thresho (None, 1008, 256)         0         
_________________________________________________________________
max_pooling1d_1 (MaxPooling1 (None, 336, 256)          0         
_________________________________________________________________
conv1d_2 (Conv1D)            (None, 330, 256)          459008    
_________________________________________________________________
thresholded_re_lu_2 (Thresho (None, 330, 256)    

## Construct layer by layer

In [50]:
from keras.models import Model
from keras.layers import Input, Embedding, Conv1D, Activation, MaxPooling1D, Dense, Flatten, Dropout

In [39]:
# Hyper-parameters pulled out of the config into plain variables so the model
# can be rebuilt by hand below. Trailing comments show the configured values.
data_cfg, model_cfg = config['data'], config['model']

# Input / embedding sizes
input_size = data_cfg['input_size']  # 1014
alphabet_size = data_cfg['alphabet_size']  # 69
embedding_size = model_cfg['embedding_size']  # 128

# Architecture: each conv entry is unpacked later as
# (number of filters, filter size, pooling size)
conv_layers = model_cfg["conv_layers"]  # [[256, 7, 3], [256, 7, 3], [256, 3, -1], [256, 3, -1], [256, 3, -1], [256, 3, 3]]
fully_connected_layers = model_cfg["fully_connected_layers"]  # [1024, 1024]
num_of_classes = data_cfg["num_of_classes"]  # 4

# Regularisation and training settings
threshold = model_cfg["threshold"]  # 1e-06
dropout_p = model_cfg["dropout_p"]  # 0.5
optimizer = model_cfg["optimizer"]  # adam
loss = model_cfg["loss"]  # categorical_crossentropy

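The output of the embedding layer must be assigned to the same variable that the convolution loop consumes. Every iteration of the loop starts by applying `Conv1D` to that variable, so if the embedding output were stored under a different name such as `embedding` (as in the snippet below), each `Conv1D` would be applied to the raw embedding instead of to the previous layer's output, and the convolutional layers would not be chained correctly.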


```
# Embedding layer
# the output should be the same with conv
embedding = Embedding(alphabet_size+1, embedding_size, input_length=input_size)(inputs)
# Conv 
for filter_num, filter_size, pooling_size in conv_layers:
    conv = Conv1D(filter_num, filter_size)(embedding)
```

In [63]:
# Hand-built version of the network:
# embedding -> stacked Conv1D + ReLU blocks (max-pooling where configured)
# -> flatten -> dense layers with dropout -> softmax output.

# int64 index input, one row of `input_size` positions per sample
inputs = Input(shape=(input_size,), name='sent_input', dtype='int64')  # shape=(?, 1014)

# The embedding output is stored in `conv` on purpose: the loop below always
# reads from and writes back to `conv`, which is what chains the layers.
conv = Embedding(alphabet_size + 1, embedding_size, input_length=input_size)(inputs)

# Convolutional stack; each config entry is (filters, kernel width, pool width)
for n_filters, kernel_width, pool_width in conv_layers:
    conv = Conv1D(n_filters, kernel_width)(conv)
    conv = Activation('relu')(conv)
    if pool_width == -1:
        # -1 marks a block without max-pooling
        continue
    conv = MaxPooling1D(pool_size=pool_width)(conv)

# After the last block the tensor is (None, 34, 256)
x = Flatten()(conv)  # (None, 8704)

# Fully connected layers, each followed by dropout
for units in fully_connected_layers:
    x = Dense(units, activation='relu')(x)  # units == 1024
    x = Dropout(dropout_p)(x)

# Softmax classifier over the target classes
predictions = Dense(num_of_classes, activation='softmax')(x)

# Assemble, compile (Adam, categorical_crossentropy) and print the layer table
model = Model(inputs=inputs, outputs=predictions)
model.compile(optimizer=optimizer, loss=loss)
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
sent_input (InputLayer)      (None, 1014)              0         
_________________________________________________________________
embedding_14 (Embedding)     (None, 1014, 128)         8960      
_________________________________________________________________
conv1d_30 (Conv1D)           (None, 1008, 256)         229632    
_________________________________________________________________
activation_20 (Activation)   (None, 1008, 256)         0         
_________________________________________________________________
max_pooling1d_13 (MaxPooling (None, 336, 256)          0         
_________________________________________________________________
conv1d_31 (Conv1D)           (None, 330, 256)          459008    
_________________________________________________________________
activation_21 (Activation)   (None, 330, 256)          0         
__________

# Train the model

Because I am running the model on a CPU only, I use just 1000 samples for training and 100 samples for testing (validation).

In [78]:

# Keep only a small slice of each split: the first 1000 training samples and
# the first 100 validation samples. Note that this overwrites the full arrays.
training_inputs, training_labels = training_inputs[:1000], training_labels[:1000]
validation_inputs, validation_labels = validation_inputs[:100], validation_labels[:100]

# Short run: the configured epochs (5000) and checkpoint_every (100) are
# replaced by small values here; only batch_size is read from the config.
epochs = 10
checkpoint_every = 1
batch_size = config["training"]["batch_size"]  # 128

In [79]:
# TensorBoard callback: logs to ./logs, writing histograms, gradients and
# embeddings every `checkpoint_every` epochs; graph and image dumps are off.
from keras.callbacks import TensorBoard

tensorboard_options = dict(
    log_dir='./logs',
    histogram_freq=checkpoint_every,
    batch_size=batch_size,
    write_graph=False,
    write_grads=True,
    write_images=False,
    embeddings_freq=checkpoint_every,
    embeddings_layer_names=None,
)
tensorboard = TensorBoard(**tensorboard_options)

# Fit on the training subset and evaluate on the validation subset after each
# epoch. The call is left as the cell's last expression so the returned
# History object is displayed.
model.fit(
    training_inputs,
    training_labels,
    validation_data=(validation_inputs, validation_labels),
    batch_size=batch_size,
    epochs=epochs,
    verbose=1,
    callbacks=[tensorboard],
)


Train on 1000 samples, validate on 100 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x1cbe073b00>

Because the number of samples is so small, the model overfits the training data after 10 epochs.