## 11.1 Reuters Topic Classification with Keras - LSTM version
TA: Bokyung Son (*Computational Linguistics Lab*)

In this lab, we train several RNN variants on the Reuters Newswire topic classification task.
1. Vanilla 1-layer LSTM
2. Stacked LSTM
3. Bidirectional LSTM
4. Stacked bidirectional LSTM

In [1]:
import sys, os
import pickle
import numpy as np
from keras.preprocessing import sequence
from keras.utils import to_categorical
from keras.models import Sequential, load_model
from keras.layers import Embedding, Dense, LSTM, Bidirectional
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.datasets import reuters

Using TensorFlow backend.


### Load dataset

In [2]:
# Vocabulary size: only the `max_features` most common words are kept.
max_features = 20000
# Sequence length (in words) used as `maxlen` when padding/trimming the samples.
maxlen = 80

In [3]:
# Load the Reuters newswire data, restricted to the `max_features` most common
# words, with 20% of the samples set aside as the test split.
(x_train, y_train), (x_test, y_test) = reuters.load_data(
    num_words=max_features,
    test_split=0.2,
)

In [5]:
# Sanity check: number of samples in each split, then one raw training sample
# (a sequence of integer word indices).
for split, label in ((x_train, 'train sequences'), (x_test, 'test sequences')):
    print(len(split), label)
print(x_train[0])

8982 train sequences
2246 test sequences
[1, 2, 2, 8, 43, 10, 447, 5, 25, 207, 270, 5, 3095, 111, 16, 369, 186, 90, 67, 7, 89, 5, 19, 102, 6, 19, 124, 15, 90, 67, 84, 22, 482, 26, 7, 48, 4, 49, 8, 864, 39, 209, 154, 6, 151, 6, 83, 11, 15, 22, 155, 11, 15, 7, 48, 9, 4579, 1005, 504, 6, 258, 6, 272, 11, 15, 22, 134, 44, 11, 15, 16, 8, 197, 1245, 90, 67, 52, 29, 209, 30, 32, 132, 6, 109, 15, 17, 12]


In [6]:
# Labels are integer class ids counted from 0 (0~45 here), so the number of
# classes is the largest id plus one.
largest_label = np.max(y_train)
num_classes = largest_label + 1
print(num_classes, 'classes')

46 classes


### Preprocessing

In [None]:
# Pad
print('Pad sequences (samples x time)')
# Bring every sample of both splits to exactly `maxlen` timesteps.
x_train, x_test = [
    sequence.pad_sequences(split, maxlen=maxlen)
    for split in (x_train, x_test)
]

# `pad_sequences` returns a 2d numpy array (num_samples, num_timesteps),
# where `num_timesteps` equals `maxlen`
for label, split in (('x_train shape:', x_train), ('x_test shape:', x_test)):
    print(label, split.shape)

In [None]:
# Vectorize labels
print('Convert class vector to binary class matrix (for categorical_crossentropy)')
# Encode only while the labels are still a 1-D vector of class ids. Passing labels
# that are already one-hot (e.g. when this cell is run a second time) through
# `to_categorical` again would silently produce a 3-D array.
if np.ndim(y_train) == 1:
    y_train = to_categorical(y_train, num_classes)
if np.ndim(y_test) == 1:
    y_test = to_categorical(y_test, num_classes)
print('y_train shape:', y_train.shape)
print('y_test shape:', y_test.shape)

### Commons

In [None]:
# Hyperparameters shared by all models in this notebook.
MAX_EPOCHS = 30      # passed as `epochs` to `model.fit`
BATCH_SIZE = 128     # used for both training and evaluation
EMBEDDING_DIM = 128  # size of each word vector produced by the Embedding layer

In [None]:
# Names of the model variants.
# NOTE(review): this list is not read anywhere in the visible notebook and has no
# entry for the stacked bidirectional model - confirm whether it is still needed.
model_names = ['mlp', 'vanilla', 'stacked', 'bi']

# Root directory under which model checkpoints are stored.
BASE_PATH = './reuters_model/'
# `exist_ok=True` keeps this cell safe to re-run and avoids the race between
# checking for the directory and creating it.
os.makedirs(BASE_PATH, exist_ok=True)

In [None]:
# Early-stopping callback: watches `val_loss` with a patience of 3 epochs.
early_stopping = EarlyStopping(patience=3, monitor='val_loss')

In [None]:
# model checkpoint callback
def create_checkpoint(model_name):
    """Build a `ModelCheckpoint` callback that saves under `BASE_PATH/<model_name>/`.

    The target directory is created when it does not exist yet, together with
    `BASE_PATH` itself if that is missing. Checkpoint files are named
    `{epoch:02d}-{val_loss:.4f}.hdf5`; the callback monitors `val_loss` and is
    configured with `save_best_only=True`.

    Args:
        model_name: name of the subdirectory of `BASE_PATH` to save into.

    Returns:
        The configured `ModelCheckpoint` callback.
    """
    model_dir = os.path.join(BASE_PATH, model_name)
    # `makedirs` (unlike `mkdir`) also creates a missing `BASE_PATH`, and
    # `exist_ok=True` makes repeated calls for the same model a no-op.
    os.makedirs(model_dir, exist_ok=True)

    return ModelCheckpoint(filepath=os.path.join(model_dir, '{epoch:02d}-{val_loss:.4f}.hdf5'),
                           monitor='val_loss',
                           verbose=1,
                           save_best_only=True)

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt

def plot_train(history):
    """Plot the loss and accuracy curves recorded during training.

    Loss is drawn against the left y-axis and accuracy against a twin right
    y-axis of the same figure.

    Args:
        history: object whose `.history` attribute maps metric names to
            per-epoch value lists (the value returned by `model.fit`). It must
            contain 'loss' and the training accuracy under either 'acc' or
            'accuracy'. Validation curves ('val_loss', 'val_acc' /
            'val_accuracy') are drawn only when present.

    Raises:
        KeyError: if 'loss' or the training accuracy is missing.
    """
    logs = history.history
    # Depending on the Keras version, `metrics=['accuracy']` is logged either as
    # 'acc' or as 'accuracy'; accept both spellings.
    acc_key = 'acc' if 'acc' in logs else 'accuracy'
    val_acc_key = 'val_' + acc_key

    fig, loss_ax = plt.subplots(figsize=(6,6))
    acc_ax = loss_ax.twinx()

    loss_ax.plot(logs['loss'], 'y', label='train loss')
    if 'val_loss' in logs:
        loss_ax.plot(logs['val_loss'], 'r', label='val loss')
    loss_ax.set_ylim([0.0, 3.0])

    acc_ax.plot(logs[acc_key], 'b', label='train acc')
    if val_acc_key in logs:
        acc_ax.plot(logs[val_acc_key], 'g', label='val acc')
    acc_ax.set_ylim([0.0, 1.0])

    loss_ax.set_xlabel('epoch')
    loss_ax.set_ylabel('loss')
    acc_ax.set_ylabel('accuracy')

    loss_ax.legend(loc='upper left')
    acc_ax.legend(loc='lower left')

    plt.show()

### 1. Vanilla 1-layer LSTM
See [documentation](https://keras.io/layers/recurrent/#lstm)

In [None]:
# 1. Build model: embedding -> single LSTM -> softmax over the topic classes
model = Sequential([
    # output: (batch_size, timesteps, input_dim)
    Embedding(max_features, EMBEDDING_DIM, input_length=maxlen),
    # input: (batch_size, timesteps, input_dim)
    LSTM(128, dropout=0.2, recurrent_dropout=0.2),
    Dense(num_classes, activation='softmax'),
])

# 2. Compile model
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# 3. Train model, holding out 20% of the training data for validation
callbacks = [early_stopping, create_checkpoint('vanilla')]
history = model.fit(x_train, y_train,
                    batch_size=BATCH_SIZE,
                    epochs=MAX_EPOCHS,
                    validation_split=0.2,
                    callbacks=callbacks)

plot_train(history)

# 4. Evaluate model on the test split
loss, acc = model.evaluate(x_test, y_test, batch_size=BATCH_SIZE)
print('----- Evaluation loss and metrics -----')
print('Test loss:', loss)
print('Test accuracy:', acc)

### 2. Stacked LSTM

In [None]:
# 1. Build model: embedding -> three stacked LSTMs -> softmax over the topic classes
model = Sequential([
    Embedding(max_features, EMBEDDING_DIM, input_length=maxlen),
    # Exercise code: every LSTM except the last one is created with
    # `return_sequences=True`.
    LSTM(128, return_sequences=True),
    LSTM(128, return_sequences=True),
    LSTM(128),
    Dense(num_classes, activation='softmax'),
])

# 2. Compile model
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# 3. Train model, holding out 20% of the training data for validation
callbacks = [early_stopping, create_checkpoint('stacked')]
history = model.fit(x_train, y_train,
                    batch_size=BATCH_SIZE,
                    epochs=MAX_EPOCHS,
                    validation_split=0.2,
                    callbacks=callbacks)

plot_train(history)

# 4. Evaluate model on the test split
loss, acc = model.evaluate(x_test, y_test, batch_size=BATCH_SIZE)
print('----- Evaluation loss and metrics -----')
print('Test loss:', loss)
print('Test accuracy:', acc)

### 3. Bidirectional LSTM
See [documentation](https://keras.io/layers/wrappers/)

In [None]:
# 1. Build model: embedding -> bidirectional LSTM -> softmax over the topic classes
model = Sequential([
    Embedding(max_features, EMBEDDING_DIM, input_length=maxlen),
    # Exercise code: one LSTM looks at the sequence forward, the other backward.
    Bidirectional(LSTM(128)),
    Dense(num_classes, activation='softmax'),
])

# 2. Compile model
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# 3. Train model, holding out 20% of the training data for validation
callbacks = [early_stopping, create_checkpoint('bi')]
history = model.fit(x_train, y_train,
                    batch_size=BATCH_SIZE,
                    epochs=MAX_EPOCHS,
                    validation_split=0.2,
                    callbacks=callbacks)

plot_train(history)

# 4. Evaluate model on the test split
loss, acc = model.evaluate(x_test, y_test, batch_size=BATCH_SIZE)

print('----- Evaluation loss and metrics -----')
print('Test loss:', loss)
print('Test accuracy:', acc)

### 4. Stacked bidirectional LSTM

In [None]:
# TODO: YOUR CODE HERE


