# Multiclass Classification

### Import data

In [1]:
from keras.datasets import reuters

# Load the Reuters newswire dataset with num_words=10000; words outside that
# vocabulary are encoded with the reserved "unknown" index.
train_split, test_split = reuters.load_data(num_words=10000)

# Each split is a (data, labels) pair.
train_data, train_labels = train_split
test_data, test_labels = test_split

Using TensorFlow backend.


Downloading data from https://s3.amazonaws.com/text-datasets/reuters.npz


In [3]:
# Report how many newswires ended up in each split.
n_train, n_test = len(train_data), len(test_data)
print("Length of train data:", n_train)
print("Length of test data:", n_test)

Length of train data: 8982
Length of test data: 2246


In [7]:
## Take a look at our data

# Print an encoded newswire
print("11th newswire (encoded):", train_data[10], sep='\n', end='\n\n')

# Print a decoded newswire
word_index = reuters.get_word_index()
# Invert the word -> index mapping so that encoded indices can be looked up.
reverse_word_index = {index: word for word, index in word_index.items()}
decoded_newswire = ' '.join(reverse_word_index.get(i - 3, '?') for i in train_data[10])  # Remark 1
print("11th newswire (decoded):", decoded_newswire, sep='\n')


# Remark 1: Indices are offset by 3 because 0, 1, and 2 are reserved indices for "padding", "start of sequence", and "unknown".
# - The leading `?` is translated from a 1, the start token;
# - Any other `?` would correspond to a 2, an unknown word (not one of the 10,000 most frequent words);
#   this particular newswire contains none.

11th newswire (encoded):
[1, 245, 273, 207, 156, 53, 74, 160, 26, 14, 46, 296, 26, 39, 74, 2979, 3554, 14, 46, 4689, 4329, 86, 61, 3499, 4795, 14, 61, 451, 4329, 17, 12]

11th newswire (decoded):
? period ended december 31 shr profit 11 cts vs loss 24 cts net profit 224 271 vs loss 511 349 revs 7 258 688 vs 7 200 349 reuter 3


### Preparing the data

In [8]:
import numpy as np

def onehot_encode(seq, dim=10000):
    """Multi-hot encode a collection of integer index sequences.

    Args:
        seq: Collection of sequences, one per sample; each inner sequence
            holds integer indices into a row of width ``dim``.
        dim: Width of each encoded row.

    Returns:
        A NumPy array of shape ``(len(seq), dim)`` that is zero everywhere
        except that row ``i`` holds 1 at every index listed in the ``i``-th
        sequence. An index repeated within a sequence still yields a single 1.
    """
    results = np.zeros((len(seq), dim))
    # The loop variable is named distinctly from the `seq` parameter so the
    # parameter is not shadowed while iterating over it.
    for i, indices in enumerate(seq):
        # Integer-array indexing sets every listed column of row i at once.
        results[i, indices] = 1
    return results

# Encode both splits with the same encoder (train first, then test).
x_train, x_test = (onehot_encode(split) for split in (train_data, test_data))

In [9]:
def onehot_label(labels, dim=46):
    """One-hot encode a sequence of integer class labels.

    Args:
        labels: Sequence of integer class indices, one per sample.
        dim: Width of each encoded row (number of classes).

    Returns:
        A NumPy array of shape ``(len(labels), dim)`` in which row ``i`` is
        all zeros except for a 1 in column ``labels[i]``.
    """
    n_samples = len(labels)
    encoded = np.zeros((n_samples, dim))
    for row, class_index in enumerate(labels):
        # Mark the column that corresponds to this sample's class.
        encoded[row, class_index] = 1
    return encoded

# One-hot encode the class labels of both splits.
y_train, y_test = onehot_label(train_labels), onehot_label(test_labels)

In [10]:
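# As a remark, Keras has a built-in `to_categorical` function that performs the same one-hot label encoding.
# Passing `num_classes=46` keeps the width fixed even if a split does not contain the highest class index:
# 
# from keras.utils import to_categorical
# y_train = to_categorical(train_labels, num_classes=46)
# y_test = to_categorical(test_labels, num_classes=46)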

### Construct the Model

In [None]:
from keras import models, layers

# Architecture (shapes are per sample):
#
#   Input   : (10000,)
#     Layer 1 : (10000 => 64) | relu
#     Layer 2 : (64    => 64) | relu
#     Layer 3 : (64    => 46) | softmax
#   Output  : (46,)   softmax scores, one per output unit
#
# The final layer has 46 units, so the per-sample output is a vector of
# length 46 rather than a single scalar.

model = models.Sequential()

# Only the first layer needs the input shape; later layers infer theirs.
model.add(layers.Dense(64, activation='relu', input_shape=(10000,)))
model.add(layers.Dense(64, activation='relu'))
model.add(layers.Dense(46, activation='softmax'))