In [1]:
import numpy as np

from keras.datasets import imdb
from keras import Model, Sequential
from keras.layers import *
from keras import Input

Using TensorFlow backend.


In [31]:
from keras.preprocessing import text
from keras.preprocessing import sequence

In [33]:
# Sequence-length cap (in tokens) and vocabulary size (number of word indices kept)
max_len, dimension = 500, 10000

In [35]:
def vectorize_sequences(sequences, dimension = 10000):
    """Multi-hot encode integer sequences.

    Args:
        sequences: iterable of integer index sequences, one per sample.
        dimension: width of each encoded row; every index must be < dimension.

    Returns:
        A float array of shape (len(sequences), dimension) in which
        row i holds 1.0 at every index that occurs in sequences[i] and 0.0
        everywhere else (repeated indices still yield a single 1.0).
    """
    multi_hot = np.zeros((len(sequences), dimension))
    for row, token_ids in enumerate(sequences):
        # Fancy indexing switches on every listed column of this row at once
        multi_hot[row, token_ids] = 1.
    return multi_hot

In [52]:
# Load the IMDB reviews as sequences of word indices, keeping only the
# `dimension` most frequent words.
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=dimension)

# Multi-hot encode the raw, variable-length reviews directly. They are
# deliberately NOT run through pad_sequences() first: padding would insert
# index 0 into every short review (turning feature 0 into a "was padded" flag)
# and truncation would drop tokens from long reviews. A bag-of-words encoding
# needs neither, since its output width is fixed by `dimension`.
x_train = vectorize_sequences(x_train, dimension)
x_test = vectorize_sequences(x_test, dimension)

In [53]:
# Sanity-check the encoded data: array shapes, sample counts, first encoded review
train_count, test_count = len(x_train), len(x_test)
first_review = x_train[0]

print(x_train.shape, y_train.shape)
print(train_count, test_count)
print(first_review)

(25000, 10000) (25000,)
25000 25000
[0. 0. 1. ... 0. 0. 0.]


In [59]:
# Binary classifier built with the Sequential API; the layer stack is passed
# to the constructor in order instead of being added one layer at a time.
sequence_model = Sequential([
    # Input width equals the number of encoded features per sample
    Dense(16, activation='relu', input_shape=(x_train.shape[-1],)),
    Dropout(0.5),
    Dense(16, activation='relu'),
    # Single sigmoid unit -> probability of the positive class
    Dense(1, activation='sigmoid'),
])

sequence_model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_19 (Dense)             (None, 16)                160016    
_________________________________________________________________
dropout_1 (Dropout)          (None, 16)                0         
_________________________________________________________________
dense_20 (Dense)             (None, 16)                272       
_________________________________________________________________
dense_21 (Dense)             (None, 1)                 17        
Total params: 160,305
Trainable params: 160,305
Non-trainable params: 0
_________________________________________________________________


In [60]:
# The same architecture expressed with the functional API: each layer is
# called on the tensor produced by the previous one.
input_tensor = Input(shape=(x_train.shape[-1],))
hidden = Dense(16, activation='relu')(input_tensor)
hidden = Dropout(0.5)(hidden)
hidden = Dense(16, activation='relu')(hidden)
output_tensor = Dense(1, activation='sigmoid')(hidden)

# A Model is defined by its input and output tensors
model = Model(inputs=input_tensor, outputs=output_tensor)

model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_4 (InputLayer)         (None, 10000)             0         
_________________________________________________________________
dense_22 (Dense)             (None, 16)                160016    
_________________________________________________________________
dropout_2 (Dropout)          (None, 16)                0         
_________________________________________________________________
dense_23 (Dense)             (None, 16)                272       
_________________________________________________________________
dense_24 (Dense)             (None, 1)                 17        
Total params: 160,305
Trainable params: 160,305
Non-trainable params: 0
_________________________________________________________________


In [61]:
# Compile both networks with identical settings so their results are comparable
for network in (sequence_model, model):
    network.compile(loss='binary_crossentropy', optimizer='adam', metrics=['acc'])

In [62]:
# Train the Sequential model; 20% of the training data is held out for validation
sequence_history = sequence_model.fit(
    x=x_train, y=y_train,
    batch_size=128, epochs=10,
    validation_split=0.2,
)

Train on 20000 samples, validate on 5000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [63]:
# Train the functional-API model with the same settings as the Sequential one
api_history = model.fit(
    x=x_train, y=y_train,
    batch_size=128, epochs=10,
    validation_split=0.2,
)

Train on 20000 samples, validate on 5000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [64]:
# Print the test-set [loss, accuracy] of each model, Sequential first
for network in (sequence_model, model):
    print(network.evaluate(x_test, y_test))

[0.6098022779655456, 0.82212]
[0.647737896361351, 0.82144]


In [None]:
# Multi-input model: a reference text and a question are encoded separately, then merged to predict an answer

In [69]:
# Vocabulary sizes of the two text inputs, and the number of answer classes
question_voca_size = reference_voca_size = 10000
answer_voca_size = 500

In [72]:
from keras.layers import Embedding, LSTM
from keras import layers

# Reference branch: variable-length token ids -> 64-d embeddings -> 32-d LSTM encoding
refer_input = Input(shape=(None,), dtype='int32', name='refer')
embedded_refer = Embedding(reference_voca_size, 64)(refer_input)
encoded_refer = LSTM(32)(embedded_refer)

# Question branch: same idea, but the embedding table is sized by the
# question vocabulary (it was previously sized by reference_voca_size, which
# only worked because the two constants happen to be equal).
q_input = Input(shape=(None,), dtype='int32', name='question')
embedded_q = Embedding(question_voca_size, 32)(q_input)
encoded_q = LSTM(32)(embedded_q)

# Join the two encodings along the feature axis
concatenated = layers.concatenate([encoded_refer, encoded_q], axis=-1)

# Softmax over the answer vocabulary
answer = Dense(answer_voca_size, activation='softmax')(concatenated)

model = Model([refer_input, q_input], answer)
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
refer (InputLayer)              (None, None)         0                                            
__________________________________________________________________________________________________
question (InputLayer)           (None, None)         0                                            
__________________________________________________________________________________________________
embedding_1 (Embedding)         (None, None, 64)     640000      refer[0][0]                      
__________________________________________________________________________________________________
embedding_2 (Embedding)         (None, None, 32)     320000      question[0][0]                   
__________________________________________________________________________________________________
lstm_1 (LS

In [74]:
# Size of the synthetic dataset: number of samples and tokens per sample
num_samples, max_len = 1000, 100

In [78]:
import numpy as np
from keras.utils import to_categorical
# Random stand-in data: token ids are drawn from [1, vocabulary size)
refer = np.random.randint(1, reference_voca_size, size=(num_samples, max_len))
question = np.random.randint(1, question_voca_size, size=(num_samples, max_len))

# One random answer class per sample, one-hot encoded. num_classes is pinned
# because to_categorical() otherwise infers the width from the largest label
# actually drawn; whenever the top class is not sampled the targets come out
# narrower than the model's answer_voca_size-wide softmax and fit() fails.
answer = np.random.randint(0, answer_voca_size, size=(num_samples,))
answer = to_categorical(answer, num_classes=answer_voca_size)

print(refer.shape, question.shape, answer.shape)

(1000, 100) (1000, 100) (1000, 500)


In [80]:
# Multi-class target -> categorical cross-entropy, tracking accuracy
model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['acc'])

# Inputs are passed as a dict keyed by the names given to the Input layers
named_inputs = {'refer': refer, 'question': question}
history = model.fit(
    named_inputs, answer,
    batch_size=128, epochs=10,
    validation_split=0.2,
)

Train on 800 samples, validate on 200 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [None]:
# Multi-output model: one shared text encoder feeding three prediction heads (age, income, gender)

In [83]:
from keras.layers import Conv1D, MaxPooling1D, GlobalMaxPooling1D
from keras import Input
from keras.models import Model

In [84]:
voca_size = 50000
num_income_grous = 10

In [97]:
# Shared encoder for a post: token ids -> embeddings -> stacked 1-D convolutions.
post_input = Input(shape=(None,), dtype='int32', name='post')
embedded_input = Embedding(voca_size, 256)(post_input)

# Every layer consumes the output of the previous one (`x`). Previously each
# Conv1D was applied to `embedded_input` again, which silently threw away all
# earlier conv/pooling layers.
# padding='same' keeps the sequence length unchanged through the convolutions,
# so only the two 5x poolings shrink it (e.g. 100 -> 20 -> 4). With the default
# 'valid' padding the chained stack would need inputs well over 100 tokens long
# and would fail on the 100-token samples this notebook trains on.
x = Conv1D(128, 5, padding='same', activation='relu')(embedded_input)
x = MaxPooling1D(5)(x)
x = Conv1D(256, 5, padding='same', activation='relu')(x)
x = Conv1D(256, 5, padding='same', activation='relu')(x)
x = MaxPooling1D(5)(x)
x = Conv1D(256, 5, padding='same', activation='relu')(x)
x = Conv1D(256, 5, padding='same', activation='relu')(x)

# Collapse the time axis, then project to the 128-d representation shared by all heads
x = GlobalMaxPooling1D()(x)
x = Dense(128, activation='relu')(x)

In [98]:
# Three heads on top of the shared representation `x`.
# Age is a regression target, so its head stays linear.
age_prediction = Dense(1, name='age')(x)
# Income group is trained with categorical cross-entropy, which expects a
# probability distribution over the classes -> softmax.
income_prediction = Dense(num_income_grous, activation='softmax', name='income')(x)
# Gender is trained with binary cross-entropy, which expects a probability
# in [0, 1] -> sigmoid.
gender_predection = Dense(1, activation='sigmoid', name='gender')(x)

# Output order here is age, income, gender; the layer names are what the
# loss/target dicts are keyed by.
model = Model(
    post_input,
    [age_prediction, income_prediction, gender_predection]
)

In [103]:
# One loss per output, keyed by the output layer's name
output_losses = {
    'age': 'mse',
    'income': 'categorical_crossentropy',
    'gender': 'binary_crossentropy',
}
# Per-output weights applied to each loss when they are combined
output_loss_weights = {'age': 0.25, 'income': 1., 'gender': 10.}

model.compile(
    optimizer='rmsprop',
    loss=output_losses,
    loss_weights=output_loss_weights,
)

In [104]:
# Size of the synthetic dataset: number of posts and tokens per post
num_samples, max_len = 1000, 100

In [105]:
import numpy as np
from keras.utils import to_categorical
# Random stand-in posts: token ids drawn from [1, voca_size)
post = np.random.randint(1, voca_size, size=(num_samples, max_len))

# Regression target: integer ages in [0, 100)
y_age = np.random.randint(0, 100, size=(num_samples,))

# Income group, one-hot encoded. num_classes is pinned so the target width
# always equals the width of the 'income' output, whatever labels are drawn.
y_income = np.random.randint(0, num_income_grous, size=(num_samples,))
y_income = to_categorical(y_income, num_classes=num_income_grous)

# Binary target: 0 or 1
y_gender = np.random.randint(0, 2, size=(num_samples,))

# Report the shape of `post` (this section's input), not `refer`, which is a
# leftover from the multi-input section.
print(post.shape, y_age.shape, y_income.shape, y_gender.shape)

(1000, 100) (1000,) (1000, 10) (1000,)


In [106]:
# Targets are passed as a dict keyed by the output layer names
targets = {'age': y_age, 'income': y_income, 'gender': y_gender}
history = model.fit(
    post, targets,
    batch_size=64, epochs=10,
    validation_split=0.2,
)

Train on 800 samples, validate on 200 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
