# Tokenizer and pad_sequences

In [1]:
import tensorflow as tf
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer

# Toy corpus: the mixed casing and stray punctuation show how the tokenizer
# normalizes text before it counts words.
text1 = 'Some ThING to eat !'
text2 = 'some thing to drink .'
text3 = 'some thing'
texts = [text1, text2, text3]
print(texts)
print('========')

# num_words: None or an int; caps how many distinct words (ranked by
# frequency) the tokenizer keeps when converting texts to sequences.
tokenizer = Tokenizer(num_words=100)
tokenizer.fit_on_texts(texts)
# word -> integer id mapping built by fit_on_texts
word_index = tokenizer.word_index
print(tokenizer.word_counts)
print(word_index)

print('========')
# Replace every word of every text with its integer id
sequences = tokenizer.texts_to_sequences(texts)
print(sequences)
print('found {} unique tokens'.format(len(word_index)))

['Some ThING to eat !', 'some thing to drink .', 'some thing']
OrderedDict([('some', 3), ('thing', 3), ('to', 2), ('eat', 1), ('drink', 1)])
{'some': 1, 'thing': 2, 'to': 3, 'eat': 4, 'drink': 5}
[[1, 2, 3, 4], [1, 2, 3, 5], [1, 2]]
found 5 unique tokens


In [2]:
# Bring every sequence to the same length (8): shorter ones get zeros
# appended at the end because padding='post'.
pad_seq = pad_sequences(sequences, padding='post', maxlen=8)
print(pad_seq)

[[1 2 3 4 0 0 0 0]
 [1 2 3 5 0 0 0 0]
 [1 2 0 0 0 0 0 0]]


# MNIST

In [3]:
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation, SimpleRNN
from tensorflow.keras.utils import to_categorical, plot_model
from tensorflow.keras.datasets import mnist

# Load the MNIST dataset through Keras; the call returns a (train, test) pair,
# each of which unpacks into (images, labels).
(x_train, y_train), (x_test, y_test) = mnist.load_data()

In [4]:
# Number of classes = how many distinct label values occur in the training set
num_labels = np.unique(y_train).size

In [45]:
num_labels

10

In [5]:
# Convert integer class labels to one-hot vectors.
# The ndim guard makes this cell safe to re-run: without it, a second run
# would one-hot encode the already-encoded labels and add an extra axis.
# num_classes is pinned to num_labels so train and test always get the same
# vector width instead of each being inferred from its own maximum label.
if y_train.ndim == 1:
    y_train = to_categorical(y_train, num_classes=num_labels)
if y_test.ndim == 1:
    y_test = to_categorical(y_test, num_classes=num_labels)

In [6]:
y_train[:10]

array([[0., 0., 0., 0., 0., 1., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 1.],
       [0., 0., 1., 0., 0., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 1., 0., 0., 0., 0., 0.]], dtype=float32)

In [7]:
x_train[:10]

array([[[0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        ...,
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0]],

       [[0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        ...,
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0]],

       [[0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        ...,
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0]],

       ...,

       [[0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        ...,
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0]],

       [[0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        ...,
        [0, 0, 0, ..., 

In [8]:
# Reshape to (samples, image_size, image_size) and scale pixels to [0, 1].
# Images are square, so the side length is read from axis 1.
image_size = x_train.shape[1]
x_train = np.reshape(x_train, [-1, image_size, image_size])
x_test = np.reshape(x_test, [-1, image_size, image_size])
# Only rescale while the pixels are still raw integers. This keeps the cell
# idempotent: re-running it must not divide already-normalized data by 255
# a second time.
if np.issubdtype(x_train.dtype, np.integer):
    x_train = x_train.astype('float32') / 255
if np.issubdtype(x_test.dtype, np.integer):
    x_test = x_test.astype('float32') / 255

In [62]:
# Hand check of the second SimpleRNN layer's parameter count (256 inputs -> 256 units):
# input kernel 256*256 + recurrent kernel 256*256 + bias 256.
256*256*2+256

131328

In [59]:
# network parameters
input_shape = (image_size, image_size)  # (timesteps, input_dim): one image row per timestep
batch_size = 128
units = 256

# NOTE(review): Bidirectional is not used by this cell; the import is kept in
# case another cell relies on the name.
from tensorflow.keras.layers import Bidirectional

# Two stacked SimpleRNN layers followed by a softmax classifier.
model = Sequential()
# First layer returns the full sequence so the second RNN receives one
# vector per timestep.
model.add(SimpleRNN(units=units,
                    input_shape=input_shape,
                    return_sequences=True))
# Second layer reuses `units` (instead of a repeated literal) so changing the
# setting above resizes both layers; it returns only its last output.
model.add(SimpleRNN(units=units, return_sequences=False))
# One output per class, turned into probabilities by softmax
model.add(Dense(num_labels))
model.add(Activation('softmax'))
model.summary()

Model: "sequential_17"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
simple_rnn_23 (SimpleRNN)    (None, 28, 256)           72960     
_________________________________________________________________
simple_rnn_24 (SimpleRNN)    (None, 256)               131328    
_________________________________________________________________
dense_12 (Dense)             (None, 10)                2570      
_________________________________________________________________
activation_3 (Activation)    (None, 10)                0         
Total params: 206,858
Trainable params: 206,858
Non-trainable params: 0
_________________________________________________________________


In [24]:
# Hand check of the first SimpleRNN layer's parameter count (28 inputs -> 256 units):
# recurrent kernel 256*256 + input kernel 28*256 + bias 256.
256*256+28*256+256

72960

In [10]:
np.__version__

'1.18.5'

In [37]:
x_train.shape

(25000, 80)

In [38]:
image_size

28

In [11]:
# Training configuration:
#   - categorical cross-entropy, which pairs with one-hot encoded labels
#   - the stock SGD optimizer
#   - accuracy as the reported metric (a classification task)
model.compile(
    optimizer='sgd',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
# Train for 20 epochs in mini-batches of `batch_size` samples
model.fit(
    x_train,
    y_train,
    batch_size=batch_size,
    epochs=20,
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0x1f15c4ef730>

In [7]:
# Score the trained model on the held-out test set.
# The loss is bound to a real name rather than `_`: in IPython, `_` is the
# "last output" variable, and assigning to it stops the shell from updating it.
test_loss, acc = model.evaluate(x_test,
                                y_test,
                                batch_size=batch_size,
                                verbose=0)
print("\nTest accuracy: %.1f%%" % (100.0 * acc))


Test accuracy: 98.0%


In [28]:
y_train[:5]

array([1, 0, 0, 1, 0], dtype=int64)

In [29]:
# Mini-batch size shared by both tf.data pipelines below
batch_size = 32

# Training pipeline: (x, y) pairs, shuffled through a 1000-element buffer,
# then grouped into batches. drop_remainder=True discards a final short
# batch so every batch holds exactly `batch_size` rows.
db_train = (
    tf.data.Dataset.from_tensor_slices((x_train, y_train))
    .shuffle(1000)
    .batch(batch_size, drop_remainder=True)
)

# Test pipeline: same batching, no shuffling
db_test = (
    tf.data.Dataset.from_tensor_slices((x_test, y_test))
    .batch(batch_size, drop_remainder=True)
)

In [30]:
# Length of each word-embedding vector; MyRNN passes it to its Embedding layer.
embedding_len =32

class MyRNN(tf.keras.Model):
    """Binary sequence classifier: Embedding -> hand-unrolled SimpleRNNCell -> Dense(1) -> sigmoid.

    The constructor reads the notebook-level names `total_words`,
    `embedding_len` and `max_review_len`; they must be defined before an
    instance is created.
    """

    def __init__(self, units):
        """Create the layers.

        Args:
            units: size of the recurrent hidden state.
        """
        super(MyRNN, self).__init__()

        # Kept so call() can build a zero initial state that matches the
        # incoming batch, instead of fixing the batch size at construction.
        self.units = units
        # Token ids -> dense vectors:
        # [b, max_review_len] => [b, max_review_len, embedding_len]
        self.embedding = tf.keras.layers.Embedding(total_words, embedding_len,
                                          input_length=max_review_len)
        # A single recurrent cell, applied once per timestep inside call()
        self.rnn_cell = tf.keras.layers.SimpleRNNCell(units, dropout=0.2)
        # Final hidden state [b, units] => one logit [b, 1].
        # A binary classifier can use a single output neuron, but the loss
        # must then be binary cross-entropy.
        self.fc= tf.keras.layers.Dense(1)

    def call(self, inputs, training=None):
        """Run the forward pass.

        Args:
            inputs: batch of integer token-id sequences, [b, max_review_len].
            training: forwarded to the recurrent cell (it controls dropout).

        Returns:
            Tensor of shape [b, 1]: the sigmoid of the final logit.
        """
        # [b, max_review_len] => [b, max_review_len, embedding_len]
        x = self.embedding(inputs)
        # Zero initial hidden state sized from the actual batch, so any batch
        # size works (including a final partial batch).
        state = [tf.zeros([tf.shape(x)[0], self.units], dtype=x.dtype)]
        # Unroll over time: each `word` is one timestep, [b, embedding_len]
        for word in tf.unstack(x, axis=1):
            # out: [b, units]; state carries the hidden state to the next step
            out, state = self.rnn_cell(word, state, training=training)
        # Last timestep's output: [b, units] => [b, 1]
        x = self.fc(out)
        # Squash the logit into a probability
        prob = tf.sigmoid(x)

        return prob

# Instantiate the classifier with a 64-unit hidden state. Its single sigmoid
# output is trained with binary cross-entropy using Adam.
model = MyRNN(units=64)
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [23]:
# db_train / db_test are already batched by their tf.data pipelines, so no
# batch_size is passed here: Keras documents that batch_size must not be
# given for dataset inputs, and a different value would only misstate the
# batch size actually used.
history = model.fit(db_train,
                    epochs=5,
                    validation_data=db_test,
                    verbose=1)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
