In [1]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import Model, layers, optimizers, datasets
import numpy as np

In [2]:
# Standard-normal dummy input of shape (32, 784, 100); the LSTM layers below
# treat a 3-D input as (batch, timesteps, features).
x = tf.random.normal(shape=(32, 784, 100), mean=0.0, stddev=1.0)

In [3]:
# units: dimension of the output (hidden dim)
lstm = layers.LSTM(units=10)

In [4]:
# Compare the input shape with the shape produced by the default LSTM layer.
print(f"x: {x.shape}")
output = lstm(x)
print("-" * 40)
print(f"output: {output.shape}")

x: (32, 784, 100)
----------------------------------------
output: (32, 10)


In [5]:
# return_sequences: return only the last output of the output sequence (False)
#                   or the full sequence (True).
# return_state: whether to also return the last state (hidden, cell) in
#               addition to the output.
lstm2 = layers.LSTM(
    units=10,
    return_sequences=True,
    return_state=True,
)

In [6]:
# With return_state=True the layer returns three tensors: the output plus the
# final hidden and cell states.
print(f"x: {x.shape}")
output2, final_hidden_state, final_cell_state = lstm2(x)
print(f"output2: {output2.shape}")
print(f"final_hidden_state: {final_hidden_state.shape}")
print(f"final_cell_state: {final_cell_state.shape}")

x: (32, 784, 100)
output2: (32, 784, 10)
final_hidden_state: (32, 10)
final_cell_state: (32, 10)


### LSTM Application

In [7]:
class LSTM(Model):
    """Two-layer stacked LSTM classifier.

    The network is a ``Sequential`` stack: an LSTM that returns its full
    output sequence, batch normalization, a second LSTM that returns only its
    last output, batch normalization, and a softmax ``Dense`` head.

    Args:
        units1: Number of units in the first LSTM layer.
        units2: Number of units in the second LSTM layer.
        num_classes: Number of units in the final softmax layer.
    """

    def __init__(self, units1, units2, num_classes):
        super().__init__()
        self.sequential = tf.keras.Sequential([
            # return_sequences=True keeps the time axis so the second LSTM
            # receives a sequence rather than a single vector.
            layers.LSTM(units1, return_sequences=True),
            layers.BatchNormalization(),
            layers.LSTM(units2),
            layers.BatchNormalization(),
            layers.Dense(num_classes, activation=tf.nn.softmax),
        ])

    def call(self, x, training=None):
        """Run the forward pass.

        Args:
            x: Input tensor fed to the first LSTM layer.
            training: Optional boolean forwarded to the inner stack so that
                BatchNormalization can distinguish training from inference.
                When left as ``None`` Keras resolves the mode itself.

        Returns:
            The output of the final softmax layer.
        """
        # Forward the flag explicitly instead of relying on implicit
        # propagation; the stack contains mode-dependent layers.
        return self.sequential(x, training=training)

In [8]:
# Load the MNIST train/test splits, then divide the image arrays by 255.0
# (presumably mapping 8-bit pixel values into [0, 1]); labels are left as-is.
(x_train, y_train), (x_test, y_test) = datasets.mnist.load_data()
x_train = x_train / 255.0
x_test = x_test / 255.0

In [9]:
# Layer widths for the two LSTM layers and the size of the softmax output.
units_1, units_2, num_classes = 128, 256, 10

model = LSTM(units1=units_1, units2=units_2, num_classes=num_classes)

In [10]:
# Training setup: SGD optimizer, sparse categorical cross-entropy loss and an
# accuracy metric. The loss is left at its defaults, which fits a model whose
# last layer already applies softmax.
model.compile(
    optimizer="sgd",
    loss=keras.losses.SparseCategoricalCrossentropy(),
    metrics=["accuracy"],
)

In [11]:
# Train for 3 epochs with batches of 128; the test split is used as the
# validation data.
model.fit(
    x_train,
    y_train,
    validation_data=(x_test, y_test),
    batch_size=128,
    epochs=3,
)

Epoch 1/3
Epoch 2/3
Epoch 3/3


<keras.callbacks.History at 0x25b0fc07310>

### Bidirectional LSTM Application

In [12]:
class Bi_LSTM(Model):
    """Two-layer stacked bidirectional LSTM classifier.

    The network is a ``Sequential`` stack: a bidirectional LSTM that returns
    its full output sequence, batch normalization, a second bidirectional LSTM
    that returns only its last output, batch normalization, and a softmax
    ``Dense`` head.

    Args:
        units1: Number of units of the LSTM wrapped by the first
            ``Bidirectional`` layer.
        units2: Number of units of the LSTM wrapped by the second
            ``Bidirectional`` layer.
        num_classes: Number of units in the final softmax layer.
    """

    def __init__(self, units1, units2, num_classes):
        super().__init__()
        self.sequential = tf.keras.Sequential([
            # return_sequences=True keeps the time axis so the second
            # recurrent layer receives a sequence rather than a single vector.
            layers.Bidirectional(
                layers.LSTM(units1, return_sequences=True)),
            layers.BatchNormalization(),
            layers.Bidirectional(layers.LSTM(units2)),
            layers.BatchNormalization(),
            layers.Dense(num_classes, activation=tf.nn.softmax),
        ])

    def call(self, x, training=None):
        """Run the forward pass.

        Args:
            x: Input tensor fed to the first bidirectional LSTM layer.
            training: Optional boolean forwarded to the inner stack so that
                BatchNormalization can distinguish training from inference.
                When left as ``None`` Keras resolves the mode itself.

        Returns:
            The output of the final softmax layer.
        """
        # Forward the flag explicitly instead of relying on implicit
        # propagation; the stack contains mode-dependent layers.
        return self.sequential(x, training=training)

In [13]:
# Same load-and-rescale step as in the LSTM section: fetch MNIST again and
# divide the image arrays by 255.0; labels are left as-is.
(x_train, y_train), (x_test, y_test) = datasets.mnist.load_data()
x_train = x_train / 255.0
x_test = x_test / 255.0

In [14]:
# Layer widths for the two bidirectional LSTM layers and the size of the
# softmax output. Note that this rebinds `model` to the bidirectional network.
units_1, units_2, num_classes = 128, 256, 10

model = Bi_LSTM(units1=units_1, units2=units_2, num_classes=num_classes)

In [15]:
# Training setup: SGD optimizer, sparse categorical cross-entropy loss and an
# accuracy metric. The loss is left at its defaults, which fits a model whose
# last layer already applies softmax.
model.compile(
    optimizer="sgd",
    loss=keras.losses.SparseCategoricalCrossentropy(),
    metrics=["accuracy"],
)

In [16]:
# Train for 3 epochs with batches of 128; the test split is used as the
# validation data.
model.fit(
    x_train,
    y_train,
    validation_data=(x_test, y_test),
    batch_size=128,
    epochs=3,
)

Epoch 1/3
Epoch 2/3
Epoch 3/3


<keras.callbacks.History at 0x25b1555e040>