## RNN
- 시계열(time series) 데이터처럼 신호가 앞뒤로 상관관계를 가지는 경우에 적합한 RNN을 구현해 본다.
- 영화 추천 데이터(리뷰, 추천=1, 미추천=0)를 이용한 LSTM 구현
- https://keras.io/datasets/ -> imdb

In [2]:
from keras.preprocessing import sequence
from keras.datasets import imdb
from keras import layers, models

In [3]:
class Data:
    """IMDB review dataset, loaded and cut/padded to a fixed length.

    After construction the instance exposes ``x_train``, ``y_train``,
    ``x_test`` and ``y_test``.

    Args:
        max_features: Forwarded to ``imdb.load_data`` as ``num_words``.
        maxlen: Sequence length every review is padded or truncated to.
    """

    def __init__(self, max_features=20000, maxlen=80):
        # See the Keras "datasets" documentation for the meaning of num_words.
        train_split, test_split = imdb.load_data(num_words=max_features)
        train_reviews, train_labels = train_split
        test_reviews, test_labels = test_split

        # Look at no more than `maxlen` words per review: shorter reviews
        # are filled with padding, the excess of longer ones is cut off.
        self.x_train = sequence.pad_sequences(train_reviews, maxlen=maxlen)
        self.x_test = sequence.pad_sequences(test_reviews, maxlen=maxlen)

        self.y_train = train_labels
        self.y_test = test_labels

In [4]:
class RNN_LSTM(models.Model):
    """Binary classifier: Embedding -> LSTM -> single sigmoid unit.

    The network is assembled with the functional API and handed to
    ``models.Model.__init__``; the model is compiled before the
    constructor returns.

    Args:
        max_features: Input dimension of the embedding layer.
        maxlen: Length of each input sequence.
    """

    def __init__(self, max_features, maxlen):
        tokens = layers.Input(shape=(maxlen,))

        embed = layers.Embedding(input_dim=max_features, output_dim=128)
        embedded = embed(tokens)

        lstm = layers.LSTM(units=128, dropout=0.2, recurrent_dropout=0.2)
        encoded = lstm(embedded)

        classify = layers.Dense(units=1, activation='sigmoid')
        prediction = classify(encoded)

        # Only locals are used above: the graph is registered with the
        # base class here, before anything is set on `self`.
        super().__init__(tokens, prediction)

        self.compile(
            optimizer='adam',
            loss='binary_crossentropy',
            metrics=['accuracy'],
        )

#### Sequential 구현과 비교
```python
model = Sequential()
model.add(Embedding(max_features, 128))
model.add(LSTM(128, dropout=0.2, recurrent_dropout=0.2))
model.add(Dense(1, activation='sigmoid'))
```

In [5]:
class Machine:
    """Bundle the dataset and the LSTM model and drive one experiment.

    Args:
        max_features: Passed to both ``Data`` and ``RNN_LSTM``.
        maxlen: Passed to both ``Data`` and ``RNN_LSTM``.
    """

    def __init__(self, max_features=20000, maxlen=80):
        self.data = Data(max_features, maxlen)
        self.model = RNN_LSTM(max_features, maxlen)

    def run(self, epochs=3, batch_size=32):
        """Train the model, evaluate it on the test split, print the result.

        Args:
            epochs: Number of training epochs passed to ``fit``.
            batch_size: Batch size used for both ``fit`` and ``evaluate``.
        """
        data = self.data
        model = self.model
        print('Training stage')
        print('==============')
        # The test split is also used as validation data during training.
        model.fit(data.x_train, data.y_train,
                  batch_size=batch_size,
                  epochs=epochs,
                  validation_data=(data.x_test, data.y_test))
        # evaluate() yields the loss first, then the accuracy metric.
        score, acc = model.evaluate(data.x_test, data.y_test,
                                    batch_size=batch_size)
        # Fixed: the original message lacked '=' after "loss".
        print('Test performance accuracy={0}, loss={1}'.format(acc, score))

In [6]:
def main():
    """Create a ``Machine`` with its default settings and run it."""
    Machine().run()

In [7]:
# Run the experiment: main() builds a Machine with defaults and calls run().
main()

Downloading data from https://s3.amazonaws.com/text-datasets/imdb.npz
Instructions for updating:
keep_dims is deprecated, use keepdims instead
Instructions for updating:
keep_dims is deprecated, use keepdims instead
Training stage
Train on 25000 samples, validate on 25000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3
Test performance accuracy=0.82572, loss0.42156429757118224
