In [35]:
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

import os 
#케라스 경고메시지 제거
os.environ['TF_CPP_MIN_LOG_LEVEL']='2'

import tensorflow as tf
import numpy as np
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam

Mounted at /content/drive


In [36]:
##필요한 값 초기화
tf.random.set_seed(22)
np.random.seed(22)

##텐서플로우 버전 확인 작업
assert tf.__version__.startswith('2.')

batch_size = 128
total_world = 10000
max_review_len = 80
embedding_len = 100

In [37]:
## 데이터셋 준비

#IMDB데이터셋 내려받기, 등장빈도 순위 1~10000에 해당하는 단어만 사용
(x_train, y_train),(x_test, y_test) = tf.keras.datasets.imdb.load_data(num_words=total_world)

x_train = tf.keras.preprocessing.sequence.pad_sequences(x_train, maxlen=max_review_len)
x_test = tf.keras.preprocessing.sequence.pad_sequences(x_train, maxlen=max_review_len)

#Numpy배열을 Dataset으로 변환
train_data = tf.data.Dataset.from_tensor_slices((x_train, y_train))
#위에서 만들어 진 데이터셋을 셔플을 이용해 변경시킴
train_data = train_data.shuffle(10000).batch(batch_size, drop_remainder=True)

test_data = tf.data.Dataset.from_tensor_slices((x_test,y_test))
test_data = test_data.batch(batch_size, drop_remainder=True)

print(f'x_train_shape : {x_train.shape} {tf.reduce_max(y_train)} {tf.reduce_min(y_train)}')
print(f'x_test_shape : {x_test.shape}')

sample = next(iter(test_data))
print(sample[0].shape)

  x_train, y_train = np.array(xs[:idx]), np.array(labels[:idx])
  x_test, y_test = np.array(xs[idx:]), np.array(labels[idx:])


x_train_shape : (25000, 80) 1 0
x_test_shape : (25000, 80)
(128, 80)


In [41]:
##RNN Cell을 이용한 Network 생성
class RNN_Build(tf.keras.Model):
    def __init__(self, units):
        super(RNN_Build, self).__init__()
        
        self.state0 = [tf.zeros([batch_size, units])]
        self.state1 = [tf.zeros([batch_size, units])]
        self.embedding = tf.keras.layers.Embedding(total_world, embedding_len,input_length=max_review_len)
        
        self.RNNCell0 = tf.keras.layers.SimpleRNNCell(units, dropout=0.2)
        self.RNNCell1 = tf.keras.layers.SimpleRNNCell(units, dropout=0.2)
        self.outlayer = tf.keras.layers.Dense(1)

##RNN셀 구현
    def call(self, inputs, training=None):
      x = inputs
      x = self.embedding(x) #입력 데이터에 원핫인코딩 적용
      state0 = self.state0
      state1 = self.state1
      for word in tf.unstack(x, axis=1):
        out0, state0 = self.RNNCell0(word, state0,training)
        out1, state1 = self.RNNCell1(out0, state1,training)
      x = self.outlayer(out1)
      prob = tf.sigmoid(x)
    
      return prob

In [None]:
import time
units = 64
epochs = 4
t0 = time.time()

model = RNN_Build(units)
model.compile(optimizer=tf.keras.optimizers.Adam(0.001),
              loss=tf.losses.BinaryCrossentropy(),
              metrics=['accuracy'],
              experimental_run_tf_function=False
             )
model.fit(train_data ,epochs=epochs, validation_data=test_data, validation_freq=2)

Epoch 1/4
Epoch 2/4
Epoch 3/4

In [40]:
print('훈련 데이터셋 평가 ...')
(loss, accuracy) = model.evaluate(train_data,verbose=0)
print(f'loss={loss:.4f}, accuracy={accuracy*100:.4f}\n')

print('테스트 데이터셋 평가...')
(loss, accuracy) = model.evaluate(test_data,verbose=0)
print(f'loss={loss:.4f}, accuracy={accuracy*100:.4f}\n')

t1 = time.time()
print(f'시간 : {t1-t0}')

훈련 데이터셋 평가 ...
loss=0.0288, accuracy=99.2829

테스트 데이터셋 평가...
loss=2.9295, accuracy=49.5593

시간 : 61.87646412849426
