In [1]:
import tensorflow as tf
from tensorflow.keras import datasets, layers, models, preprocessing

In [2]:
import tensorflow_datasets as tfds

# --- Text / model dimensions -------------------------------------------------
n_words = 10_000      # vocabulary cap: passed to the IMDB loader and used as the embedding input size
max_len = 200         # target length handed to pad_sequences for every review
dim_embedding = 256   # size of each embedding vector

# --- Training schedule -------------------------------------------------------
BATCH_SIZE = 500      # batch size used by both model.fit and model.evaluate
EPOCHS = 20           # number of epochs passed to model.fit

def load_data():
    """Load the Keras IMDB dataset and pad every review to ``max_len`` tokens.

    Reads the notebook-level constants ``n_words`` (vocabulary cap given to
    the loader) and ``max_len`` (target sequence length for padding).

    Returns:
        ``((X_train, y_train), (X_test, y_test))`` where the ``X_*`` entries
        are the padded sequences and the ``y_*`` entries are the labels
        exactly as returned by the loader.
    """
    train_split, test_split = datasets.imdb.load_data(num_words=n_words)

    # Pad the train split first, then the test split; labels pass through untouched.
    padded_splits = [
        (preprocessing.sequence.pad_sequences(reviews, maxlen=max_len), labels)
        for reviews, labels in (train_split, test_split)
    ]
    return padded_splits[0], padded_splits[1]

In [11]:
def build_model():
  """Build the sentiment classifier: embedding -> global max pooling -> dense head.

  Reads the notebook-level constants ``n_words``, ``dim_embedding`` and
  ``max_len``.

  Returns:
    An uncompiled ``Sequential`` model that takes a ``(batch, max_len)``
    matrix of integer word indices and produces a ``(batch, 1)`` sigmoid
    output.
  """
  model = models.Sequential()

  # Declare the input shape with an explicit Input layer rather than the
  # deprecated `input_length` argument of Embedding. Newer Keras releases
  # ignore `input_length` (with a warning), which would leave the model
  # unbuilt and make model.summary() unable to report shapes/parameters.
  model.add(layers.Input(shape=(max_len,)))

  # Embedding layer: (batch, max_len) -> (batch, max_len, dim_embedding).
  # n_words is the vocabulary size, so every word index in the input must be
  # strictly smaller than n_words.
  model.add(layers.Embedding(n_words, dim_embedding))

  model.add(layers.Dropout(0.3))

  # For each of the dim_embedding features, keep the maximum value across
  # the max_len positions: (batch, max_len, dim_embedding) -> (batch, dim_embedding).
  model.add(layers.GlobalMaxPool1D())
  model.add(layers.Dense(128, activation='relu'))
  model.add(layers.Dropout(0.5))
  # Single sigmoid unit for the binary output.
  model.add(layers.Dense(1, activation='sigmoid'))

  return model

In [12]:
# Seed the Python, NumPy and TensorFlow random generators before the model is
# created so weight initialisation, batch shuffling and dropout masks repeat
# across "Restart & Run All". Re-running this cell also recreates the model
# from the same initial weights.
# NOTE(review): some GPU kernels may still be nondeterministic — confirm if
# bit-exact reproducibility is required.
SEED = 42
tf.keras.utils.set_random_seed(SEED)

# Padded train/test splits and a fresh, uncompiled model.
(X_train, y_train), (X_test, y_test) = load_data()
model = build_model()

In [13]:
# Print the layer-by-layer architecture with output shapes and parameter counts.
model.summary()

Model: "sequential_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_2 (Embedding)     (None, 200, 256)          2560000   
                                                                 
 dropout_2 (Dropout)         (None, 200, 256)          0         
                                                                 
 global_max_pooling1d_2 (Gl  (None, 256)               0         
 obalMaxPooling1D)                                               
                                                                 
 dense_1 (Dense)             (None, 128)               32896     
                                                                 
 dropout_3 (Dropout)         (None, 128)               0         
                                                                 
 dense_2 (Dense)             (None, 1)                 129       
                                                      

In [14]:
# Configure training: Adam optimizer, binary cross-entropy loss for the
# single sigmoid output, and accuracy reported as the metric.
model.compile(
    loss="binary_crossentropy",
    optimizer="adam",
    metrics=["accuracy"],
)

In [15]:
# model.fit returns the training History, so keep it under its own name.
# `score` is assigned the evaluation result further down, and sharing the
# name would make it hold two different kinds of object depending on which
# cell ran last.
# NOTE(review): the test split doubles as validation data here. That is fine
# for monitoring, but carve out a separate validation split before using
# these curves for early stopping or hyperparameter tuning.
history = model.fit(
    X_train,
    y_train,
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    validation_data=(X_test, y_test),
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [18]:
# Evaluate on the test split; with the compile settings used in this notebook
# the result holds the loss first and the accuracy second.
score = model.evaluate(X_test, y_test, batch_size=BATCH_SIZE)
loss_value, accuracy_value = score[0], score[1]
print(f"\nTest score: {loss_value}")
print(f"test accuracy: {accuracy_value}")


Test score: 0.4964689314365387
test accuracy: 0.8514400124549866


In [24]:
# Run inference on the padded test reviews. The model ends in a single
# sigmoid unit, so each row of `predictions` is one value between 0 and 1.
# NOTE(review): presumably values near 1 mean positive sentiment — confirm
# against the dataset's label encoding.
predictions = model.predict(X_test)
print(predictions)

[[0.00405705]
 [0.99937564]
 [0.13621897]
 ...
 [0.00331138]
 [0.00128106]
 [0.9153306 ]]
