## Reuters
#### 뉴스 기사의 내용을 통해 뉴스의 주제를 파악

In [1]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

from tensorflow.keras import models, layers
from tensorflow.keras import callbacks
from tensorflow.keras import utils
from keras.datasets import reuters

In [2]:
# Load the Reuters newswire dataset with the vocabulary capped via num_words=10000.
(train_data, train_labels), (test_data, test_labels) = reuters.load_data(num_words=10000)

# Hold out 30% of the training samples for validation; the fixed random_state
# keeps the split identical between runs.
train_data, val_data, train_labels, val_labels = train_test_split(
    train_data,
    train_labels,
    test_size=0.3,
    random_state=123,
)

# One value per line: train / validation / test sample counts, then the number
# of distinct labels present in the training split.
print(len(train_data), len(val_data), len(test_data), len(set(train_labels)), sep='\n')

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/reuters.npz
[1m2110848/2110848[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
6287
2695
2246
46


In [3]:
def one_hot_encoding(data, dim=10000):
  """Turn sequences of integer indices into a multi-hot matrix.

  Args:
    data: sized iterable of index sequences; every index must be a valid
      column position for a width of `dim`.
    dim: number of columns in the encoded matrix.

  Returns:
    A float array of shape (len(data), dim) in which entry [i, j] is 1.0 when
    index j occurs in data[i] and 0.0 otherwise (repeated indices still give 1.0).
  """
  encoded = np.zeros((len(data), dim))
  # Each `row` is a view into `encoded`, so assigning through it fills the matrix in place.
  for row, indices in zip(encoded, data):
    row[indices] = 1.
  return encoded

# Encode the word-index sequences of every split as fixed-width multi-hot vectors.
x_train = one_hot_encoding(train_data)
x_val = one_hot_encoding(val_data)
x_test = one_hot_encoding(test_data)

# Convert the integer labels to one-hot (categorical) vectors.
# The class count is pinned explicitly: without `num_classes`, to_categorical
# infers the width from the largest label in each array, so a split that happens
# to lack the highest class id would come out narrower than the model's output.
NUM_CLASSES = 46  # must match the 46-unit softmax output layer

# NOTE: this cell rebinds the *_labels names, so re-run the data-loading cell
# before executing it a second time.
train_labels = utils.to_categorical(train_labels, num_classes=NUM_CLASSES)
val_labels = utils.to_categorical(val_labels, num_classes=NUM_CLASSES)
test_labels = utils.to_categorical(test_labels, num_classes=NUM_CLASSES)

In [19]:
import tensorflow as tf
from tensorflow.keras import models, layers

# Fully connected classifier: 10,000-dimensional multi-hot input -> 46-way softmax.
# The input shape is declared with an explicit Input layer; passing `input_shape`
# to the first Dense layer of a Sequential model makes Keras emit a warning.
model = models.Sequential([
    layers.Input(shape=(10000,)),
    layers.Dense(256, activation='relu', name='input'),
    layers.BatchNormalization(),
    layers.Dense(128, activation='relu', name='hidden'),
    layers.Dropout(0.4),
    layers.Dense(46, activation='softmax', name='output'),
])

model.summary()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


### loss : categorical_crossentropy(범주형 라벨 분류하기 위함)
### optimizer : rmsprop
### 평가 지표 : 정확도

In [20]:
# Configure training: cross-entropy against the one-hot label vectors,
# the RMSprop optimizer, and accuracy as the reported metric.
model.compile(
    loss='categorical_crossentropy',
    optimizer='rmsprop',
    metrics=['accuracy'],
)

## ModelCheckpoint와 early stopping 사용

In [21]:
# `callbacks` is the module imported from `tensorflow.keras` in the notebook's
# import cell, i.e. the same package the model is built from. It is deliberately
# not re-imported from standalone `keras` here, to avoid mixing package roots.

# Write the model to 'best_model.keras' only when the validation loss improves.
check_point_cb = callbacks.ModelCheckpoint('best_model.keras',
                                           monitor='val_loss',
                                           save_best_only=True)
# Stop once val_loss has not improved for 3 consecutive epochs, then restore
# the weights from the best epoch.
early_stopping_cb = callbacks.EarlyStopping(monitor='val_loss',
                                            patience=3,
                                            restore_best_weights=True)

# epochs=300 is only an upper bound; early stopping decides the actual length.
history = model.fit(x_train, train_labels,
                    epochs=300,
                    batch_size=512,
                    validation_data=(x_val, val_labels),
                    callbacks=[check_point_cb, early_stopping_cb])

Epoch 1/300
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 281ms/step - accuracy: 0.4475 - loss: 2.5921 - val_accuracy: 0.7380 - val_loss: 3.2915
Epoch 2/300
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 197ms/step - accuracy: 0.8427 - loss: 0.7310 - val_accuracy: 0.7625 - val_loss: 3.1077
Epoch 3/300
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 155ms/step - accuracy: 0.9137 - loss: 0.4166 - val_accuracy: 0.7740 - val_loss: 2.9897
Epoch 4/300
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 241ms/step - accuracy: 0.9437 - loss: 0.2708 - val_accuracy: 0.7777 - val_loss: 2.7645
Epoch 5/300
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 224ms/step - accuracy: 0.9584 - loss: 0.1874 - val_accuracy: 0.7770 - val_loss: 2.5616
Epoch 6/300
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 158ms/step - accuracy: 0.9642 - loss: 0.1562 - val_accuracy: 0.7803 - val_loss: 2.3823
Epoch 7/300
[1m13/13

## 모델 최종 평가 : 테스트 정확도 ≈ 0.78

In [22]:
# Score the trained model on the held-out test split. The call is left as the
# cell's last expression so the notebook displays the returned [loss, accuracy].
model.evaluate(x_test, test_labels, verbose=1)

[1m71/71[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - accuracy: 0.7899 - loss: 0.9548


[0.962600827217102, 0.7827248573303223]