# 1. 텍스트 분류 방법
## [ 머신러닝 방식 ]
* 1) 비지도 : 군집화, k평균 군집화
* 2) 지도학습 : 로지스틱, 나이브 베이즈, k-최근법, 트리기반 알고리즘, 표집법

## [ 딥러닝 ] : RNN, LSTM

# 2. 텍스트 분류 활용
- 정서 분석(감성분석) : 긍정, 부정
- 스팸 메일 : 스팸 유무
- 뉴스 범주 구분 : 어느 범주에 속하는지
- 민원선별 : 시급한가 아닌가
- 선거활동 : 오바마 대선... 등등

# 3. 로이터 뉴스 기사
* 1) 구성 :
-- 훈련용 뉴스 : 8982
-- 검정용 뉴스 : 2246
-- 범주 : 46개의 범주

# # 데이터 수집 -> EDA -> 데이터 가공 -> 모형 구성 -> 모형 컴파일 -> 모형 훈련 -> 모형 훈련 평가 -> 모형 성능 평가 -> 모형 활용(추론 = 예측)

In [29]:
from keras.datasets.reuters import load_data

from  keras.models import Sequential   # 인공지능 모형의 구조  ( 순차형  vs. 함수형)을  결정하는데 쓰이는 클래스  
from keras.layers import Dense, Activation, Embedding, LSTM #  인공지는 모형을 이루는 각 계층을 구현하는 클래스 
from keras import optimizers  # 최적화를 담당할 알고리즘을 담은 함수

#범주형으로 되어 있는 y 항의 각 값을 원핫 인코딩하는데 사용할 함수  
from  keras.utils.np_utils import to_categorical
from keras.preprocessing import sequence
from keras.preprocessing.sequence import pad_sequences 

#데이터 분할 할 때 사용할 함수  
from sklearn.model_selection import train_test_split 

import matplotlib.pyplot as plt

import pandas as pd
import numpy as np

plt.rc('font', family='NanumBarunGothic')

In [30]:
#훈련 중 모형을 점검하고 조기 중단하는데 필요한 도구
from keras.callbacks import EarlyStopping, ModelCheckpoint

import warnings
warnings.filterwarnings('ignore')

In [31]:
# 난수 생성 시에 시드값 설정
seed = 0
np.random.seed(seed)

# 1. 데이터 수집
* 1) 데이터 적재 -> 데이터 분할

In [32]:
# 1-1. 데이터 로드
# 데이터를 훈련용, 테스트용으로 나눠서 로드한다.
# 단어의 개수를 최대 2만개로 제한한다.
# 테스트용데이터를 20% 분할하겠다.
(훈련용_X, 훈련용_y), (테스트용_X, 테스트용_y) = load_data(num_words=20000, test_split=0.2)

In [33]:
# 검증용 데이터 분할
훈련용_X, 검증용_X, 훈련용_y, 검증용_y = train_test_split(훈련용_X, 훈련용_y, train_size=0.8, test_size = 0.2, shuffle=True)

print(훈련용_X.shape)
print(검증용_X.shape)
print(훈련용_y.shape)
print(검증용_y.shape)
print(테스트용_X.shape)

(7185,)
(1797,)
(7185,)
(1797,)
(2246,)


# 2. EDA = 데이터 탐색 = 탐색적 데이터 분석
* 데이터셋 확인 -> 데이터 내용 확인

In [34]:
# 2-1. 데이터셋 확인 = 잡합 크기 확인
# 각 데이터셋의 모양을 확인해보자.
print(f'훈련용 분할한 X항의 모양 = {훈련용_X.shape}')
print(f'검증용으로 분할한 X항의 모양={검증용_X.shape}')
print(f'테스트용 X항의 모양={테스트용_X.shape}')

print(f'훈련용 분할한 y항의 모양 = {훈련용_y.shape}')
print(f'검증용으로 분할한 y항의 모양={검증용_y.shape}')
print(f'테스트용 y항의 모양={테스트용_y.shape}')

훈련용 분할한 X항의 모양 = (7185,)
검증용으로 분할한 X항의 모양=(1797,)
테스트용 X항의 모양=(2246,)
훈련용 분할한 y항의 모양 = (7185,)
검증용으로 분할한 y항의 모양=(1797,)
테스트용 y항의 모양=(2246,)


In [35]:
# 2-2. 데이터 내용 확인(훈련용, 검증용, 테스트용)
# 뉴스의 기사가 들어 있는 X항의 구조
pd.DataFrame(훈련용_X)

Unnamed: 0,0
0,"[1, 431, 484, 884, 568, 1184, 4, 37, 38, 66, 2..."
1,"[1, 232, 5002, 9, 873, 81, 8, 16, 701, 10, 133..."
2,"[1, 4652, 7665, 5322, 985, 5, 4, 237, 2813, 7,..."
3,"[1, 4, 60, 5, 794, 8, 16, 299, 45, 2112, 7, 4,..."
4,"[1, 16032, 17369, 149, 8, 261, 1533, 10, 384, ..."
...,...
7180,"[1, 4916, 11714, 81, 8, 16, 40, 515, 4, 384, 2..."
7181,"[1, 130, 23, 2916, 16002, 7951, 931, 43, 4, 37..."
7182,"[1, 2, 81, 8, 16, 701, 42, 120, 6, 721, 1054, ..."
7183,"[1, 4, 740, 291, 1098, 1641, 40, 1533, 42, 549..."


In [36]:
# 범주가 들어 있는 y항의 구조
pd.DataFrame(훈련용_y)

Unnamed: 0,0
0,11
1,4
2,11
3,19
4,3
...,...
7180,4
7181,19
7182,3
7183,4


# 3. 데이터 가공
* 시퀀스 길이를 통일 = 시퀀스 채우기 = 시퀀스 패딩
* 표적치(범주)를 원핫 인코딩

In [37]:
# 3-1. 시퀀스 길이를 통일 = 시퀀스 채우기 = 시퀀스 패딩
# 길이를 맞추기 전의 각 X항의 내용을 확인한다.
print(pd.DataFrame(훈련용_X).head(3))
print(len(훈련용_X[0]))
print(len(훈련용_X[1]))
print(len(훈련용_X[2]))

                                                   0
0  [1, 431, 484, 884, 568, 1184, 4, 37, 38, 66, 2...
1  [1, 232, 5002, 9, 873, 81, 8, 16, 701, 10, 133...
2  [1, 4652, 7665, 5322, 985, 5, 4, 237, 2813, 7,...
66
197
264


# # 3-2. 3-1의 출력 내용을 확인할 경우 뉴스의 기사 길이가 각각 다르다는 것을 확인할 수 있다.
- 훈련용과 검증용의 X항에 있는 각 표본의 길이를 동일하게 채운다.(padding, 채우기).
- from keras.preprocessing.sequence import pad_sequences 를 사용해서 채운다.(대상, maxlen)

In [38]:
# pad_sequences 는 크기의 나머지는 0으로 채운다.
훈련용_X = pad_sequences(훈련용_X, maxlen=200)
검증용_X = pad_sequences(검증용_X, maxlen=200)
테스트용_X = pad_sequences(테스트용_X, maxlen=200)

In [39]:
훈련용_X[1]

array([    0,     0,     0,     1,   232,  5002,     9,   873,    81,
           8,    16,   701,    10,  1331,   120,     6,   337,    25,
        4901,   707,    13,  3920,    11,    15,     6,   884,     2,
           2,   111,    10,   304,     5,   234,  3154,    71,    56,
        9103,     4,   211,     5,     4,  4901,   707,    34,   170,
         136,     4,   326,   384,   292,   300,     6,   337,    25,
        4901,     9,  7920,   971,    16,     8,     4,  5357,   218,
          34,   385,    54,   139,    13,  2611,    11,    15,     6,
        8355,    71,   359,     5,  4968,  3591,   450,   232,  5002,
           8,     4,   406,    96,    33,    30,   600,     7,   197,
          10,   232,  5002,  1658,     8,     4,   211,  3092,   232,
           6,  2137,    21,    25,  2292,   971,  1727,     9,  4752,
         667,  1154,    22,     4,   211,    33,    30,   529,     6,
        2932,   222,     9,    13,   100,  1120,  1619,     4,    49,
           8,     7,

In [40]:
print(훈련용_X.shape)
print(검증용_X.shape)
print(테스트용_X.shape)

(7185, 200)
(1797, 200)
(2246, 200)


In [41]:
# 3-1. 기사의 길이를 맞춰보고 결과를 확인하자.
pd.DataFrame(훈련용_X)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,...,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,192,193,194,195,196,197,198,199
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,21,269,10,433,115,609,115,55,1077,6,601,756,36,118,4,424,4008,415,16,40,2259,6,320,410,51,115,949,601,16,1077,6,761,233,836,36,118,4,2749,17,12
1,0,0,0,1,232,5002,9,873,81,8,16,701,10,1331,120,6,337,25,4901,707,13,3920,11,15,6,884,2,2,111,10,304,5,234,3154,71,56,9103,4,211,5,...,3349,955,9,100,1135,2102,4,49,8,9305,16170,4901,218,40,88,5,69,3525,11,15,234,3154,8,9305,4901,3154,439,19279,19403,9,10873,2,2,9,2,3126,342,2,17,12
2,477,1258,50,36,1457,5,7,10,765,6,424,1412,9,1078,415,265,3798,5606,41,45,3977,30,21,10,542,13,542,503,1258,535,45,57,6,30,8065,542,6,542,5322,8,...,4217,10080,7,68,520,27,4918,2003,41,2169,616,6,1258,22,4,100,1234,36,8,787,24,936,10080,330,210,30,1980,27,238,410,13731,7,68,520,13,7627,7,395,17,12
3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,60,5,794,8,16,299,45,2112,7,4,239,76,281,4,846,16,75,8,24,16,64,560,25,862,5,4,956,1264,7,4,76,6,1663,11,167,22,628,11,17,12
4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,149,8,261,1533,10,384,292,109,13,68,80,467,8645,6,823,5,146,93,151,4,49,8,25,80,33,1076,238,21,10,467,503,21,93,12,527,6,1135,2102,17,12
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7180,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,4916,11714,81,8,16,40,515,4,384,292,211,5,4,280,5,25,7172,19977,912,6,1299,381,16949,244,81,13,42,938,513,5,197,9,975,7,1609,5,2463,347,17,12
7181,1556,1086,125,174,4726,6,1102,4,66,911,115,55,95,7446,97,2341,24,4,73,2093,5,4,4782,415,535,616,6,10516,9,511,435,8,237,355,356,1880,4520,7114,19547,4,...,173,475,6,54,10827,106,2651,5,73,168,578,652,12026,4,223,43,256,173,475,242,24,4,106,40,592,25,8310,6,10,2651,1429,6,2897,4,223,4,3787,8,17,12
7182,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,21,25,691,4981,7,4538,16,8,42,549,1738,23,528,13,136,6525,2277,9,8,25,1054,10253,33,2730,137,404,349,6,2,142,11,15,13,4,188,943,1738,1831,17,12
7183,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,2014,5,4,198,245,22,397,124,89,6,207,156,13,88,5,1446,3490,6505,6,30,3069,13,4,291,2232,137,88,530,30,1580,254,198,51,45,565,97,397,124,17,12


In [42]:
# 3-2. 표적치, 범주를 원핫 인코딩 y축은 범주형 값을 나타낸다.
pd.DataFrame(훈련용_y)
훈련용_원핫인코딩_y = to_categorical(훈련용_y)
검증용_원핫인코딩_y = to_categorical(검증용_y)
테스트용_원핫인코딩_y = to_categorical(테스트용_y)

In [43]:
pd.DataFrame(훈련용_원핫인코딩_y)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7180,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7181,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7182,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7183,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


# 4. 모형 구성
* 모형 구성 설계 -> 계층 구성 설계 -> 모형 구조 확인

In [44]:
# 4-1. 조기 중단(early stopping) 지정 : 훈련 중에 특정 조건에 도달한 경우에 더이상 훈련을 하지 않는 방법
# 훈련 학습의 반복 실행 중 일정 손실에 도달하면 중단
# patience=10 : 10번 반복하는 동안(에포크) 손실이 낮아지지 않으면 조기 중단
callback = EarlyStopping(monitor='loss', patience=10)

In [45]:
# 4-2. 모형 구성 설계
모형 = Sequential()

# # Embedding : 단어 하나(인코딩된 양의 정수값)를 고정 크기로 조밀 벡터를 생성한다.
eg) [2, 10, , , , ] -> [[2], [10], , ,] -> [[0.2, 0.1, 0.3, 0.4], [0.1, 0.2, 0.6, 0.25], , ]

* tf.keras.layers.Embedding(
  *    input_dim, : 최대 인덱스 +1 = 어휘목록에 있는 어휘의 개수
  *   output_dim, : 각 단어를 나타내는 고정길이 = 각 단어를 나타내는 리스트
  *  embeddings_initializer="uniform",
  *  embeddings_regularizer=None,
  *  activity_regularizer=None,
  *  embeddings_constraint=None,
  *  mask_zero=False,
  *  input_length=None, : 입력단어 벡터의 최대 길이
  *  **kwargs
)

In [46]:
# 4-3. 계층 구성 설계
모형.add(Embedding(input_dim=20000, output_dim=4, input_length=200))

# # LSTM 계층 : RNN 계층의 일종이다.
* RNN 에 가장 잘 맞는 activation='tanh'이다.

# # [ Drop out (드롭 아웃)]
* 1) 세포 자동사라는 생리학 용어
* 2) 뇌 세포의 경우에는 자주 쓰이지 않는 것들은 자동으로 소거된다.
* 3) 인공 신경세포, 지정한 비율에 맞춰 해당 계층의 뉴런 중에 일부를 소거하는(zeroed out)하는 방식으로 세포 자동사를 모방했다.

In [47]:
# 4-3. LSTM 모형의 계층을 두 번째 레이어(계층)에 지정한다.
모형.add(LSTM(200, activation='tanh', dropout=0.3))

In [48]:
# 마지막 계층을 마무리하자.
# 아웃풋 범주의 개수가 46개
모형.add(Dense(46, activation='softmax'))

In [49]:
# 4-4. 모형 구조 확인
모형.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_1 (Embedding)     (None, 200, 4)            80000     
                                                                 
 lstm_1 (LSTM)               (None, 200)               164000    
                                                                 
 dense_1 (Dense)             (None, 46)                9246      
                                                                 
Total params: 253,246
Trainable params: 253,246
Non-trainable params: 0
_________________________________________________________________


 # #요약 출력 내용의 의미를 알아보자.
* Layer (type)       /         Output Shape       /       Param #   
=================================================================
* embedding (Embedding)   /    (None, 200, 4)      /      80000     : 4개의 요소로 구성된 리스트가 200개 있는 중첩 리스트
                                                                 
* lstm (LSTM)        /         (None, 200)           /    164000    : 장단기 기억 계층의 출력
                                                                 
* dense (Dense)        /       (None, 46)        /        9246      : 46개의 요소가 있는 리스트
                                                                 
=================================================================
* Total params: 253,246 : 훈련 가능한 파라미터란 듯으로 보통 가중치를 의미한다. 즉, 가중치의 개수가 253,246개 이다.
* Trainable params: 253,246
* Non-trainable params: 0

In [50]:
# 5. 모델 컴파일 categorical_crossentropy
모형.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])
# 모형.compile(
#     loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
#     optimizer="sgd",
#     metrics=["accuracy"],
# )sparse_categorical_crossentropy

In [55]:
검증용_원핫인코딩_y.shape
검증용_X.shape

(1797, 200)

In [52]:
# 6. 모형 훈련
훈련_이력 = 모형.fit(훈련용_X, 훈련용_원핫인코딩_y, batch_size=100,
               epochs=10, verbose=1, validation_data=(검증용_X, 검증용_원핫인코딩_y))

Epoch 1/10


InvalidArgumentError: ignored

In [None]:
# 7. 훈련평가
# 7-1 훈련 이력 결과 확인
print(훈련_이력.model)
print(훈련_이력.epoch)
훈련_이력.history

In [None]:
plt.rc('font', family='NanumBarunGothic')
plt.plot(훈련_이력.history['accuracy'], label='훈련 정확도', ls='-', marker='x')
plt.plot(훈련_이력.history['val_accuracy'], label='검증 정확도', ls='-', marker='o')
plt.rc('font', family='Malgun Gothic')
plt.ylabel('정확도')
plt.xlabel('에포크')

plt.legend(loc='best')
plt.show()

In [None]:
훈련용_데이터_손실 = 훈련_이력.history['loss']
훈련용_데이터_정확도 = 훈련_이력.history['accuracy']

검증용_데이터_정확도 = 훈련_이력.history['val_loss']
검증용_데이터_정확도 = 훈련_이력.history['val_accuracy']

In [None]:
x_len = np.arange(len(훈련용_데이터_정확도))

plt.rc('font', family='Malgun Gothic')

plt.plot(x_len, 훈련용_데이터_정확도, marker='.', c="g", label='훈련용 데이터의 정확도')
plt.plot(x_len, 검증용_데이터_정확도, marker='.', c="b", label='검증용 데이터의 정확도')


plt.legend(loc='upper right')
plt.grid()
plt.xlabel('훈련 횟수(epoch)')
plt.ylabel('측정치')

plt.show()


# 8. 성능 평가

In [None]:
# 8-1. 간단한 평가 : 테스트의 정확도를 출력해보자.
손실, 정확도 = 모형.evaluate(테스트용_X, 테스트용_원핫인코딩_y)
print("\n", f"손실:{손실:.4f}, 정확도 : {정확도*100 :2.2f}%")

# 9. 모형 활용(추론 = 예측)

In [None]:
# 8-2. 테스트용 데이터(뉴스 기사별 데이터)를 바탕으로 예측치를 형성
예측치 = 모형.predict(테스트용_X)
pd.DataFrame(예측치)

In [None]:
# 40 개 데이터에 대해 예측해보자.
# 8-3. 손글씨의 그림들에 대한 예측값을 확인한다.
예측치 = 모형.predict(테스트용_X[:40])
print([np.argmax(예측치[i]) for i in range(40)])

In [None]:
# 8-4. 실측치와 같은지를 확인하자
print([테스트용_y[i] for i in range(40)])

# [ 결론 ]
* RNN 기반으로 로이터 뉴스 분류 인공지능을 만들었다.
* 모형의 품질이 안좋다, 따라서 개선을 해야한다.

#### [ 개선 방법 ]
* 1) 훈련 횟수를 최대한으로 늘려본다.
* 2) 각종 하이퍼파라미터를 조정해 본다.
* 3) 모형의 신경망 구조를 바꾸어 본다.
