## 1. 데이터 준비와 확인

## 2. 데이터로더 구성

## 3. 모델구성을 위한 데이터 분석 및 가공
 - 데이터셋 내 문장 길이 분포
 - 적절한 최대 문장 길이 지정
 - keras.preprocessing.sequence.pad_sequences 을 활용한 패딩 추가
 
 
## 4. 모델구성 및 validation set 구성
 - 3가지 이상
 
## 5. 모델 훈련

## 6. Loss, Accuracy 그래프 시각화

## 7. 학습된 Embedding 레이어 분석

## 8. 한국어 Word2Vec 임베딩 활용하여 성능개선

 - 한국어 Word2Vec은 다음 경로에서 구할 수 있습니다.

https://github.com/Kyubyong/wordvectors

## 1. 데이터 준비와 확인
```
$ wget https://raw.githubusercontent.com/e9t/nsmc/master/ratings_train.txt
$ wget https://raw.githubusercontent.com/e9t/nsmc/master/ratings_test.txt
$ mv ratings_*.txt ~/aiffel/sentiment_classification
```

In [None]:
import pandas as pd
import urllib.request
%matplotlib inline
import matplotlib.pyplot as plt
import re
from konlpy.tag import Okt
from tensorflow import keras
from tensorflow.keras.preprocessing.text import Tokenizer
import numpy as np
from tensorflow.keras.preprocessing.sequence import pad_sequences
from collections import Counter

# Read the tab-separated NSMC rating files into DataFrames.
train_path = '~/aiffel/sentiment_classification/ratings_train.txt'
test_path = '~/aiffel/sentiment_classification/ratings_test.txt'
train_data, test_data = pd.read_table(train_path), pd.read_table(test_path)

train_data.head()

## 2. 데이터 로더 구성

 - 데이터의 중복 제거
 - NaN 결측치 제거
 - 한국어 토크나이저로 토큰화
 - 불용어(Stopwords) 제거
 - 사전word_to_index 구성
 - 텍스트 스트링을 사전 인덱스 스트링으로 변환
 - X_train, y_train, X_test, y_test, word_to_index 리턴

In [None]:
from konlpy.tag import Mecab
# Mecab morphological analyzer; load_data() calls tokenizer.morphs() on every review.
tokenizer = Mecab()
# Tokens that load_data() discards right after tokenization.
stopwords = ['의','가','이','은','들','는','좀','잘','걍','과','도',
             '를','으로','자','에','와','한','하다','다','고','하','을','.','..',
             ',','었','만','는데','로','음','것','아','네요','어','같','했','에서','기','네','거'
             ,'수','되','면','게','지','있','나','점','인','주','내','~','던','어요','할','겠','1'
             ,'해','습니다','...','더','라','그','볼']

def load_data(train_data, test_data, num_words=10000):
    """Clean, tokenize and index-encode the train/test review DataFrames.

    Rows with a duplicated ``document`` and rows containing NaN are dropped.
    Each remaining document is split with the module-level ``tokenizer`` and
    filtered against the module-level ``stopwords``. The vocabulary is built
    from the training tokens only.

    Args:
        train_data: DataFrame with ``document`` and ``label`` columns.
        test_data: DataFrame with ``document`` and ``label`` columns.
        num_words: Maximum vocabulary size, including the four reserved
            tokens ``<PAD>``, ``<BOS>``, ``<UNK>`` and ``<UNUSED>``.

    Returns:
        A tuple ``(X_train, y_train, X_test, y_test, word_to_index)`` where the
        ``X_*`` values are lists of index lists, the ``y_*`` values are NumPy
        arrays of labels and ``word_to_index`` maps each token to its index.
    """
    # Chain the non-inplace variants so the caller's DataFrames are not mutated.
    train_data = train_data.drop_duplicates(subset=['document']).dropna(how='any')
    test_data = test_data.drop_duplicates(subset=['document']).dropna(how='any')

    # A set gives O(1) membership tests; the list is scanned for every token otherwise.
    stopword_set = set(stopwords)

    def tokenize(sentence):
        return [word for word in tokenizer.morphs(sentence) if word not in stopword_set]

    X_train = [tokenize(sentence) for sentence in train_data['document']]
    X_test = [tokenize(sentence) for sentence in test_data['document']]

    # Count training tokens in document order; most_common() keeps first-seen
    # order among equal counts, so the ranking matches a flattened token list.
    counter = Counter()
    for tokens in X_train:
        counter.update(tokens)

    # Honour num_words (previously ignored); 4 slots are reserved for special tokens.
    most_common = counter.most_common(max(num_words - 4, 0))
    vocab = ['<PAD>', '<BOS>', '<UNK>', '<UNUSED>'] + [key for key, _ in most_common]
    word_to_index = {word: index for index, word in enumerate(vocab)}
    unk_index = word_to_index['<UNK>']

    def wordlist_to_indexlist(wordlist):
        return [word_to_index.get(word, unk_index) for word in wordlist]

    X_train = [wordlist_to_indexlist(tokens) for tokens in X_train]
    X_test = [wordlist_to_indexlist(tokens) for tokens in X_test]

    y_train = np.array(list(train_data['label']))
    y_test = np.array(list(test_data['label']))
    return X_train, y_train, X_test, y_test, word_to_index

# Encode both splits and keep the token -> index vocabulary returned by load_data().
X_train, y_train, X_test, y_test, word_to_index = load_data(train_data, test_data) 

In [None]:
# Invert the vocabulary so that indices can be mapped back to tokens.
index_to_word = dict(zip(word_to_index.values(), word_to_index.keys()))

# Print the first 50 vocabulary entries (indices 0-49) to look for further
# stopword candidates.
for idx in range(50):
    print(index_to_word[idx])

In [None]:
def get_encoded_sentence(sentence, word_to_index):
    """Encode one whitespace-separated sentence as a list of vocabulary indices.

    The result always starts with the ``<BOS>`` index; words that are missing
    from ``word_to_index`` are replaced by the ``<UNK>`` index.
    """
    encoded = [word_to_index['<BOS>']]
    for token in sentence.split():
        if token in word_to_index:
            encoded.append(word_to_index[token])
        else:
            encoded.append(word_to_index['<UNK>'])
    return encoded

def get_encoded_sentences(sentences, word_to_index):
    """Encode every sentence in ``sentences`` with get_encoded_sentence()."""
    encoded_sentences = []
    for text in sentences:
        encoded_sentences.append(get_encoded_sentence(text, word_to_index))
    return encoded_sentences

def get_decoded_sentence(encoded_sentence, index_to_word):
    """Decode an index list back into a space-joined string.

    The first element is skipped (it is treated as the ``<BOS>`` marker) and
    indices missing from ``index_to_word`` are rendered as ``'<UNK>'``.
    """
    words = []
    for idx in encoded_sentence[1:]:
        if idx in index_to_word:
            words.append(index_to_word[idx])
        else:
            words.append('<UNK>')
    return ' '.join(words)

def get_decoded_sentences(encoded_sentences, index_to_word):
    """Decode every index list in ``encoded_sentences`` with get_decoded_sentence()."""
    decoded_sentences = []
    for encoded in encoded_sentences:
        decoded_sentences.append(get_decoded_sentence(encoded, index_to_word))
    return decoded_sentences

## 3. 모델구성을 위한 데이터 분석 및 가공

- 데이터셋 내 문장 길이 분포
- 적절한 최대 문장 길이 지정
- keras.preprocessing.sequence.pad_sequences 을 활용한 패딩 추가

In [None]:
# Token counts of every encoded sentence across both splits.
total_data_text = list(X_train) + list(X_test)
num_tokens = np.array([len(tokens) for tokens in total_data_text])

# Report the mean, maximum and standard deviation of the sentence lengths.
print('문장길이 평균 : ', np.mean(num_tokens))
print('문장길이 최대 : ', np.max(num_tokens))
print('문장길이 표준편차 : ', np.std(num_tokens))

# Cap the sequence length at (mean + 4 * standard deviation).
max_tokens = np.mean(num_tokens) + 4 * np.std(num_tokens)
maxlen = int(max_tokens)
print('pad_sequences maxlen : ', maxlen)

# Share of sentences that fit inside maxlen without truncation. It is compared
# with the integer maxlen that is actually used for padding and scaled to a
# percentage, because the message ends the number with a '%' sign.
coverage = np.sum(num_tokens <= maxlen) / len(num_tokens)
print('전체 문장의 {:.2f}%가 maxlen 설정값 이내에 포함됩니다. '.format(coverage * 100))

#### pre padding이 더 성능이 좋다.
#### RNN 은 뒤로 갈 수록 앞의 데이터가 쌓이게 되는데, 최종 state값에 가장 영향을 많이 미친다.
#### 뒤의 데이터가 0이면 끝에 가서 데이터의 의미가 더 줄어들게 된다.
#### 따라서 앞에 부분을 0으로 Padding하는것이 좋다.

In [None]:
# Pad every sequence to maxlen with the <PAD> index. 'pre' puts the padding in
# front of the tokens; use 'post' to pad at the end instead.
pad_options = dict(value=word_to_index["<PAD>"], padding='pre', maxlen=maxlen)

X_train = pad_sequences(X_train, **pad_options)
X_test = pad_sequences(X_test, **pad_options)

print(X_train.shape)

## 4. 모델구성 및 validation set 구성
 - 3가지 이상
 
 
 #### 4-1. LSTM

In [None]:
# Size the embedding table from the vocabulary that load_data() actually built.
# The previous literal (100000) was ten times larger than the 10,000-entry
# vocabulary, which only added embedding rows that no index can ever reach.
vocab_size = len(word_to_index)
word_vector_dim = 47  # embedding dimension (tunable hyperparameter)


model_LSTM = keras.Sequential()
model_LSTM.add(keras.layers.Embedding(vocab_size, word_vector_dim, input_shape=(None,)))
model_LSTM.add(keras.layers.LSTM(128))   # LSTM with a 128-dimensional state vector (tunable)
model_LSTM.add(keras.layers.Dense(8, activation='relu'))
model_LSTM.add(keras.layers.Dense(1, activation='sigmoid'))  # single sigmoid output: positive vs. negative

model_LSTM.summary()

#### 4-2.   1-D CNN

In [None]:
# 1-D CNN: two Conv1D stages with max pooling in between, then a small dense head.
model_1DCNN = keras.Sequential([
    keras.layers.Embedding(vocab_size, word_vector_dim, input_shape=(None,)),
    keras.layers.Conv1D(16, 7, activation='relu'),
    keras.layers.MaxPooling1D(5),
    keras.layers.Conv1D(16, 7, activation='relu'),
    keras.layers.GlobalMaxPooling1D(),
    keras.layers.Dense(8, activation='relu'),
    keras.layers.Dense(1, activation='sigmoid'),  # single sigmoid output: positive vs. negative
])

model_1DCNN.summary()

#### 4-3.  GlobalMaxPooling1D

In [None]:
# Embedding followed directly by global max pooling and a small dense head.
model_GlobMaxPool = keras.Sequential([
    keras.layers.Embedding(vocab_size, word_vector_dim, input_shape=(None,)),
    keras.layers.GlobalMaxPooling1D(),
    keras.layers.Dense(8, activation='relu'),
    keras.layers.Dense(1, activation='sigmoid'),  # single sigmoid output: positive vs. negative
])

model_GlobMaxPool.summary()

#### Validation Set

In [None]:
# Hold out the first 46,182 training samples as the validation set and keep
# the remaining samples for training.
val_size = 46182
X_val, partial_X_train = X_train[:val_size], X_train[val_size:]
y_val, partial_y_train = y_train[:val_size], y_train[val_size:]

print(partial_X_train.shape)
print(partial_y_train.shape)

## 5. Training Model

In [None]:
# Binary classification setup: Adam optimizer with binary cross-entropy loss.
model_LSTM.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

epochs = 10  # adjust after inspecting the training/validation curves

history_LSTM = model_LSTM.fit(
    partial_X_train,
    partial_y_train,
    validation_data=(X_val, y_val),
    batch_size=512,
    epochs=epochs,
    verbose=1,
)

In [None]:
# Binary classification setup: Adam optimizer with binary cross-entropy loss.
model_1DCNN.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

epochs = 8  # adjust after inspecting the training/validation curves

history_1DCNN = model_1DCNN.fit(
    partial_X_train,
    partial_y_train,
    validation_data=(X_val, y_val),
    batch_size=512,
    epochs=epochs,
    verbose=1,
)

In [None]:
# Binary classification setup: Adam optimizer with binary cross-entropy loss.
model_GlobMaxPool.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

epochs = 7  # adjust after inspecting the training/validation curves

history_GlobMaxPool = model_GlobMaxPool.fit(
    partial_X_train,
    partial_y_train,
    validation_data=(X_val, y_val),
    batch_size=512,
    epochs=epochs,
    verbose=1,
)

## 6. Loss, Accuracy 그래프 시각화

In [None]:
# Evaluate all three models on the held-out test split, then print the results.
results_LSTM = model_LSTM.evaluate(X_test, y_test, verbose=2)
results_1DCNN = model_1DCNN.evaluate(X_test, y_test, verbose=2)
results_GlobMaxPool = model_GlobMaxPool.evaluate(X_test, y_test, verbose=2)

for model_results in (results_LSTM, results_1DCNN, results_GlobMaxPool):
    print(model_results)

In [None]:
# Per-epoch metric dictionaries recorded by fit() for each model.
history__LSTM_dict = history_LSTM.history
history__1DCNN_dict = history_1DCNN.history
history__GlobMaxPool_dict = history_GlobMaxPool.history

# The keys are the metrics that can be plotted per epoch.
for metrics_by_epoch in (history__LSTM_dict, history__1DCNN_dict, history__GlobMaxPool_dict):
    print(metrics_by_epoch.keys())


In [None]:
import matplotlib.pyplot as plt

# Pull the per-epoch curves out of the history dictionaries. The accuracy
# series are kept as module-level names because the accuracy-plot cell reads them.
acc_LSTM = history__LSTM_dict['accuracy']
val_acc_LSTM = history__LSTM_dict['val_accuracy']
loss_LSTM = history__LSTM_dict['loss']
val_loss_LSTM = history__LSTM_dict['val_loss']

acc_1DCNN = history__1DCNN_dict['accuracy']
val_acc_1DCNN = history__1DCNN_dict['val_accuracy']
loss_1DCNN = history__1DCNN_dict['loss']
val_loss_1DCNN = history__1DCNN_dict['val_loss']

acc_GlobMaxPool = history__GlobMaxPool_dict['accuracy']
val_acc_GlobMaxPool = history__GlobMaxPool_dict['val_accuracy']
loss_GlobMaxPool = history__GlobMaxPool_dict['loss']
val_loss_GlobMaxPool = history__GlobMaxPool_dict['val_loss']

epochs_LSTM = range(1, len(acc_LSTM) + 1)
epochs_1DCNN = range(1, len(acc_1DCNN) + 1)
epochs_GlobMaxPool = range(1, len(acc_GlobMaxPool) + 1)

# rcParams only affects figures created afterwards, so set the size before the
# figure exists; setting it after the first subplot() call was too late.
plt.rcParams["figure.figsize"] = (15, 10)
plt.figure()

loss_curves = [
    ('LSTM', epochs_LSTM, loss_LSTM, val_loss_LSTM),
    ('1DCNN', epochs_1DCNN, loss_1DCNN, val_loss_1DCNN),
    ('GlobMaxPool', epochs_GlobMaxPool, loss_GlobMaxPool, val_loss_GlobMaxPool),
]
for position, (model_name, epoch_range, train_loss, val_loss) in enumerate(loss_curves, start=1):
    plt.subplot(2, 2, position)
    plt.plot(epoch_range, train_loss, 'bo', label='Training loss')   # 'bo': blue dots
    plt.plot(epoch_range, val_loss, 'b', label='Validation loss')    # 'b': solid blue line
    plt.title('{} Training and validation loss'.format(model_name))
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()

plt.show()

In [None]:
# Start from a fresh figure. The size is set before the figure is created
# because rcParams does not resize a figure that already exists.
plt.rcParams["figure.figsize"] = (15, 10)
plt.figure()

accuracy_curves = [
    ('LSTM', epochs_LSTM, acc_LSTM, val_acc_LSTM),
    ('1DCNN', epochs_1DCNN, acc_1DCNN, val_acc_1DCNN),
    ('GlobMaxPool', epochs_GlobMaxPool, acc_GlobMaxPool, val_acc_GlobMaxPool),
]
for position, (model_name, epoch_range, train_acc, val_acc) in enumerate(accuracy_curves, start=1):
    plt.subplot(2, 2, position)
    plt.plot(epoch_range, train_acc, 'bo', label='Training acc')    # 'bo': blue dots
    plt.plot(epoch_range, val_acc, 'b', label='Validation acc')     # 'b': solid blue line
    # Include the model name so the three subplots can be told apart.
    plt.title('{} Training and validation accuracy'.format(model_name))
    plt.xlabel('Epochs')
    plt.ylabel('Accuracy')
    plt.legend()

plt.show()

## 7. 학습된 Embedding 레이어 분석

- 1. 가장 많이 쓰인 단어를 Sort 하고 그 중 많이 쓰인 Top100의 불용어를 추가하였다.

- 2. Train set = 100000, Validation set = 46182 개로 분리하였다.

- 3. Maxlen = 47로, Model의 Feature를 47로 설정하였다.

- 4. 3개의 모델 모두 Epoch가 2~3 이상이면 오버피팅했다.

- 5. Data는 많지만 Model이 Shallow 해서 성능이 잘 안올라 가는 것으로 분석된다.

## 8.  한국어 Word2Vec 임베딩 활용하여 성능개선
 - 한국어 Word2Vec은 다음 경로에서 구할 수 있습니다.
https://github.com/Kyubyong/wordvectors

In [None]:
import os
from gensim.models import KeyedVectors
import gensim

# Resolve the model path under the user's home directory. expanduser() is used
# instead of os.getenv('HOME') + ..., which raises TypeError when HOME is unset.
word2vec_path = os.path.expanduser('~/aiffel/sentiment_classification/ko/ko.bin')
word2vec = gensim.models.Word2Vec.load(word2vec_path)

In [None]:
from konlpy.tag import Mecab
# Mecab morphological analyzer; load_data() calls tokenizer.morphs() on every review.
tokenizer = Mecab()
# Tokens that load_data() discards right after tokenization.
stopwords = ['의','가','이','은','들','는','좀','잘','걍','과','도',
             '를','으로','자','에','와','한','하다','다','고','하','을','.','..',
             ',','었','만','는데','로','음','것','아','네요','어','같','했','에서','기','네','거'
             ,'수','되','면','게','지','있','나','점','인','주','내','~','던','어요','할','겠','1'
             ,'해','습니다','...','더','라','그','볼']



# Vocabulary size for this section: number of rows in embedding_matrix and in
# the Embedding layers below.
# NOTE(review): 30185 presumably matches the pretrained model's vocabulary size — confirm.
vocab_size = 30185
# Embedding width; pretrained vectors are copied row-by-row into
# embedding_matrix, so this has to equal their dimensionality.
word_vector_dim = 200

def load_data(train_data, test_data, num_words=vocab_size):
    """Clean, tokenize and index-encode the train/test review DataFrames.

    Rows with a duplicated ``document`` and rows containing NaN are dropped.
    Each remaining document is split with the module-level ``tokenizer`` and
    filtered against the module-level ``stopwords``. The vocabulary is built
    from the training tokens only.

    Args:
        train_data: DataFrame with ``document`` and ``label`` columns.
        test_data: DataFrame with ``document`` and ``label`` columns.
        num_words: Maximum vocabulary size, including the four reserved
            tokens ``<PAD>``, ``<BOS>``, ``<UNK>`` and ``<UNUSED>``. The
            default is the value ``vocab_size`` had when this function was
            defined.

    Returns:
        A tuple ``(X_train, y_train, X_test, y_test, word_to_index)`` where the
        ``X_*`` values are lists of index lists, the ``y_*`` values are NumPy
        arrays of labels and ``word_to_index`` maps each token to its index.
    """
    # Chain the non-inplace variants so the caller's DataFrames are not mutated.
    train_data = train_data.drop_duplicates(subset=['document']).dropna(how='any')
    test_data = test_data.drop_duplicates(subset=['document']).dropna(how='any')

    # A set gives O(1) membership tests; the list is scanned for every token otherwise.
    stopword_set = set(stopwords)

    def tokenize(sentence):
        return [word for word in tokenizer.morphs(sentence) if word not in stopword_set]

    X_train = [tokenize(sentence) for sentence in train_data['document']]
    X_test = [tokenize(sentence) for sentence in test_data['document']]

    # Count training tokens in document order; most_common() keeps first-seen
    # order among equal counts, so the ranking matches a flattened token list.
    counter = Counter()
    for tokens in X_train:
        counter.update(tokens)

    # Use the num_words argument (previously the global vocab_size was read
    # here, so the parameter had no effect); 4 slots are reserved.
    most_common = counter.most_common(max(num_words - 4, 0))
    vocab = ['<PAD>', '<BOS>', '<UNK>', '<UNUSED>'] + [key for key, _ in most_common]
    word_to_index = {word: index for index, word in enumerate(vocab)}
    unk_index = word_to_index['<UNK>']

    def wordlist_to_indexlist(wordlist):
        return [word_to_index.get(word, unk_index) for word in wordlist]

    X_train = [wordlist_to_indexlist(tokens) for tokens in X_train]
    X_test = [wordlist_to_indexlist(tokens) for tokens in X_test]

    y_train = np.array(list(train_data['label']))
    y_test = np.array(list(test_data['label']))
    return X_train, y_train, X_test, y_test, word_to_index

X_train, y_train, X_test, y_test, word_to_index = load_data(train_data, test_data)

# The data was just re-encoded with the larger vocabulary, so it has to be
# padded and split again. Without this the models below keep training on
# partial_X_train / X_val, which were encoded with the earlier 10,000-word
# vocabulary, and the freshly encoded X_train is never used.
X_train = pad_sequences(X_train,
                        value=word_to_index["<PAD>"],
                        padding='pre',
                        maxlen=maxlen)
X_test = pad_sequences(X_test,
                       value=word_to_index["<PAD>"],
                       padding='pre',
                       maxlen=maxlen)

# Same hold-out as before: the first 46,182 samples are the validation set.
X_val = X_train[:46182]
y_val = y_train[:46182]
partial_X_train = X_train[46182:]
partial_y_train = y_train[46182:]

In [None]:
# Invert the rebuilt vocabulary so that indices can be mapped back to tokens.
index_to_word = dict(zip(word_to_index.values(), word_to_index.keys()))

In [None]:
import numpy as np

# Start from random vectors; rows without a pretrained vector keep these values.
embedding_matrix = np.random.rand(vocab_size, word_vector_dim)

# Word2Vec.load() returns a full model whose vectors are exposed through `.wv`;
# indexing the model object itself is not supported by recent gensim releases.
# Fall back to the object itself in case it already is a keyed-vectors instance.
word_vectors = getattr(word2vec, 'wv', word2vec)

# Copy pretrained vectors for every vocabulary word that has one. Iterating
# over the real vocabulary avoids a KeyError when it holds fewer than
# vocab_size entries; indices 0-3 are the reserved special tokens.
for index, word in index_to_word.items():
    if index < 4 or index >= vocab_size:
        continue
    if word in word_vectors:
        embedding_matrix[index] = word_vectors[word]

In [None]:
from keras.initializers import Constant


# Must match the shape of embedding_matrix.
vocab_size = 30185
word_vector_dim = 200

# Build the 1-D CNN on top of the pretrained embedding. The initializer is
# taken from the same `keras` namespace as the layers (tensorflow.keras)
# instead of the standalone `keras` package; mixing the two is version-fragile.
model_1DCNN = keras.Sequential()
model_1DCNN.add(keras.layers.Embedding(vocab_size,
                                 word_vector_dim,
                                 embeddings_initializer=keras.initializers.Constant(embedding_matrix),  # start from the copied Word2Vec vectors
                                 input_length=maxlen,
                                 trainable=True))   # trainable=True fine-tunes the embedding
model_1DCNN.add(keras.layers.Conv1D(64, 7, activation='relu'))
model_1DCNN.add(keras.layers.MaxPooling1D(5))
model_1DCNN.add(keras.layers.Conv1D(64, 7, activation='relu'))
model_1DCNN.add(keras.layers.GlobalMaxPooling1D())
model_1DCNN.add(keras.layers.Dense(32, activation='relu'))
model_1DCNN.add(keras.layers.Dense(1, activation='sigmoid'))  # single sigmoid output: positive vs. negative

model_1DCNN.summary()

In [None]:
# 학습의 진행
from keras import optimizers

# Build the optimizer from the same `keras` namespace as the model
# (tensorflow.keras) and use `learning_rate`; `lr` is a deprecated alias.
Adam = keras.optimizers.Adam(learning_rate=0.0003)

model_1DCNN.compile(optimizer=Adam,
              loss='binary_crossentropy',
              metrics=['accuracy'])

epochs = 15  # adjust after inspecting the training/validation curves

# Store the run as history_1DCNN, matching history_LSTM / history_GlobMaxPool.
history_1DCNN = model_1DCNN.fit(partial_X_train,
                    partial_y_train,
                    epochs=epochs,
                    batch_size=512,
                    validation_data=(X_val, y_val),
                    verbose=1)
history = history_1DCNN  # keep the original name available

In [None]:
# Stacked LSTM on top of the pretrained embedding. The initializer is taken
# from the same `keras` namespace as the layers (tensorflow.keras) instead of
# the standalone `keras` package; mixing the two is version-fragile.
model_LSTM = keras.Sequential()
model_LSTM.add(keras.layers.Embedding(vocab_size, word_vector_dim,
                                      embeddings_initializer=keras.initializers.Constant(embedding_matrix),
                                      input_length=maxlen,
                                      trainable=True))
# Every LSTM except the last returns the full sequence so that the next LSTM
# layer receives one vector per time step.
model_LSTM.add(keras.layers.LSTM(512, return_sequences=True))
model_LSTM.add(keras.layers.LSTM(256, return_sequences=True))
model_LSTM.add(keras.layers.LSTM(128, return_sequences=True))
model_LSTM.add(keras.layers.LSTM(128, return_sequences=True))
model_LSTM.add(keras.layers.LSTM(64, return_sequences=True))
model_LSTM.add(keras.layers.LSTM(64, return_sequences=False))
model_LSTM.add(keras.layers.Dense(16, activation='relu'))
model_LSTM.add(keras.layers.Dense(1, activation='sigmoid'))  # single sigmoid output: positive vs. negative

model_LSTM.summary()

In [None]:
# Build the optimizer from the same `keras` namespace as the model
# (tensorflow.keras) and use `learning_rate`; `lr` is a deprecated alias.
Adam = keras.optimizers.Adam(learning_rate=0.0002)
model_LSTM.compile(optimizer=Adam,
              loss='binary_crossentropy',
              metrics=['accuracy'])

epochs = 10  # adjust after inspecting the training/validation curves

history_LSTM = model_LSTM.fit(partial_X_train,
                    partial_y_train,
                    epochs=epochs,
                    batch_size=512,
                    validation_data=(X_val, y_val),
                    verbose=1)

In [None]:
# GlobalMaxPooling model for the Word2Vec section. The embedding is initialised
# from embedding_matrix like the other two models in this section; it was
# previously left randomly initialised, so this model did not use the
# pretrained vectors at all.
model_GlobMaxPool = keras.Sequential()
model_GlobMaxPool.add(keras.layers.Embedding(vocab_size, word_vector_dim,
                                             embeddings_initializer=keras.initializers.Constant(embedding_matrix),
                                             input_length=maxlen,
                                             trainable=True))
model_GlobMaxPool.add(keras.layers.GlobalMaxPooling1D())
model_GlobMaxPool.add(keras.layers.Dense(8, activation='relu'))
model_GlobMaxPool.add(keras.layers.Dense(1, activation='sigmoid'))  # single sigmoid output: positive vs. negative

model_GlobMaxPool.summary()

In [None]:
# Build the optimizer from the same `keras` namespace as the model
# (tensorflow.keras) and use `learning_rate`; `lr` is a deprecated alias.
Adam = keras.optimizers.Adam(learning_rate=0.0002)
model_GlobMaxPool.compile(optimizer=Adam,
              loss='binary_crossentropy',
              metrics=['accuracy'])

epochs = 10  # adjust after inspecting the training/validation curves

history_GlobMaxPool = model_GlobMaxPool.fit(partial_X_train,
                    partial_y_train,
                    epochs=epochs,
                    batch_size=512,
                    validation_data=(X_val, y_val),
                    verbose=1)

In [None]:
import matplotlib.pyplot as plt

# Refresh the history dictionaries from the runs trained in this section;
# otherwise the curves plotted below are still those of the earlier models.
# The retrained 1-D CNN run is stored under the name `history`.
history__LSTM_dict = history_LSTM.history
history__1DCNN_dict = history.history
history__GlobMaxPool_dict = history_GlobMaxPool.history

# The accuracy series are kept as module-level names because the
# accuracy-plot cell reads them.
acc_LSTM = history__LSTM_dict['accuracy']
val_acc_LSTM = history__LSTM_dict['val_accuracy']
loss_LSTM = history__LSTM_dict['loss']
val_loss_LSTM = history__LSTM_dict['val_loss']

acc_1DCNN = history__1DCNN_dict['accuracy']
val_acc_1DCNN = history__1DCNN_dict['val_accuracy']
loss_1DCNN = history__1DCNN_dict['loss']
val_loss_1DCNN = history__1DCNN_dict['val_loss']

acc_GlobMaxPool = history__GlobMaxPool_dict['accuracy']
val_acc_GlobMaxPool = history__GlobMaxPool_dict['val_accuracy']
loss_GlobMaxPool = history__GlobMaxPool_dict['loss']
val_loss_GlobMaxPool = history__GlobMaxPool_dict['val_loss']

epochs_LSTM = range(1, len(acc_LSTM) + 1)
epochs_1DCNN = range(1, len(acc_1DCNN) + 1)
epochs_GlobMaxPool = range(1, len(acc_GlobMaxPool) + 1)

# rcParams only affects figures created afterwards, so set the size before the
# figure exists; setting it after the first subplot() call was too late.
plt.rcParams["figure.figsize"] = (15, 10)
plt.figure()

loss_curves = [
    ('LSTM', epochs_LSTM, loss_LSTM, val_loss_LSTM),
    ('1DCNN', epochs_1DCNN, loss_1DCNN, val_loss_1DCNN),
    ('GlobMaxPool', epochs_GlobMaxPool, loss_GlobMaxPool, val_loss_GlobMaxPool),
]
for position, (model_name, epoch_range, train_loss, val_loss) in enumerate(loss_curves, start=1):
    plt.subplot(2, 2, position)
    plt.plot(epoch_range, train_loss, 'bo', label='Training loss')   # 'bo': blue dots
    plt.plot(epoch_range, val_loss, 'b', label='Validation loss')    # 'b': solid blue line
    plt.title('{} Training and validation loss'.format(model_name))
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()

plt.show()

In [None]:
# Start from a fresh figure. The size is set before the figure is created
# because rcParams does not resize a figure that already exists.
plt.rcParams["figure.figsize"] = (15, 10)
plt.figure()

accuracy_curves = [
    ('LSTM', epochs_LSTM, acc_LSTM, val_acc_LSTM),
    ('1DCNN', epochs_1DCNN, acc_1DCNN, val_acc_1DCNN),
    ('GlobMaxPool', epochs_GlobMaxPool, acc_GlobMaxPool, val_acc_GlobMaxPool),
]
for position, (model_name, epoch_range, train_acc, val_acc) in enumerate(accuracy_curves, start=1):
    plt.subplot(2, 2, position)
    plt.plot(epoch_range, train_acc, 'bo', label='Training acc')    # 'bo': blue dots
    plt.plot(epoch_range, val_acc, 'b', label='Validation acc')     # 'b': solid blue line
    # Include the model name so the three subplots can be told apart.
    plt.title('{} Training and validation accuracy'.format(model_name))
    plt.xlabel('Epochs')
    plt.ylabel('Accuracy')
    plt.legend()

plt.show()