In [2]:
## 기본 모듈
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split

## 딥러닝 모듈
import tensorflow
from tensorflow import keras
from keras.models import Sequential, Model
from keras.layers import Input, Dense, Flatten, Conv2D, MaxPooling2D
from keras.layers import BatchNormalization, Dropout
from keras.utils import to_categorical
from keras.losses import MSE
from keras.optimizers import Adam
from keras.callbacks import TensorBoard, EarlyStopping, ModelCheckpoint

## 1. imdb : 영화 리뷰 분류

In [3]:
from keras.datasets import imdb

In [4]:
dir(imdb)

['__builtins__',
 '__cached__',
 '__doc__',
 '__file__',
 '__loader__',
 '__name__',
 '__package__',
 '__spec__',
 '_remove_long_seq',
 'get_file',
 'get_word_index',
 'json',
 'keras_export',
 'load_data',
 'logging',
 'np']

In [5]:
(X_train_full, y_train_full), (X_test, y_test) = imdb.load_data(num_words=10000)

In [6]:
# pd.Series(X_train_full).apply(len).sum()

In [7]:
print(pd.Series(X_train_full).apply(len).max())
print(pd.Series(X_train_full).apply(len).min())
# np.apply_along_axis 로도 사용 가능

2494
11


In [8]:
# word_index는 단어와 정수 인덱스를 매핑한 딕셔너리
word_index = imdb.get_word_index()
# 정수 인덱스와 단어를 매핑하도록 뒤집기
index_word = {v:k for k,v in word_index.items()}

In [9]:
word = []
for i in X_train_full[0]:
    word.append(index_word[i])
sentence = " ".join(word)
sentence

"the as you with out themselves powerful lets loves their becomes reaching had journalist of lot from anyone to have after out atmosphere never more room and it so heart shows to years of every never going and help moments or of every chest visual movie except her was several of enough more with is now current film as you of mine potentially unfortunately of you than him that with out themselves her get for was camp of you movie sometimes movie that with scary but and to story wonderful that in seeing in character to of 70s musicians with heart had shadows they of here that with her serious to have does when from why what have critics they is you that isn't one will very to as itself with other and in of seen over landed for anyone of and br show's to whether from than out themselves history he name half some br of and odd was two most of mean for 1 any an boat she he should is thought frog but of script you not while history he heart to real at barrel but when from one bit then have t

In [10]:
words = []
for word in X_train_full:
    words.extend(word)

In [11]:
len(set(words))

9998

In [12]:
np.unique(np.concatenate(X_train_full)).size

9998

In [13]:
len(words)

5967841

In [14]:
def vectorizer(doc,dim):
    result = np.zeros((len(doc),dim))
    for idx, doc in enumerate(doc):
        result[idx, doc] = 1
    return result

In [15]:
X_train_full = vectorizer(X_train_full,10000)
X_test = vectorizer(X_test,10000)

In [16]:
X_train, X_val, y_train, y_val = train_test_split(X_train_full, y_train_full)

In [17]:
input_shape = X_train.shape[1:]
model = Sequential(
    [
        Dense(16,activation="relu",input_shape=input_shape),
        Dense(16,activation="relu"),
        Dense(1,activation="relu")        
    ]
)

In [18]:
model.compile(optimizer='rmsprop',
                loss='binary_crossentropy',
                metrics=['accuracy'])

In [19]:
model.fit(X_train,
            y_train,
            epochs=20,
            batch_size=512,
            validation_data=(X_val, y_val))

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x2380c30f610>

In [23]:
# x = np.random.randint(1,100,100)

In [24]:
# X_train_full = vectorizer(X_train_full,10000)
# X_test = vectorizer(X_test,10000)

## 2. reuters : 뉴스 기사 분류

In [25]:
from keras.datasets import reuters

In [26]:
(X_train_full, y_train_full), (X_test, y_test) = reuters.load_data(num_words=10000)

In [27]:
len(X_train_full)
len(X_test)

2246

In [28]:
np.unique(np.concatenate(X_train_full)).size
np.unique(np.concatenate(X_train_full)).max()

9999

In [29]:
X_train_full = vectorizer(X_train_full,10000)

In [30]:
X_test = vectorizer(X_test,10000)

In [31]:
X_train, X_val, y_train, y_val = train_test_split(X_train_full, y_train_full)

In [32]:
y_train = to_categorical(y_train)
y_val = to_categorical(y_val)
y_test = to_categorical(y_test)

In [33]:
input_shape = X_train.shape[1:]
output_shape = y_train.shape[1]
model = Sequential(
    [
        Dense(64,activation="relu",input_shape=input_shape),
        Dense(64,activation="relu"),
        Dense(output_shape,activation="softmax")
    ]
)

In [34]:
model.compile(optimizer='rmsprop',
                loss='categorical_crossentropy',
                metrics=['accuracy'])

In [35]:
file_path = "./reuter/checkpoint"
my_callback = [ModelCheckpoint(filepath=file_path,
                                save_best_only=True,
                                monitor="val_loss"),
                EarlyStopping(patience=10)]

In [36]:
model.fit(X_train, 
            y_train,
            epochs=20,
            batch_size=512,
            validation_data=(X_val,y_val),
            callbacks=my_callback)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20


<keras.callbacks.History at 0x23813d3a650>

In [37]:
model.evaluate(X_test, y_test)



[1.2392115592956543, 0.7729296684265137]

In [None]:
### modual check 하는법
# dir(ModelCheckpoint)
# ModelCheckpoint??

## 3. mnist

In [39]:
from keras.datasets import mnist

In [40]:
(X_train, y_train), (X_test, y_test) = mnist.load_data()

In [43]:
X_train.shape

(60000, 28, 28)

In [52]:
input_shape = X_train.shape[1:]

model = Sequential(
    [
        Flatten(input_shape=input_shape),
        Dense(100,activation="relu"),
        Dense(100,activation="relu"),
        Dense(10,activation="softmax")
    ]
)

In [53]:
optimizer = "rmsprop"
loss = "sparse_categorical_crossentropy"
metrics = ["sparse_categorical_accuracy"]

model.compile(optimizer=optimizer,
                loss=loss,
                metrics=metrics)

In [54]:
model.fit(X_train,
            y_train, 
            epochs=20,
            batch_size=1000,
            validation_data=(X_test,y_test))

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x23813d2fb20>

In [55]:
model.evaluate(X_test, y_test)



[0.42040297389030457, 0.9613000154495239]