# 7월 30일 딥러닝 실기평가
### https://github.com/MethodFunc

In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf

In [28]:
from sklearn.model_selection import train_test_split, KFold, cross_val_score
from sklearn.datasets import load_iris
from tensorflow import keras
from keras.models import Sequential
from keras.wrappers.scikit_learn import KerasClassifier
from keras.preprocessing.sequence import pad_sequences
from keras.preprocessing.text import Tokenizer
from keras.layers import Dense, GRU, SimpleRNN, MaxPooling2D, Conv2D, LSTM, Embedding, Dropout, Flatten
from keras.callbacks import ModelCheckpoint, EarlyStopping

In [13]:
SEED = 2020
TEST_SIZE = 0.2
EPOCHS = 30
BATCH_SIZE = 4
N_SPLITS = 5

In [14]:
np.random.seed(SEED)
tf.random.set_seed(SEED)

## 1. Iris 데이터에 대해서 5겹 교차검증을 사용하여 분류하시오


In [5]:
iris = load_iris()

In [6]:
x_data = iris.data
y_data = iris.target

In [7]:
y_data = keras.utils.to_categorical(y_data)

In [8]:
x_train, x_test, y_train, y_test = train_test_split(x_data, y_data, test_size=TEST_SIZE, random_state=SEED)

In [9]:
def model_create():
    model = Sequential([
        Dense(128, activation='relu', input_dim=4),
        Dense(64, activation='relu'),
        Dense(32, activation='relu'),
        Dense(16, activation='relu'),
        Dense(3, activation='softmax')
    ])

    model.compile('Adam', 'categorical_crossentropy', ['accuracy'])

    return model

In [12]:
history = KerasClassifier(build_fn=model_create, epochs=EPOCHS, batch_size=BATCH_SIZE, verbose=0)


In [13]:
kfold = KFold(n_splits=5, shuffle=True, random_state=SEED)

In [14]:
result = cross_val_score(history, x_test, y_test, cv=kfold)

In [23]:
print(np.sum(result)/len(result))

0.9


## 2 Fashin MNIST Dataset CNN분류

In [26]:
from keras.datasets import fashion_mnist

In [27]:
(x_train, y_train), (x_test, y_test) = fashion_mnist.load_data()

In [28]:
x_train.shape, y_train.shape, x_test.shape

((60000, 28, 28), (60000,), (10000, 28, 28))

In [29]:
x_train = x_train.reshape(-1, 28,28,1) / 255.0
x_test = x_test.reshape(-1, 28,28,1) / 255.0

In [30]:
y_train = keras.utils.to_categorical(y_train)
y_test = keras.utils.to_categorical(y_test)

In [31]:
model2 = Sequential([
    Conv2D(32, kernel_size=3, input_shape=(28, 28, 1), activation='relu'),
    Conv2D(64, kernel_size=3, activation='relu'),
    MaxPooling2D(2),
    Dropout(0.25),
    Flatten(),
    Dense(64, activation='relu'),
    Dense(10, activation='softmax')

])

In [32]:
model2.compile('Adam', 'categorical_crossentropy', ['accuracy'])


In [33]:
history2 = model2.fit(x_train, y_train, epochs=10, batch_size=100)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [39]:
model2.evaluate(x_test, y_test)[1]



0.9199000000953674

## 3. IMDB 영화리뷰 데이터



In [73]:
imdb = pd.read_csv('data/labeledTrainData.tsv', delimiter='\t')

In [74]:
imdb

Unnamed: 0,id,sentiment,review
0,5814_8,1,With all this stuff going down at the moment w...
1,2381_9,1,"\The Classic War of the Worlds\"" by Timothy Hi..."
2,7759_3,0,The film starts with a manager (Nicholas Bell)...
3,3630_4,0,It must be assumed that those who praised this...
4,9495_8,1,Superbly trashy and wondrously unpretentious 8...
...,...,...,...
24995,3453_3,0,It seems like more consideration has gone into...
24996,5064_1,0,I don't believe they made this film. Completel...
24997,10905_3,0,"Guy is a loser. Can't get girls, needs to buil..."
24998,10194_3,0,This 30 minute documentary Buñuel made in the ...


In [75]:
del imdb['id']

In [76]:
x_data = imdb['review'].values
y_data = imdb['sentiment'].values

In [77]:
tokenizer = Tokenizer()
tokenizer.fit_on_texts(x_data)
sequences = tokenizer.texts_to_sequences(x_data)

In [78]:
word_to_index = tokenizer.word_index

In [79]:
vocab_size = len(word_to_index)+1
vocab_size

88583

In [80]:
x_data = sequences
max_len = max(len(l) for l in x_data)

In [81]:
data = pad_sequences(x_data, maxlen=max_len)

In [82]:
data.shape

(25000, 2493)

In [84]:
x_train, x_test, y_train, y_test = train_test_split(data, y_data, test_size = TEST_SIZE, random_state=SEED)

In [85]:
x_train.shape

(20000, 2493)

In [86]:
model = Sequential([
    Embedding(vocab_size, 32),
    SimpleRNN(32),
    Dense(1, activation='sigmoid')
])


In [87]:
model.compile('Adam', 'binary_crossentropy', ['accuracy'])

In [91]:
history3 = model.fit(x_train, y_train, epochs=10,  batch_size=100, validation_split=0.2, verbose=1)

Train on 16000 samples, validate on 4000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [93]:
model.evaluate(x_test, y_test)[1]



0.7979999780654907