In [1]:
# -*- coding: utf-8 -*-
"""과제5 CNN기반 영상분류문제

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1Py2GvZc1GwqNbrR4fuQrHFy6zBEdF1xF
"""

import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Flatten,Dense
from tensorflow.keras.optimizers import Adam
import os
from tensorflow.keras.datasets import cifar10
from sklearn.model_selection import train_test_split
import random

(x_train,y_train),(x_test,y_test)=cifar10.load_data()
x_train=x_train.astype(np.float32)/255.0
y_train=tf.keras.utils.to_categorical(y_train,10)

x_test=x_test.astype(np.float32)/255.0
y_test=tf.keras.utils.to_categorical(y_test,10)

x_val, _, y_val,_ = train_test_split(x_test, y_test, test_size=0.6, random_state=1)

input_shape = x_train.shape[1:]

Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz


In [2]:
#동일조건 유지해야 하는 변수(두 모델 모두 동일하게 적용해야 함)
g_epoch = 70
g_batch = 64

#중요 : 아래함수 변경 불가!
def reset_random_seeds():
   os.environ['PYTHONHASHSEED']=str(1)
   tf.random.set_seed(1)
   np.random.seed(1)
   random.seed(1)
   os.environ['TF_DETERMINISTIC_OPS'] = '1'

reset_random_seeds() #필수
   
print("reduced train/val size:", len(x_train), len(x_val), "input shape:", input_shape)

reduced train/val size: 50000 4000 input shape: (32, 32, 3)


In [3]:
# to make this notebook's output stable across runs

tf.__version__

from tensorflow.keras.layers import MaxPooling2D, Dropout, Conv2D

cnn=Sequential()
cnn.add(Conv2D(64,(3,3),activation='relu',input_shape=(32,32,3)))
cnn.add(MaxPooling2D(pool_size=(2,2)))
cnn.add(Dropout(0.25))
cnn.add(Conv2D(128,(3,3),activation='relu', padding='same'))
cnn.add(MaxPooling2D(pool_size=(2,2)))
cnn.add(Dropout(0.25))
cnn.add(Conv2D(256,(3,3),activation='relu', padding='same'))
cnn.add(Conv2D(256,(3,3),activation='relu', padding='same'))
cnn.add(MaxPooling2D(pool_size=(2,2)))
cnn.add(Flatten())
cnn.add(Dense(1000,activation='relu'))
cnn.add(Dropout(0.5))
cnn.add(Dense(10,activation='softmax'))

cnn.compile(loss='categorical_crossentropy',optimizer=Adam(0.00002),metrics=['accuracy'])
cnn.summary()

hist=cnn.fit(x_train, y_train, batch_size=g_batch, epochs=g_epoch,
             validation_data=(x_val,y_val), verbose=1)

g_org_res=cnn.evaluate(x_test,y_test,verbose=0)
print("Baseline 정확률은",g_org_res[1]*100)

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 30, 30, 64)        1792      
                                                                 
 max_pooling2d (MaxPooling2D  (None, 15, 15, 64)       0         
 )                                                               
                                                                 
 dropout (Dropout)           (None, 15, 15, 64)        0         
                                                                 
 conv2d_1 (Conv2D)           (None, 15, 15, 128)       73856     
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 7, 7, 128)        0         
 2D)                                                             
                                                                 
 dropout_1 (Dropout)         (None, 7, 7, 128)         0

In [9]:
no_class = 10

# for transfer learning only
from tensorflow.keras.applications import VGG16
from tensorflow.keras.preprocessing.image import ImageDataGenerator

datagen = ImageDataGenerator(
    rotation_range=10,       # 회전
    zoom_range=0.1,          # 줌
    width_shift_range=0.1,   # 좌우 이동
    height_shift_range=0.1,  # 상하 이동
    shear_range=0.1,         # 전단 변환
    horizontal_flip=True,    # 좌우 반전
    vertical_flip=False,     # 상하 반전
    fill_mode='nearest'      # 새로 생성된 픽셀을 채우는 방법
)

(x_train_aug, y_train_aug) = datagen.flow(x_train, y_train, batch_size=256, shuffle=True, seed=1).next()

os.environ['PYTHONHASHSEED']=str(1)
tf.random.set_seed(1)
np.random.seed(1)
random.seed(1)

# for transfer learning only
transfermodel = VGG16(weights='imagenet',include_top=False,
                    input_shape=input_shape)
#base_model.trainable=False     # it's up to you

# your model architecture
model=Sequential()
# 전처리 레이어 추가/변경 가능
model.add(transfermodel)    # for transfer learning only
model.add(Flatten())        # for transfer learning only
model.add(Dropout(0.25))
model.add(Dense(1000,activation='relu')) # <<-- 변경가능
model.add(Dropout(0.5))
model.add(Dense(no_class, activation='softmax')) # <<-- activation은 변경가능

model.compile(loss='categorical_crossentropy',optimizer=Adam(0.00002),
              metrics=['accuracy']) # <<-- 변경가능

batch_siz = 128
generator = ImageDataGenerator(width_shift_range=0.1, height_shift_range = 0.1,
                               horizontal_flip = True)
model.summary()

# hist=model.fit(x_train, y_train, batch_size=g_batch, epochs=g_epoch,
#              validation_data=(x_val,y_val), verbose=1)
hist=model.fit_generator(generator.flow(x_train, y_train,
                         batch_size = batch_siz), epochs = 70,
                         validation_data = (x_test, y_test), verbose=2)

yours=model.evaluate(x_test,y_test,verbose=0)
print("Baseline vs yours: ",g_org_res[1]*100, yours[1]*100)

Model: "sequential_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 vgg16 (Functional)          (None, 1, 1, 512)         14714688  
                                                                 
 flatten_4 (Flatten)         (None, 512)               0         
                                                                 
 dropout_9 (Dropout)         (None, 512)               0         
                                                                 
 dense_8 (Dense)             (None, 1000)              513000    
                                                                 
 dropout_10 (Dropout)        (None, 1000)              0         
                                                                 
 dense_9 (Dense)             (None, 10)                10010     
                                                                 
Total params: 15,237,698
Trainable params: 15,237,698


  hist=model.fit_generator(generator.flow(x_train, y_train,


391/391 - 48s - loss: 1.4077 - accuracy: 0.5035 - val_loss: 0.8391 - val_accuracy: 0.7096 - 48s/epoch - 123ms/step
Epoch 2/70
391/391 - 42s - loss: 0.8939 - accuracy: 0.6975 - val_loss: 0.7008 - val_accuracy: 0.7603 - 42s/epoch - 108ms/step
Epoch 3/70
391/391 - 42s - loss: 0.7433 - accuracy: 0.7484 - val_loss: 0.6631 - val_accuracy: 0.7730 - 42s/epoch - 108ms/step
Epoch 4/70
391/391 - 43s - loss: 0.6553 - accuracy: 0.7773 - val_loss: 0.6300 - val_accuracy: 0.7878 - 43s/epoch - 109ms/step
Epoch 5/70
391/391 - 42s - loss: 0.5959 - accuracy: 0.7989 - val_loss: 0.5740 - val_accuracy: 0.8058 - 42s/epoch - 108ms/step
Epoch 6/70
391/391 - 43s - loss: 0.5457 - accuracy: 0.8150 - val_loss: 0.5214 - val_accuracy: 0.8280 - 43s/epoch - 109ms/step
Epoch 7/70
391/391 - 42s - loss: 0.4984 - accuracy: 0.8301 - val_loss: 0.4894 - val_accuracy: 0.8349 - 42s/epoch - 108ms/step
Epoch 8/70
391/391 - 42s - loss: 0.4659 - accuracy: 0.8424 - val_loss: 0.4582 - val_accuracy: 0.8448 - 42s/epoch - 107ms/step
Epo

KeyboardInterrupt: ignored

In [7]:
org = g_org_res[1]*100
yours = yours[1]*100

if yours > (org + 2):
    print('SUCCESS! Difference: {0:0.3f}'.format(
                        (yours - org)))
else:
    print('TRY DIFFERENTLY! Difference: {0:0.3f}'.format(
                        (yours - org)))

SUCCESS! Difference: 10.030


In [None]:
# Save the Predict File
submission = pd.read_csv('Assignmnet_4_sample_solution.csv')

# 최종 제출 모델에 예측을 함.
y_predict = model.predict(x_test)

submission['label'] = np.argmax(y_predict,axis = 1)
submission.to_csv('submission.csv',index=False)

