In [None]:
# Mount Google Drive into the Colab runtime at /content/drive; every data,
# model and submission path used in this notebook lives under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
import warnings
warnings.filterwarnings('ignore')

import tensorflow
from tensorflow.keras.callbacks import ReduceLROnPlateau
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split


# Index of the training set: one row per image with its file name and label.
train = pd.read_csv('/content/drive/MyDrive/data/손글씨/train/train_data.csv')

images = []
counts = 0

# Read every training image, flatten it to a 1-D pixel vector, and report
# progress every 500 files.
for counts, image_name in enumerate(train['file_name'], start=1):
    image_path = '/content/drive/MyDrive/data/손글씨/train/' + image_name
    image = np.array(Image.open(image_path)).flatten()
    images.append(image)
    if counts % 500 == 0:
        print(counts)

# One row per image, one column per pixel, with the label appended as the
# last column.
image_full = pd.DataFrame(images)
image_full['label'] = train['label']

# Features: every column except the trailing label, divided by 255
# (assumes 8-bit pixel values, giving a [0, 1] range — TODO confirm) and
# reshaped to (samples, 28, 28, 1) for the convolutional network.
pixel_frame = image_full.iloc[:, :-1]
data_x = (pixel_frame / 255.0).values.reshape(-1, 28, 28, 1)

# Targets: one-hot encoded over 10 classes.
data_y = to_categorical(train['label'], num_classes=10)

# 80/20 train/validation split, stratified on the targets, with a fixed seed.
x_train, x_val, y_train, y_val = train_test_split(
    data_x,
    data_y,
    test_size=.2,
    random_state=1234,
    stratify=data_y,
)

500
1000
1500
2000
2500
3000
3500
4000
4500
5000


In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Dataset-statistics based normalisation is switched off entirely; inputs are
# only scaled by the preprocessing done before this cell.
normalization_switches = dict(
    featurewise_center = False,
    samplewise_center = False,
    featurewise_std_normalization = False,
    samplewise_std_normalization = False,
    zca_whitening = False,
)

# Random geometric jitter applied to each training batch: small rotations,
# zooms and shifts. Flips are disabled.
geometric_jitter = dict(
    rotation_range = 15,
    zoom_range = 0.15,
    width_shift_range = 0.15,
    height_shift_range = 0.15,
    horizontal_flip = False,
    vertical_flip = False,
)

augmentation = ImageDataGenerator(**normalization_switches, **geometric_jitter)

# Per the Keras docs, fit() is only required when featurewise centering/std
# normalisation or ZCA whitening is enabled; the call is kept unchanged here.
augmentation.fit(x_train)

In [None]:
import tensorflow
from tensorflow.keras.callbacks import ReduceLROnPlateau

# CNN classifier for 28x28 single-channel images with 10 output classes.
# Two identical convolutional stages (three 3x3 / 64-filter 'same'-padded
# convolutions, 2x2 max pooling, 20% dropout) feed a dense head that ends in
# a 10-way softmax.
model = tensorflow.keras.Sequential([
    tensorflow.keras.layers.Conv2D(kernel_size=(3,3), filters=64, input_shape=(28,28,1), padding='same', activation='relu'),
    tensorflow.keras.layers.Conv2D(kernel_size=(3,3), filters=64, padding='same', activation='relu'),
    tensorflow.keras.layers.Conv2D(kernel_size=(3,3), filters=64, padding='same', activation='relu'),
    tensorflow.keras.layers.MaxPool2D(pool_size=(2,2)),
    tensorflow.keras.layers.Dropout(0.2),

    tensorflow.keras.layers.Conv2D(kernel_size=(3,3), filters=64, padding='same', activation='relu'),
    tensorflow.keras.layers.Conv2D(kernel_size=(3,3), filters=64, padding='same', activation='relu'),
    tensorflow.keras.layers.Conv2D(kernel_size=(3,3), filters=64, padding='same', activation='relu'),
    tensorflow.keras.layers.MaxPool2D(pool_size=(2,2)),
    tensorflow.keras.layers.Dropout(0.2),

    tensorflow.keras.layers.Flatten(),
    tensorflow.keras.layers.Dense(units=256, activation='relu'),
    tensorflow.keras.layers.Dense(units=64, activation='relu'),
    tensorflow.keras.layers.Dense(units=32, activation='relu'),
    tensorflow.keras.layers.Dropout(0.2),
    tensorflow.keras.layers.Dense(units = 10, activation='softmax')
])

# Use the supported `learning_rate` keyword instead of the deprecated `lr`
# alias, and drop `decay`: newer Keras optimizers reject both. The value
# passed for `decay` was 0.0 (no decay), so omitting it keeps the same
# optimisation behaviour.
optimizer = tensorflow.keras.optimizers.RMSprop(learning_rate = 0.001, rho = 0.9, epsilon = 1e-08)

# Categorical cross-entropy matches the one-hot encoded targets; accuracy is
# tracked so that 'val_accuracy' is available to the callback below.
model.compile(optimizer = optimizer,
              loss = "categorical_crossentropy",
              metrics = ["accuracy"])


# Halve the learning rate whenever validation accuracy has not improved for
# 3 epochs, never going below 1e-4.
call_back = ReduceLROnPlateau(monitor = 'val_accuracy', patience = 3, verbose = 1, factor = 0.5, min_lr = 0.0001)

In [None]:
# Train on augmented batches produced on the fly by the generator.
# `Model.fit_generator` is deprecated (and removed in current Keras);
# `Model.fit` accepts the same generator and arguments.
batch_size = 64

history = model.fit(augmentation.flow(x_train, y_train, batch_size = batch_size),
                    epochs = 300,
                    # Validation uses the raw (non-augmented) hold-out split.
                    validation_data = (x_val, y_val),
                    verbose = 1,
                    # The generator loops forever, so one epoch is defined as
                    # the number of full batches in the training set.
                    steps_per_epoch = x_train.shape[0] // batch_size,
                    callbacks = [call_back])

Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300
Epoch 10/300
Epoch 00010: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
Epoch 11/300
Epoch 12/300
Epoch 13/300
Epoch 14/300
Epoch 15/300
Epoch 16/300
Epoch 17/300
Epoch 18/300
Epoch 19/300
Epoch 20/300
Epoch 21/300
Epoch 00021: ReduceLROnPlateau reducing learning rate to 0.0002500000118743628.
Epoch 22/300
Epoch 23/300
Epoch 24/300
Epoch 25/300
Epoch 26/300
Epoch 00026: ReduceLROnPlateau reducing learning rate to 0.0001250000059371814.
Epoch 27/300
Epoch 28/300
Epoch 29/300
Epoch 00029: ReduceLROnPlateau reducing learning rate to 0.0001.
Epoch 30/300
Epoch 31/300
Epoch 32/300
Epoch 33/300
Epoch 34/300
Epoch 35/300
Epoch 36/300
Epoch 37/300
Epoch 38/300
Epoch 39/300
Epoch 40/300
Epoch 41/300
Epoch 42/300
Epoch 43/300
Epoch 44/300
Epoch 45/300
Epoch 46/300
Epoch 47/300
Epoch 48/300
Epoch 49/300
Epoch 50/300
Epoch 51/300
Epoch 52/300
Epoch 53/300
Epoch 54/300
Ep

In [None]:
# Persist the trained model to Drive as a single .h5 file so it can be
# reloaded later without retraining.
model.save('/content/drive/MyDrive/data/손글씨/model_image_aug5.h5')

In [None]:
from tensorflow.keras.models import load_model



# Reload the model from its .h5 file on Drive; this rebinds `model`.
model = load_model('/content/drive/MyDrive/data/손글씨/model_image_aug5.h5')

# Index of the test set: one row per image file.
test = pd.read_csv('/content/drive/MyDrive/data/손글씨/test/test_data.csv')

test_images = []
counts = 0

# Read every test image, flatten it to a 1-D pixel vector, and report
# progress every 500 files.
for counts, image_name in enumerate(test['file_name'], start=1):
    image_path = '/content/drive/MyDrive/data/손글씨/test/' + image_name
    image = np.array(Image.open(image_path)).flatten()
    test_images.append(image)
    if counts % 500 == 0:
        print(counts)

500
1000
1500
2000
2500
3000
3500
4000
4500
5000


In [None]:
# One row per test image, one column per pixel.
image_full_test = pd.DataFrame(test_images)

# Divide by 255 and reshape to (samples, 28, 28, 1), the input shape the
# network expects. Note that this rebinds `data_x`.
data_x = (image_full_test / 255.0).values.reshape(-1, 28, 28, 1)

In [None]:
# Per-class scores for every test image (one row per image).
pred = model.predict(data_x)

# Predicted class = index of the highest score in each row.
predict = np.argmax(a=pred, axis=1)


In [None]:
# Load the sample submission file; it serves as the template for the output
# and its `label` column holds placeholder values until predictions are
# written into it.
sub = pd.read_csv('/content/drive/MyDrive/data/손글씨/sample_submission.csv')
# Preview the first rows of the template.
sub.head()

Unnamed: 0,file_name,label
0,idx0001.png,-1
1,idx0002.png,-1
2,idx0003.png,-1
3,idx0004.png,-1
4,idx0005.png,-1


In [None]:
# Overwrite the placeholder labels with the predicted class indices.
# NOTE(review): this is a positional assignment — it assumes the rows of `sub`
# are in the same order as the test['file_name'] entries the predictions were
# built from; TODO confirm the two CSVs are aligned.
sub['label'] = predict
# Preview the filled-in submission.
sub.head()

Unnamed: 0,file_name,label
0,idx0001.png,1
1,idx0002.png,0
2,idx0003.png,8
3,idx0004.png,3
4,idx0005.png,5


In [None]:
# Write the submission to Drive; index = False keeps the DataFrame's row index
# out of the CSV.
sub.to_csv('/content/drive/MyDrive/data/손글씨/sub_image_aug5.csv', index = False)

In [None]:
# Results log, by submission file:
# sub_image_aug:  0.895, epoch 100
# sub_image_aug2: 0.908, epoch 100 + 150
# sub_image_aug3: 0.91,  epoch 400