In [8]:
# Default setup과 tile 함수 정의부입니다.

# Python ≥3.5 is required
import sys
assert sys.version_info >= (3, 5)

# Scikit-Learn ≥0.20 is required
import sklearn
assert sklearn.__version__ >= "0.20"

# Common imports
import numpy as np
import os

# to make this notebook's output stable across runs
np.random.seed(42)

# To plot pretty figures
%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt
mpl.rc('axes', labelsize=14)
mpl.rc('xtick', labelsize=12)
mpl.rc('ytick', labelsize=12)

# Where to save the figures: <PROJECT_ROOT_DIR>/images/<CHAPTER_ID>.
# The folder is created up front so that save_fig() can write into it.
PROJECT_ROOT_DIR = "."
CHAPTER_ID = "3_classification"
IMAGES_PATH = os.path.join(PROJECT_ROOT_DIR, "images", CHAPTER_ID)
os.makedirs(IMAGES_PATH, exist_ok=True)

def save_fig(fig_id, tight_layout=True, fig_extension="png", resolution=300):
    """Save the current matplotlib figure under IMAGES_PATH.

    Args:
        fig_id: File name (without extension) for the saved figure.
        tight_layout: When true, call ``plt.tight_layout()`` before saving.
        fig_extension: File extension, also passed to savefig as the format.
        resolution: DPI passed to ``plt.savefig``.
    """
    target_file = "".join((fig_id, ".", fig_extension))
    target = os.path.join(IMAGES_PATH, target_file)
    print("Saving figure", fig_id)
    if tight_layout:
        plt.tight_layout()
    plt.savefig(target, dpi=resolution, format=fig_extension)

from PIL import Image
from itertools import product
import glob
import cv2 as cv

def tile(filename, dir_in, dir_out, d, margin=5):
    """Cut an image into d*d cells and save each cell with its border trimmed.

    The image is walked on a grid with step ``d``; partial cells on the right
    and bottom edges are dropped. From every cell, ``margin`` pixels are
    removed on each side before saving, so each saved tile is
    ``(d - 2*margin)`` pixels square.

    Output files are named ``{name}_{row}_{col}{ext}`` where ``row`` and
    ``col`` are the cell's grid indices. The indices were previously computed
    with a hard-coded ``// 38``; they now use ``d``, which gives identical
    names for ``d == 38`` and correct ones for any other cell size.

    Args:
        filename: Image file name inside ``dir_in``.
        dir_in: Folder containing the source image.
        dir_out: Folder for the tiles; created if it does not exist.
        d: Cell size in pixels.
        margin: Pixels trimmed from every side of a cell (default 5, the
            value that used to be hard-coded).

    Raises:
        ValueError: If ``d`` is too small to leave any pixels after trimming.
    """
    if d <= 2 * margin:
        raise ValueError(f"d={d} leaves no pixels after trimming margin={margin}")

    name, ext = os.path.splitext(filename)
    os.makedirs(dir_out, exist_ok=True)

    # The context manager closes the source file once all tiles are written.
    with Image.open(os.path.join(dir_in, filename)) as img:
        w, h = img.size

        grid = product(range(0, h - h % d, d), range(0, w - w % d, d))
        for i, j in grid:
            # PIL boxes are (left, upper, right, lower).
            box = (j + margin, i + margin, j + d - margin, i + d - margin)
            out = os.path.join(dir_out, f'{name}_{i // d}_{j // d}{ext}')
            img.crop(box).save(out)

In [None]:
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split

mnist = fetch_openml('mnist_784', version=1)
X, y = mnist["data"], mnist["target"] # X, y: pandas DataFrame
X, y = X.to_numpy(), y.to_numpy() # X, y: numpy array

# Empty arrays that keep MNIST's column count and dtype; the custom tiles are
# concatenated onto these.
X_beforeInsert, y_beforeInsert = X[0:0].copy(), y[0:0].copy()
X_forTest, y_forTest = X[0:0].copy(), y[0:0].copy()

path = sorted(glob.glob("mnist_append/out4/*.jpg"))
if not path:
    raise FileNotFoundError("no tiles found in mnist_append/out4/*.jpg")

y_labels = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']
cv_img = []
for img in path:
    n = cv.imread(img, 0)  # flag 0: read as single-channel grayscale
    if n is None:
        # cv.imread signals an unreadable file by returning None.
        raise OSError(f"could not read image: {img}")
    n = 255 - n  # invert the 8-bit intensities
    n = np.ravel(n, order='C')
    if n.size != X.shape[1]:
        raise ValueError(f"{img}: {n.size} pixels, expected {X.shape[1]}")
    cv_img.append(n)

# Labels are assigned by cycling 0-9 over the sorted file list.
# NOTE(review): this assumes every consecutive run of ten files holds the
# digits 0..9 in order -- confirm against how the tiles were produced.
if len(cv_img) % len(y_labels) != 0:
    raise ValueError(
        f"{len(cv_img)} tiles cannot be labelled in complete groups of {len(y_labels)}"
    )

X_beforeInsert = np.concatenate((X_beforeInsert, cv_img), axis=0)
y_beforeInsert = np.append(y_beforeInsert, y_labels * (len(cv_img)//10))

X_train, X_test, y_train, y_test = X[:60000], X[60000:], y[:60000], y[60000:]
X_i_train, X_i_test, y_i_train, y_i_test = train_test_split(X_beforeInsert, y_beforeInsert, test_size=0.2, shuffle=True, stratify=y_beforeInsert, random_state=42)
X_train = np.concatenate((X_train, X_i_train), axis=0)
X_test = np.concatenate((X_test, X_i_test), axis=0)
y_train = np.append(y_train, y_i_train)
y_test = np.append(y_test, y_i_test)

# Share of the new tiles that went to each split, computed from the split
# itself rather than from hard-coded MNIST sizes.
n_new_train, n_new_test = len(X_i_train), len(X_i_test)
n_new = n_new_train + n_new_test
print("신규 데이터 중 train 데이터 비율:", n_new_train / n_new)
print("신규 데이터 중 test 데이터 비율:", n_new_test / n_new)
print(len(X_train), len(X_test), len(y_test))

In [None]:
from sklearn.preprocessing import MinMaxScaler
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score

import time

# Fit the min-max scaling on the training pixels only, then apply that same
# fitted scaling to both the training and the test pixels.
mscaler = MinMaxScaler()
mscaler.fit(X_train)
X_train_mscaled = mscaler.transform(X_train)
X_test_mscaled = mscaler.transform(X_test)

In [None]:
# Train the MLP and time the fit. perf_counter() is used for the intervals
# because it is monotonic, unlike the wall-clock time.time().
st_tr = time.perf_counter()
best_clf = MLPClassifier(alpha = 0.1, max_iter = 200, random_state=42)
best_clf.fit(X_train_mscaled, y_train)
end_tr = time.perf_counter() - st_tr

# Time prediction on the scaled test set and score it against y_test.
st_ts = time.perf_counter()
y_predict = best_clf.predict(X_test_mscaled)
end_ts = time.perf_counter() - st_ts
acc = accuracy_score(y_test, y_predict)
print('train time:', end_tr)  # previously measured but never shown
print('acc:',acc,'predict time:', end_ts)

### 🚨For Competition🚨
For the professor's evaluation: `mnist_append/fortest` 폴더에 380*380 `.jpg` 이미지를 추가하십시오.

In [None]:
from PIL import Image
from skimage.io import imread
from skimage.color import rgb2gray
# Crop each roughly photographed image to the bounding box of its dark
# content, then resize the crop to 280*280.
path = sorted(glob.glob("mnist_append/fortest/*.jpg"))
down_points = (280, 280)
threshold = 0.5

# Create the output folders up front: PIL's save() raises on a missing
# folder, and cv.imwrite() only returns False without raising.
os.makedirs("mnist_append/fortest_bordercropped", exist_ok=True)
os.makedirs("mnist_append/fortest_tuned", exist_ok=True)

for j, src in enumerate(path):
    im = imread(src)
    im1 = 1 - rgb2gray(im)  # inverted grayscale: dark pixels become large values
    # Binarise: pixels above the threshold become 255, everything else 0, so
    # getbbox() returns the box around the above-threshold (dark) pixels.
    mask = im1 > threshold
    imageBox = Image.fromarray((mask * 255).astype(np.uint8)).getbbox()
    cropped = Image.fromarray(im).crop(imageBox)
    savepath = 'mnist_append/fortest_bordercropped/' + str(j) + '.jpg'
    cropped.save(savepath)

pathtuned = sorted(glob.glob("mnist_append/fortest_bordercropped/*.jpg"))
for src in pathtuned:
    image = cv.imread(src)
    if image is None:
        raise OSError(f"could not read image: {src}")
    resized_down = cv.resize(image, down_points, interpolation= cv.INTER_LINEAR)
    # Keep the cropped file's name. Renumbering in sorted order would shuffle
    # the indices once there are more than ten images ("10.jpg" sorts before
    # "2.jpg").
    out = os.path.join("mnist_append/fortest_tuned", os.path.basename(src))
    if not cv.imwrite(out, resized_down):
        raise OSError(f"could not write image: {out}")

In [None]:
def tile2(filename, dir_in, dir_out, d):
    """Cut an image into d*d tiles and save every tile, untrimmed.

    Used in this notebook to split a 280*280 image into one hundred 28*28
    tiles. The image is walked on a grid with step ``d``; partial tiles on the
    right and bottom edges are dropped.

    Output files are named ``{name}_{row}_{col}{ext}`` where ``row`` and
    ``col`` are the tile's grid indices. The indices were previously computed
    with a hard-coded ``// 28``; they now use ``d``, which gives identical
    names for ``d == 28`` and correct ones for any other tile size.

    Args:
        filename: Image file name inside ``dir_in``.
        dir_in: Folder containing the source image.
        dir_out: Folder for the tiles; created if it does not exist.
        d: Tile size in pixels; must be positive.

    Raises:
        ValueError: If ``d`` is not positive.
    """
    if d <= 0:
        raise ValueError(f"tile size d must be positive, got {d}")

    name, ext = os.path.splitext(filename)
    os.makedirs(dir_out, exist_ok=True)

    # The context manager closes the source file once all tiles are written.
    with Image.open(os.path.join(dir_in, filename)) as img:
        w, h = img.size

        grid = product(range(0, h - h % d, d), range(0, w - w % d, d))
        for i, j in grid:
            # PIL boxes are (left, upper, right, lower).
            box = (j, i, j + d, i + d)
            out = os.path.join(dir_out, f'{name}_{i // d}_{j // d}{ext}')
            img.crop(box).save(out)

In [None]:
# Cut every resized image in fortest_tuned into 28*28 tiles.
# Make sure the output folder exists before any tile is written into it.
os.makedirs("mnist_append/fortest_out", exist_ok=True)
path = [os.path.basename(p) for p in sorted(glob.glob("mnist_append/fortest_tuned/*.jpg"))]
for filename in path:
    tile2(filename, "mnist_append/fortest_tuned", "mnist_append/fortest_out", 28)

In [None]:
# Append the test-only tiles to X_test / y_test and rescale the combined set.
pathTestOut = sorted(glob.glob("mnist_append/fortest_out/*.jpg"))

y_labels = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']
cv_img = []
for img in pathTestOut:
    n = cv.imread(img, 0)  # flag 0: read as single-channel grayscale
    if n is None:
        # cv.imread signals an unreadable file by returning None.
        raise OSError(f"could not read image: {img}")
    n = 255 - n  # invert the 8-bit intensities
    n = np.ravel(n, order='C')
    if n.size != X_test.shape[1]:
        raise ValueError(f"{img}: {n.size} pixels, expected {X_test.shape[1]}")
    cv_img.append(n)

# Labels are assigned by cycling 0-9 over the sorted file list.
# NOTE(review): this assumes every consecutive run of ten tiles holds the
# digits 0..9 in order -- confirm against the layout of the photographed sheet.
if len(cv_img) % len(y_labels) != 0:
    raise ValueError(
        f"{len(cv_img)} tiles cannot be labelled in complete groups of {len(y_labels)}"
    )

# Rebuild the test-only arrays from scratch instead of appending to their
# previous contents, so re-running this cell does not duplicate tiles in them.
X_forTest = np.array(cv_img, dtype=X_test.dtype).reshape(-1, X_test.shape[1])
y_forTest = np.append(y_forTest[0:0], y_labels * (len(cv_img)//10))

# NOTE: X_test / y_test still grow each time this cell runs; re-run the
# train/test split cell first if this cell has to be executed again.
X_test = np.concatenate((X_test, X_forTest), axis=0)
y_test = np.append(y_test, y_forTest)
X_test_mscaled=mscaler.transform(X_test)

In [None]:
# Time prediction on the current X_test_mscaled and score it against y_test.
# perf_counter() is monotonic, which makes it the safer clock for an interval.
st_ts = time.perf_counter()
y_predict = best_clf.predict(X_test_mscaled)
end_ts = time.perf_counter() - st_ts

acc = accuracy_score(y_test, y_predict)
print('acc:',acc,'predict time:', end_ts)