In [None]:
import os, time, math, pickle, random, numpy as np
import matplotlib.pyplot as plt
from PIL import Image

In [None]:
# Training / data configuration shared by the cells below.
EPOCHS       = 30             # number of full passes over the training set
BATCH_SIZE   = 128            # mini-batch size used during training
LR           = 0.001          # Adam learning rate
NUM_SAMPLES  = 10             # number of random images to draw for the visualization
TRAIN_DIR    = "mnist_training"       # root folder of the training images
TEST_DIR     = "mnist_testing"        # root folder of the test images
MODEL_PATH   = "mnist_nn_model.pkl"   # where the trained model is pickled

In [None]:
def load_folder_dataset(root_dir):
    """Load a folder-per-class image dataset as flat float32 vectors.

    ``root_dir`` is walked recursively. A directory contributes images only
    when its own name parses as an integer; that integer becomes the label of
    every image file directly inside it. Files are selected by extension
    (.png / .jpg / .jpeg / .bmp, case-insensitive), converted to 8-bit
    grayscale, scaled to [0, 1] and flattened.

    Directories and files are visited in sorted order so that the returned
    sample order is reproducible across machines and filesystems.

    Args:
        root_dir: Path of the dataset root.

    Returns:
        Tuple ``(X, y)``: ``X`` is a float32 array with one flattened image
        per row, ``y`` is an int64 array of labels.

    Raises:
        FileNotFoundError: If no image file was found under ``root_dir``.
        ValueError: Propagated from ``np.stack`` when the images do not all
            have the same number of pixels.
    """
    xs, ys = [], []
    for dirpath, dirnames, files in os.walk(root_dir):
        # Sorting in place makes os.walk descend in a stable order
        # (os.listdir order is otherwise filesystem-dependent).
        dirnames.sort()
        try:
            label = int(os.path.basename(dirpath))
        except ValueError:
            # Not a class folder (e.g. the root itself) - skip its files.
            continue
        for fname in sorted(files):
            if not fname.lower().endswith((".png", ".jpg", ".jpeg", ".bmp")):
                continue
            # Image.open is lazy and keeps the file open; the context manager
            # releases the handle as soon as the pixels have been copied out.
            with Image.open(os.path.join(dirpath, fname)) as im:
                img = np.asarray(im.convert("L"), dtype=np.float32) / 255.0
            xs.append(img.reshape(-1))
            ys.append(label)
    if not xs:
        raise FileNotFoundError(f"No images in {root_dir}")
    return np.stack(xs), np.array(ys, dtype=np.int64)

def prepare_data():
    """Load the train and test folders and standardize both splits.

    Returns:
        Tuple ``(X_train, y_train, X_test, y_test)`` where both feature
        arrays have been shifted and scaled with the training set's
        statistics.
    """
    train_x, train_y = load_folder_dataset(TRAIN_DIR)
    test_x, test_y = load_folder_dataset(TEST_DIR)

    # Global normalization: a single scalar mean / std taken over every
    # training pixel, applied unchanged to the test split.
    mu = train_x.mean()
    sigma = train_x.std()

    train_norm = (train_x - mu) / sigma
    test_norm = (test_x - mu) / sigma
    return train_norm, train_y, test_norm, test_y

In [None]:
def relu(x):
    """Rectified linear unit: element-wise maximum of 0 and ``x``."""
    return np.maximum(0, x)
def relu_grad(x):
    """Derivative of ReLU: 1.0 where ``x > 0`` and 0.0 elsewhere, as float32."""
    positive = x > 0
    return positive.astype(np.float32)
def softmax(x):
    """Softmax along axis 1 (one probability distribution per row).

    Each row's maximum is subtracted before exponentiating so that large
    logits do not overflow; the shift cancels out in the ratio.
    """
    row_max = np.max(x, axis=1, keepdims=True)
    exps = np.exp(x - row_max)
    return exps / np.sum(exps, axis=1, keepdims=True)

In [None]:
class Dense:
    """Fully connected layer ``y = x @ W + b`` that carries its own Adam state.

    Attributes:
        W, b: float32 weight matrix ``(in_dim, out_dim)`` and bias ``(out_dim,)``.
        mW, vW, mb, vb: Adam first / second moment estimates for ``W`` and ``b``.
        x: Input cached by the most recent forward call (read by ``backward``).
        dW, db: Gradients produced by the most recent ``backward`` call.
    """

    def __init__(self, in_dim, out_dim):
        # He-style initialization: N(0, 1) scaled by sqrt(2 / fan_in).
        # The cast happens AFTER scaling: np.sqrt returns an np.float64 scalar,
        # and under NumPy 2 promotion rules float32_array * np.float64 yields
        # float64, which would silently undo an earlier astype(float32).
        scale = np.sqrt(2 / in_dim)
        self.W = (np.random.randn(in_dim, out_dim) * scale).astype(np.float32)
        self.b = np.zeros(out_dim, dtype=np.float32)
        self.mW = np.zeros_like(self.W); self.vW = np.zeros_like(self.W)
        self.mb = np.zeros_like(self.b); self.vb = np.zeros_like(self.b)

    def __call__(self, x):
        """Forward pass; caches ``x`` for the following ``backward`` call."""
        self.x = x
        return x @ self.W + self.b

    def backward(self, grad):
        """Store parameter gradients and return the gradient w.r.t. the input.

        The batch average is taken here (``dW`` is divided by the batch size
        and ``db`` is a mean over axis 0), so ``grad`` is expected to hold
        per-sample gradients that have not already been divided by the batch
        size; pre-dividing would scale the parameter gradients twice.

        Args:
            grad: Gradient of the loss w.r.t. this layer's output, one row per
                sample of the cached input.

        Returns:
            ``grad @ W.T``, the gradient w.r.t. the layer input (not averaged).
        """
        self.dW = self.x.T @ grad / self.x.shape[0]
        self.db = grad.mean(axis=0)
        return grad @ self.W.T

    def step(self, lr, t, b1=0.9, b2=0.999, eps=1e-8):
        """Apply one Adam update using the gradients from the last ``backward``.

        Args:
            lr: Learning rate.
            t: 1-based global step counter used for bias correction; ``t == 0``
                would make the correction terms divide by zero.
            b1, b2: Exponential decay rates of the first / second moments.
            eps: Small constant added to the denominator.
        """
        # Exponential moving averages of the gradient and its square.
        self.mW = b1*self.mW + (1-b1)*self.dW
        self.vW = b2*self.vW + (1-b2)*(self.dW**2)
        self.mb = b1*self.mb + (1-b1)*self.db
        self.vb = b2*self.vb + (1-b2)*(self.db**2)
        # Bias correction compensates for the zero-initialized moments.
        mW_hat = self.mW / (1-b1**t); vW_hat = self.vW / (1-b2**t)
        mb_hat = self.mb / (1-b1**t); vb_hat = self.vb / (1-b2**t)
        self.W -= lr * mW_hat / (np.sqrt(vW_hat)+eps)
        self.b -= lr * mb_hat / (np.sqrt(vb_hat)+eps)

In [None]:
class MLP:
    """Four-layer perceptron (784-128-64-32-10) with ReLU hidden activations."""

    def __init__(self):
        self.fc1 = Dense(784,128)
        self.fc2 = Dense(128,64)
        self.fc3 = Dense(64,32)
        self.out = Dense(32,10)
        self.layers = [self.fc1, self.fc2, self.fc3, self.out]

    def forward(self, x):
        """Run the network on a batch.

        Returns:
            Tuple ``(logits, cache)`` where ``cache`` is
            ``(z1, a1, z2, a2, z3, a3)``: the pre-activations and activations
            of the three hidden layers, as consumed by ``backward``.
        """
        z1 = self.fc1(x); a1 = relu(z1)
        z2 = self.fc2(a1); a2 = relu(z2)
        z3 = self.fc3(a2); a3 = relu(z3)
        logits = self.out(a3)
        return logits, (z1,a1,z2,a2,z3,a3)

    def backward(self, logits, y, cache):
        """Compute the mean cross-entropy loss and back-propagate it.

        Every layer's ``dW`` / ``db`` is populated as a side effect.

        Args:
            logits: Output of ``forward`` for the batch.
            y: Integer class labels, one per row of ``logits``.
            cache: The cache tuple returned by ``forward`` for the same batch.

        Returns:
            Mean cross-entropy loss over the batch.
        """
        z1, _, z2, _, z3, _ = cache   # only the pre-activations are needed
        rows = np.arange(y.size)
        probs = softmax(logits)
        # 1e-9 keeps log() finite if a true-class probability underflows to 0.
        loss = -np.log(probs[rows, y] + 1e-9).mean()

        # Per-sample gradient of cross-entropy w.r.t. the logits:
        # softmax - one_hot. It is deliberately NOT divided by the batch size
        # here, because Dense.backward already averages over the batch;
        # dividing in both places made every gradient 1/N too small.
        dlogits = probs               # probs is local, safe to modify in place
        dlogits[rows, y] -= 1

        d = self.out.backward(dlogits)
        d = self.fc3.backward(d * relu_grad(z3))
        d = self.fc2.backward(d * relu_grad(z2))
        # Called for its side effect (fc1.dW / fc1.db); the input gradient
        # it returns is not needed.
        self.fc1.backward(d * relu_grad(z1))
        return loss

    def step(self, lr, t):
        """Apply one Adam update to every layer (``t`` is the 1-based step count)."""
        for lyr in self.layers: lyr.step(lr, t)

    def predict(self, X, bs=1024):
        """Return the arg-max class for each row of ``X``, evaluated in chunks of ``bs``.

        An empty ``X`` yields an empty integer array instead of failing inside
        ``np.concatenate``.
        """
        if len(X) == 0:
            return np.empty(0, dtype=np.intp)
        preds = []
        for i in range(0, len(X), bs):
            logits,_ = self.forward(X[i:i+bs])
            preds.append(np.argmax(logits,1))
        return np.concatenate(preds)

In [None]:
def iter_batch(X, y, bs):
    """Yield ``(X_batch, y_batch)`` pairs covering the data once in random order.

    A fresh permutation is drawn from NumPy's global RNG each time the
    generator is started; the last batch may hold fewer than ``bs`` rows.
    """
    total = len(X)
    order = np.arange(total)
    np.random.shuffle(order)
    for start in range(0, total, bs):
        pick = order[start:start + bs]
        yield X[pick], y[pick]

def accuracy(model, X, y):
    """Fraction of rows of ``X`` for which ``model.predict`` matches ``y``."""
    hits = model.predict(X) == y
    return hits.mean()

def show_samples(model, X, y, n=NUM_SAMPLES):
    """Plot randomly chosen images titled with true (T) and predicted (P) labels.

    Args:
        model: Object exposing ``predict(X)`` that returns one label per row.
        X: Array of flattened images; each row is reshaped to 28x28 for display.
        y: True labels aligned with ``X``.
        n: Number of images to draw. Clamped to ``len(X)`` because sampling
            without replacement cannot return more items than exist.
    """
    n = min(n, len(X))
    if n <= 0:
        return  # nothing to plot (and a zero-width figure is invalid)

    samp = np.random.choice(len(X), n, replace=False)
    imgs, labels = X[samp], y[samp]
    preds = model.predict(imgs)

    # squeeze=False keeps `axes` two-dimensional even when n == 1.
    fig, axes = plt.subplots(1, n, figsize=(n, 2), squeeze=False)
    for ax, img, lbl, pred in zip(axes[0], imgs, labels, preds):
        ax.imshow(img.reshape(28,28), cmap='gray')
        ax.axis('off')
        ax.set_title(f"T:{lbl}\nP:{pred}")
    fig.tight_layout()
    plt.show()

In [None]:
def main(seed=42):
    """Train the MLP, report per-epoch metrics, pickle the model and plot samples.

    Args:
        seed: Seed for NumPy's global RNG (weight init, batch shuffling and
            sample selection) so that runs are reproducible. Pass ``None`` to
            leave the RNG state untouched.
    """
    if seed is not None:
        np.random.seed(seed)

    # The raw pixels are loaded here so that the normalization statistics of
    # the RAW training set can be stored with the model. Taking mean/std of
    # the already-normalized data gives ~0 and ~1, which is useless for
    # preprocessing new images at inference time.
    X_train_raw, y_train = load_folder_dataset(TRAIN_DIR)
    X_test_raw, y_test = load_folder_dataset(TEST_DIR)
    mean, std = X_train_raw.mean(), X_train_raw.std()   # global normalization
    X_train = (X_train_raw - mean) / std
    X_test = (X_test_raw - mean) / std
    del X_train_raw, X_test_raw   # free the un-normalized copies
    print(f"train: {len(X_train)}  test: {len(X_test)}")

    # t is the global Adam step counter (1-based once training starts).
    model, t = MLP(), 0
    for epoch in range(1, EPOCHS+1):
        tic, losses = time.time(), []
        for xb, yb in iter_batch(X_train, y_train, BATCH_SIZE):
            logits, cache = model.forward(xb)
            loss = model.backward(logits, yb, cache)
            t += 1; model.step(LR, t); losses.append(loss)
        acc_train = accuracy(model, X_train, y_train)
        acc_test = accuracy(model, X_test,  y_test)
        # The reported time covers both the training pass and the evaluation.
        print(f"Epoch {epoch:3}/{EPOCHS}  "
              f"loss {np.mean(losses):.4f}  "
              f"train {acc_train*100:5.2f}%  test {acc_test*100:5.2f}%  "
              f"{time.time()-tic:.1f}s")

    # Keep the preprocessing statistics with the model so that whoever loads
    # the pickle can normalize raw inputs exactly as during training.
    model.mean, model.std = mean, std

    with open(MODEL_PATH, "wb") as f:
        pickle.dump(model, f)
    print(f"모델 저장: {MODEL_PATH}")

    show_samples(model, X_train, y_train, NUM_SAMPLES)

# Run the full train / save / visualize pipeline when this code is executed
# directly (as a script, or as a notebook cell where __name__ is "__main__").
if __name__ == "__main__":
    main()