In [1]:
# -------------------------------------------------
# 1. Imports & GPU
# -------------------------------------------------
import os, random, warnings, numpy as np, matplotlib.pyplot as plt
from pathlib import Path
import tensorflow as tf
from tensorflow.keras import layers, models, callbacks
from PIL import Image
import cv2

warnings.filterwarnings('ignore')
tf.random.set_seed(42); np.random.seed(42); random.seed(42)

print("TF:", tf.__version__)
print("GPU:", tf.config.list_physical_devices('GPU'))

TF: 2.20.0
GPU: []


In [2]:
# -------------------------------------------------
# 2. GLOBAL SETTINGS
# -------------------------------------------------
DATA_ROOT     = Path(r"C:\Users\TIK03\Documents\GitHub\DIT5411-HoYiTik\Assgnment\data\characters")
NOTEBOOK_ROOT = Path(r"C:\Users\TIK03\Documents\GitHub\DIT5411-HoYiTik\Assgnment")

IMG_SIZE      = (64, 64)
SAMPLE_CHARS  = 100          # ← WORKS!
TARGET_PER_CLASS = 200
EPOCHS        = 8
BATCH_SIZE    = 32

In [3]:
# -------------------------------------------------
# 3. Verify folder
# -------------------------------------------------
if not DATA_ROOT.exists():
    raise FileNotFoundError(DATA_ROOT)

subfolders = [p for p in DATA_ROOT.iterdir() if p.is_dir()]
print(f"Found {len(subfolders)} character folders")
print("First 10:", [p.name for p in subfolders[:10]])

Found 13065 character folders
First 10: ['1', '10', '100', '1000', '10000', '10001', '10002', '10003', '10004', '10005']


In [4]:
# -------------------------------------------------
# 4. PIL loader (Chinese filenames)
# -------------------------------------------------
def safe_pil_read(p):
    try:
        return np.array(Image.open(p).convert('L'))
    except Exception as e:
        print(f"  [WARN] {p.name}: {e}")
        return None

def load_class(folder: Path):
    files = sorted(folder.glob("*.png"))
    imgs = []
    for p in files:
        arr = safe_pil_read(p)
        if arr is not None:
            imgs.append(cv2.resize(arr, IMG_SIZE, interpolation=cv2.INTER_AREA))
    return np.array(imgs, dtype=np.uint8) if imgs else None

In [5]:
# -------------------------------------------------
# 5. Load & split (40 train / rest test)
# -------------------------------------------------
char_folders = sorted([p for p in DATA_ROOT.iterdir() if p.is_dir()])
if SAMPLE_CHARS:
    char_folders = random.sample(char_folders, SAMPLE_CHARS)

print(f"\nLoading {len(char_folders)} classes …")

train_X_list, train_y_list = [], []
test_X_list , test_y_list  = [], []
label_to_char = {}
char_to_label = {}

for idx, folder in enumerate(char_folders):
    imgs = load_class(folder)
    if imgs is None or len(imgs) < 40:
        print(f"  [SKIP] {folder.name} – <40 images")
        continue

    label_to_char[idx] = folder.name
    char_to_label[folder.name] = idx

    n_train = min(40, len(imgs))
    train_X_list.append(imgs[:n_train])
    train_y_list.extend([idx] * n_train)

    if len(imgs) > n_train:
        test_X_list.append(imgs[n_train:])
        test_y_list.extend([idx] * (len(imgs) - n_train))

if not train_X_list:
    raise RuntimeError("No valid classes!")

train_X = np.concatenate(train_X_list)[..., np.newaxis] / 255.0
train_y = np.array(train_y_list, dtype=np.int32)

test_X = np.concatenate(test_X_list)[..., np.newaxis] / 255.0 if test_X_list else None
test_y = np.array(test_y_list, dtype=np.int32) if test_y_list else None

print(f"Train: {train_X.shape}  Test: {test_X.shape if test_X is not None else 'None'}")
print(f"Classes: {len(np.unique(train_y))}")


Loading 100 classes …
Train: (4000, 64, 64, 1)  Test: (1260, 64, 64, 1)
Classes: 100


In [6]:
# -------------------------------------------------
# 6. FIXED Augmentation (64x64 guaranteed)
# -------------------------------------------------
def random_transform(img):
    img = (img.squeeze() * 255).astype(np.uint8)
    h, w = 64, 64

    # rotation
    angle = random.choice([-12, -8, -4, 0, 4, 8, 12])
    M = cv2.getRotationMatrix2D((w//2, h//2), angle, 1.0)
    img = cv2.warpAffine(img, M, (w, h))

    # shear
    shear = random.uniform(0.08, 0.22)
    M = np.float32([[1, shear, 0], [0, 1, 0]]) if random.random() > 0.5 else np.float32([[1, 0, 0], [shear, 1, 0]])
    img = cv2.warpAffine(img, M, (w, h))

    # scale
    scale = random.uniform(0.82, 1.18)
    nw, nh = int(w * scale), int(h * scale)
    img = cv2.resize(img, (nw, nh), interpolation=cv2.INTER_AREA)

    # pad/crop to 64x64
    top = bottom = left = right = 0
    if nh < h:
        pad = (h - nh) // 2
        top, bottom = pad, h - nh - pad
    elif nh > h:
        crop = (nh - h) // 2
        img = img[crop:crop + h, :]
    if nw < w:
        pad = (w - nw) // 2
        left, right = pad, w - nw - pad
    elif nw > w:
        crop = (nw - w) // 2
        img = img[:, crop:crop + w]

    if any((top, bottom, left, right)):
        img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=255)

    return (img / 255.0)[..., np.newaxis]

def augment_to_target(class_imgs, target=200):
    aug = []
    while len(aug) < target:
        for img in class_imgs:
            aug.append(random_transform(img))
            if len(aug) >= target:
                break
    return np.array(aug[:target])

In [7]:
# -------------------------------------------------
# 7. Build augmented set
# -------------------------------------------------
print("Augmenting to 200 samples per class …")
aug_X, aug_y = [], []

for label in np.unique(train_y):
    idxs = np.where(train_y == label)[0]
    class_imgs = train_X[idxs]
    aug = augment_to_target(class_imgs, target=TARGET_PER_CLASS)
    aug_X.append(aug)
    aug_y.extend([label] * len(aug))

train_X = np.concatenate(aug_X)
train_y = np.array(aug_y, dtype=np.int32)

print(f"Final train: {train_X.shape}  ({train_X.shape[0]//len(np.unique(train_y))} per class)")

Augmenting to 200 samples per class …
Final train: (20000, 64, 64, 1)  (200 per class)


In [8]:
# -------------------------------------------------
# 8. FIXED CNN models (cnn_res now works!)
# -------------------------------------------------
def build_cnn(arch, input_shape=(64,64,1), n_classes=None):
    if arch == 'cnn_res':
        # Build with Functional API from scratch
        inputs = layers.Input(shape=input_shape)
        x = layers.Conv2D(32, 3, activation='relu')(inputs)
        x = layers.BatchNormalization()(x)
        x = layers.MaxPooling2D(2)(x)

        # Residual block
        shortcut = x
        x = layers.Conv2D(64, 3, padding='same', activation='relu')(x)
        x = layers.BatchNormalization()(x)
        x = layers.Conv2D(64, 3, padding='same', activation=None)(x)
        x = layers.BatchNormalization()(x)
        shortcut = layers.Conv2D(64, 1, padding='same')(shortcut)
        x = layers.Add()([x, shortcut])
        x = layers.Activation('relu')(x)
        x = layers.MaxPooling2D(2)(x)

        x = layers.Conv2D(128, 3, padding='same', activation='relu')(x)
        x = layers.MaxPooling2D(2)(x)
        x = layers.GlobalAveragePooling2D()(x)
        x = layers.Dense(256, activation='relu')(x)
        x = layers.Dropout(0.5)(x)
        outputs = layers.Dense(n_classes, activation='softmax')(x)

        model = models.Model(inputs, outputs, name='cnn_res')
    else:
        model = models.Sequential(name=arch)
        model.add(layers.Conv2D(32, 3, activation='relu', input_shape=input_shape))
        model.add(layers.BatchNormalization())
        model.add(layers.MaxPooling2D(2))

        if arch == 'cnn2':
            model.add(layers.Conv2D(64, 3, activation='relu'))
            model.add(layers.MaxPooling2D(2))
            model.add(layers.Flatten())
            model.add(layers.Dense(128, activation='relu'))
            model.add(layers.Dropout(0.4))
        elif arch == 'cnn3':
            model.add(layers.Conv2D(64, 3, activation='relu'))
            model.add(layers.MaxPooling2D(2))
            model.add(layers.Conv2D(128, 3, activation='relu'))
            model.add(layers.MaxPooling2D(2))
            model.add(layers.Flatten())
            model.add(layers.Dense(256, activation='relu'))
            model.add(layers.Dropout(0.5))

        model.add(layers.Dense(n_classes, activation='softmax'))

    model.compile(optimizer='adam',
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])
    return model

In [None]:
# -------------------------------------------------
# 9. Train all 3 models
# -------------------------------------------------
results, histories = {}, {}
n_classes = len(np.unique(train_y))

for arch in ['cnn2', 'cnn3', 'cnn_res']:
    print(f"\n=== TRAINING {arch.upper()} ===")
    model = build_cnn(arch, n_classes=n_classes)

    hist = model.fit(train_X, train_y,
                     epochs=EPOCHS,
                     batch_size=BATCH_SIZE,
                     validation_split=0.2,
                     verbose=1)
    histories[arch] = hist

    acc = hist.history['val_accuracy'][-1]
    results[arch] = acc
    print(f"{arch} → Val Acc: {acc:.4%}")

    model.save(f"{NOTEBOOK_ROOT}/{arch}_model.keras")  # ← .keras (modern format)


=== TRAINING CNN2 ===
Epoch 1/8
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 19ms/step - accuracy: 0.0597 - loss: 4.0951 - val_accuracy: 0.0000e+00 - val_loss: 7.0052
Epoch 2/8
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 19ms/step - accuracy: 0.2171 - loss: 2.9282 - val_accuracy: 0.0000e+00 - val_loss: 14.5416
Epoch 3/8
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 20ms/step - accuracy: 0.3297 - loss: 2.3458 - val_accuracy: 0.0000e+00 - val_loss: 20.5258
Epoch 4/8
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 20ms/step - accuracy: 0.4254 - loss: 1.9257 - val_accuracy: 0.0000e+00 - val_loss: 33.2114
Epoch 5/8
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 19ms/step - accuracy: 0.5011 - loss: 1.6171 - val_accuracy: 0.0000e+00 - val_loss: 39.6567
Epoch 6/8
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 19ms/step - accuracy: 0.5512 - loss: 1.4430 - val_accuracy: 0.