In [1]:
# train.py

import os
import cv2
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, models, callbacks
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.utils.class_weight import compute_class_weight
import pickle

TRAIN_DIR = "/kaggle/input/image-data/train"

PATCH_SIZE = 64
STRIDE = 32
EPOCHS = 200
BATCH_SIZE = 64

def extract_patches(img, patch_size=PATCH_SIZE, stride=STRIDE):
    patches = []
    h, w = img.shape
    for y in range(0, h - patch_size, stride):
        for x in range(0, w - patch_size, stride):
            patch = img[y:y+patch_size, x:x+patch_size]
            if np.mean(patch) < 245:
                patches.append(patch)
    return patches


X, y = [], []

files = [f for f in os.listdir(TRAIN_DIR)
         if f.lower().endswith((".png", ".jpg", ".jpeg"))]

for file in files:
    writer_id = file[:2]
    path = os.path.join(TRAIN_DIR, file)
    img = cv2.imread(path, cv2.IMREAD_GRAYSCALE)

    if img is None:
        continue

    img = cv2.resize(img, None, fx=0.7, fy=0.7)
    patches = extract_patches(img)

    for p in patches:
        p = cv2.normalize(p, None, 0, 255, cv2.NORM_MINMAX)
        p = p / 255.0
        X.append(p)
        y.append(writer_id)

X = np.array(X, dtype=np.float32)[..., np.newaxis]
y = np.array(y)

encoder = LabelEncoder()
y_encoded = encoder.fit_transform(y)
pickle.dump(encoder, open("label_encoder.pkl", "wb"))

X_train, X_val, y_train, y_val = train_test_split(
    X, y_encoded,
    test_size=0.2,
    stratify=y_encoded,
    random_state=42
)

class_weights = compute_class_weight(
    class_weight="balanced",
    classes=np.unique(y_train),
    y=y_train
)
class_weights = dict(enumerate(class_weights))

num_classes = len(np.unique(y_encoded))

model = models.Sequential([
    layers.Input(shape=(PATCH_SIZE, PATCH_SIZE, 1)),
    layers.Conv2D(32, 3, activation="relu", padding="same"),
    layers.BatchNormalization(),
    layers.MaxPooling2D(),

    layers.Conv2D(64, 3, activation="relu", padding="same"),
    layers.BatchNormalization(),
    layers.MaxPooling2D(),

    layers.Conv2D(128, 3, activation="relu", padding="same"),
    layers.BatchNormalization(),
    layers.MaxPooling2D(),

    layers.GlobalAveragePooling2D(),
    layers.Dense(128, activation="relu"),
    layers.Dropout(0.5),
    layers.Dense(num_classes, activation="softmax")
])

model.compile(
    optimizer=tf.keras.optimizers.Adam(1e-4),
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"]
)

history = model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    class_weight=class_weights,
    callbacks=[
        callbacks.EarlyStopping(patience=10, restore_best_weights=True),
        callbacks.ReduceLROnPlateau(patience=4)
    ]
)

model.save("writer_patch_level_model.keras")

print(" Training finished")

2025-12-28 02:41:29.493393: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1766889689.672359      55 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1766889689.730259      55 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1766889690.155391      55 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1766889690.155430      55 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1766889690.155432      55 computation_placer.cc:177] computation placer alr

Epoch 1/200


I0000 00:00:1766889710.915616     143 service.cc:152] XLA service 0x7e6ce4006830 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1766889710.915654     143 service.cc:160]   StreamExecutor device (0): Tesla P100-PCIE-16GB, Compute Capability 6.0
I0000 00:00:1766889711.400540     143 cuda_dnn.cc:529] Loaded cuDNN version 91002


[1m 24/476[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m3s[0m 7ms/step - accuracy: 0.0084 - loss: 4.6277

I0000 00:00:1766889714.917066     143 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m476/476[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 17ms/step - accuracy: 0.0251 - loss: 4.2588 - val_accuracy: 0.0296 - val_loss: 4.1888 - learning_rate: 1.0000e-04
Epoch 2/200
[1m476/476[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 8ms/step - accuracy: 0.0921 - loss: 3.7155 - val_accuracy: 0.1273 - val_loss: 3.4455 - learning_rate: 1.0000e-04
Epoch 3/200
[1m476/476[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 8ms/step - accuracy: 0.1444 - loss: 3.2984 - val_accuracy: 0.1373 - val_loss: 3.4544 - learning_rate: 1.0000e-04
Epoch 4/200
[1m476/476[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 8ms/step - accuracy: 0.1959 - loss: 2.9690 - val_accuracy: 0.2986 - val_loss: 2.6998 - learning_rate: 1.0000e-04
Epoch 5/200
[1m476/476[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 8ms/step - accuracy: 0.2344 - loss: 2.7279 - val_accuracy: 0.3299 - val_loss: 2.4693 - learning_rate: 1.0000e-04
Epoch 6/200
[1m476/476[0m [32m━━━━━━━━━━━━━━━━━━━━[

In [3]:
#test.py
import os
import cv2
import numpy as np
import tensorflow as tf
from collections import Counter
import pickle
import csv

# =========================
# Configuration (MUST MATCH TRAINING)
# =========================
TEST_DIR = "/kaggle/input/image-data/test"

PATCH_SIZE = 64
STRIDE = 32

MODEL_PATH = "writer_patch_level_model.keras"
OUTPUT_CSV = "test_predictions.csv"

# =========================
# Load Model & Encoder
# =========================
model = tf.keras.models.load_model(MODEL_PATH)
encoder = pickle.load(open("label_encoder.pkl", "rb"))

# =========================
# Patch Extraction (SAME AS TRAINING)
# =========================
def extract_patches(img, patch_size=PATCH_SIZE, stride=STRIDE):
    patches = []
    h, w = img.shape

    for y in range(0, h - patch_size, stride):
        for x in range(0, w - patch_size, stride):
            patch = img[y:y+patch_size, x:x+patch_size]

            # skip mostly white patches
            if np.mean(patch) < 245:
                patches.append(patch)

    return patches

# =========================
# Evaluation + CSV Logging
# =========================
correct = 0
total = 0
rows = []

print("Evaluating test data ...")

test_files = [
    f for f in os.listdir(TEST_DIR)
    if f.lower().endswith((".png", ".jpg", ".jpeg"))
]

for file in test_files:
    true_writer = file[:2]   # SAME logic as training
    path = os.path.join(TEST_DIR, file)

    final_prediction = ""
    num_patches = 0
    status = "ok"

    img = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
    if img is None:
        status = "skipped_image_read_failed"
        rows.append([file, true_writer, final_prediction, num_patches, "", status])
        continue

    # Optional resize (same as training)
    img = cv2.resize(img, None, fx=0.7, fy=0.7)

    patches = extract_patches(img)
    num_patches = len(patches)

    if num_patches == 0:
        status = "skipped_no_patches"
        rows.append([file, true_writer, final_prediction, num_patches, "", status])
        continue

    X = []
    for p in patches:
        p = cv2.normalize(p, None, 0, 255, cv2.NORM_MINMAX)
        p = p / 255.0
        X.append(p)

    X = np.array(X, dtype=np.float32)[..., np.newaxis]

    # Predict patches
    preds = model.predict(X, verbose=0)
    pred_classes = np.argmax(preds, axis=1)
    pred_labels = encoder.inverse_transform(pred_classes)

    # Majority vote (page-level)
    final_prediction = Counter(pred_labels).most_common(1)[0][0]

    total += 1
    is_correct = int(final_prediction == true_writer)
    correct += is_correct

    print(f"{file} → True: {true_writer}, Predicted: {final_prediction}")

    rows.append([
        file,
        true_writer,
        final_prediction
    ])

# =========================
# Accuracy + Save CSV
# =========================
accuracy = (correct / total) * 100 if total > 0 else 0

print("\nTest Accuracy (page-level): {:.2f}%".format(accuracy))
print(f"Correct: {correct} / Total: {total}")

with open(OUTPUT_CSV, "w", newline="", encoding="utf-8") as f:
    writer = csv.writer(f)
    writer.writerow([
        "filename",
        "true_writer",
        "predicted_writer"
    ])
    writer.writerows(rows)
    writer.writerow([])
    writer.writerow(["SUMMARY", "", "", "", "", ""])
    writer.writerow(["accuracy_percent", accuracy, "correct", correct, "total", total])

print(f"\nSaved results to: {OUTPUT_CSV}")

Evaluating test data ...
10_2_290.png → True: 10, Predicted: 10
04_2_494.png → True: 04, Predicted: 04
01_2_491.png → True: 01, Predicted: 01
66_2_556.png → True: 66, Predicted: 66
57_2_337.png → True: 57, Predicted: 57
42_2_392.png → True: 42, Predicted: 42
54_2_334.png → True: 54, Predicted: 54
43_2_323.png → True: 43, Predicted: 43
20_2_300.png → True: 20, Predicted: 20
22_2_372.png → True: 22, Predicted: 22
55_2_335.png → True: 55, Predicted: 55
62_2_342.png → True: 62, Predicted: 62
51_2_401.png → True: 51, Predicted: 11
53_2_543.png → True: 53, Predicted: 68
50_2_330.png → True: 50, Predicted: 50
06_2_496.png → True: 06, Predicted: 06
04_2_424.png → True: 04, Predicted: 04
07_2_287.png → True: 07, Predicted: 07
24_2_304.png → True: 24, Predicted: 24
70_2_420.png → True: 70, Predicted: 70
46_2_466.png → True: 46, Predicted: 46
60_2_480.png → True: 60, Predicted: 60
41_2_321.png → True: 41, Predicted: 41
39_2_529.png → True: 39, Predicted: 39
18_2_508.png → True: 18, Predicted: 43
