In [None]:
import os
from PIL import Image
import numpy as np

# Image dimensions shared by the notebook; build_model's default input shape
# reads them as (IMG_SIZE[1], IMG_SIZE[0], 1).
IMG_SIZE = (128, 128)

# Files written while training.
BEST_MODEL_FILE = "best_handwriting_detector.keras"  # target of the ModelCheckpoint callback
MODEL_FILE = "handwriting_detector.keras"            # model saved once training has finished
LOG_CSV = "training_log.csv"                         # per-epoch log written by CSVLogger

In [None]:
import tensorflow as tf
from tensorflow.keras import layers, models, callbacks

def build_model(input_shape=(IMG_SIZE[1], IMG_SIZE[0], 1)):
    """
    Build and compile a small CNN that outputs a single sigmoid probability.

    Architecture:
      * four Conv2D(3x3, 'same') -> BatchNormalization -> ReLU -> MaxPool2D(2x2)
        stages with 32, 64, 128 and 256 filters;
      * GlobalAveragePooling2D to keep the parameter count low;
      * Dense(128) -> BatchNormalization -> ReLU -> Dropout(0.4);
      * Dense(1, sigmoid) output.

    Args:
        input_shape: Shape of one input sample, passed to ``layers.Input``.
            Defaults to ``(IMG_SIZE[1], IMG_SIZE[0], 1)``.

    Returns:
        The compiled Keras model (Adam with learning rate 1e-4, binary
        cross-entropy loss, accuracy metric). ``model.summary()`` is printed
        as a side effect before returning.
    """
    image_in = layers.Input(shape=input_shape)

    # Convolutional feature extractor: every stage has the same layout and
    # only the filter count changes.
    features = image_in
    for n_filters in (32, 64, 128, 256):
        features = layers.Conv2D(n_filters, (3, 3), padding='same')(features)
        features = layers.BatchNormalization()(features)
        features = layers.Activation('relu')(features)
        features = layers.MaxPool2D((2, 2))(features)

    # Classification head on top of globally pooled features.
    head = layers.GlobalAveragePooling2D()(features)
    head = layers.Dense(128)(head)
    head = layers.BatchNormalization()(head)
    head = layers.Activation('relu')(head)
    head = layers.Dropout(0.4)(head)
    probability = layers.Dense(1, activation='sigmoid')(head)

    net = models.Model(inputs=image_in, outputs=probability)
    net.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4),
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )
    net.summary()
    return net

In [None]:
# Training hyperparameters
RANDOM_STATE = 42  # seed for the train/validation split
TEST_SIZE = 0.2    # fraction of samples held out for validation
EPOCHS = 25        # upper bound on epochs; early stopping may end training sooner
BATCH_SIZE = 32    # samples per training batch

In [None]:
from sklearn.model_selection import train_test_split

def train_model(X, y):
    """
    Train the CNN on ``X``/``y`` with light augmentation and save the result.

    The data is split into stratified train/validation subsets
    (``TEST_SIZE``, ``RANDOM_STATE``), a fresh model is built for the sample
    shape of ``X``, and it is fitted for at most ``EPOCHS`` epochs on
    augmented training batches while validating on the un-augmented
    validation subset.

    Args:
        X: Image array shaped ``(n, height, width, channels)``. An array
            shaped ``(n, height, width)`` is also accepted; a trailing
            channel axis is added to it.
        y: One label per sample in ``X``; used both for stratification and
            as the fit targets.

    Returns:
        Tuple ``(model, history)``: the trained Keras model and the
        ``History`` object returned by ``model.fit``.

    Side effects:
        Writes ``BEST_MODEL_FILE`` (checkpoint), ``LOG_CSV`` (epoch log) and
        ``MODEL_FILE`` (model state when training ends).
    """
    # Conv2D and ImageDataGenerator.flow both need rank-4 input, so give
    # channel-less image stacks an explicit single channel.
    X = np.asarray(X)
    if X.ndim == 3:
        X = X[..., np.newaxis]

    # shuffle & split
    X_train, X_val, y_train, y_val = train_test_split(
        X, y, test_size=TEST_SIZE, random_state=RANDOM_STATE, stratify=y
    )
    print("Train/Val sizes:", X_train.shape, X_val.shape)

    model = build_model(input_shape=X.shape[1:])

    # Callbacks. Note the two monitors differ: the checkpoint file keeps the
    # epoch with the best val_accuracy, while early stopping tracks val_loss,
    # so BEST_MODEL_FILE and MODEL_FILE may hold weights from different epochs.
    ckpt = callbacks.ModelCheckpoint(BEST_MODEL_FILE, monitor='val_accuracy', save_best_only=True, verbose=1)
    csv_logger = callbacks.CSVLogger(LOG_CSV)
    early = callbacks.EarlyStopping(monitor='val_loss', patience=6, restore_best_weights=True)

    # Data augmentation (light) - optional, can add if dataset small.
    # No datagen.fit() call: it only computes statistics for the
    # featurewise_* / zca_whitening options, none of which are enabled here.
    # NOTE(review): brightness_range is applied through PIL inside Keras;
    # confirm it behaves as intended for the pixel scale stored in X.
    datagen = tf.keras.preprocessing.image.ImageDataGenerator(
        rotation_range=3,
        width_shift_range=0.02,
        height_shift_range=0.02,
        brightness_range=(0.9, 1.1),
        shear_range=0.01,
        zoom_range=0.02
    )

    # Seed the iterator so shuffling and augmentation are repeatable.
    train_batches = datagen.flow(X_train, y_train, batch_size=BATCH_SIZE, seed=RANDOM_STATE)

    # steps_per_epoch is left to Keras, which takes it from len(train_batches),
    # so every epoch is one full pass including the final partial batch.
    history = model.fit(train_batches,
                        epochs=EPOCHS,
                        validation_data=(X_val, y_val),
                        callbacks=[ckpt, csv_logger, early])

    # Save final model
    model.save(MODEL_FILE)
    print(f"Saved final model to {MODEL_FILE}")
    return model, history


In [None]:
from preprocess import preprocess_single_image

def predict_image(path_or_pil, model=None, model_path=None, threshold=0.5):
    """Return label ('AI'/'Human') and probability of 'AI' for one image.

    Args:
        path_or_pil: Image reference forwarded unchanged to
            ``preprocess_single_image``.
        model: Already-loaded model exposing ``predict``. When given it is
            used directly and ``model_path`` is ignored.
        model_path: Path of a saved model, loaded with
            ``tf.keras.models.load_model`` when ``model`` is None.
        threshold: Decision cut-off; the label is 'AI' when the predicted
            probability is greater than or equal to this value.

    Returns:
        Tuple ``(label, prob)`` where ``label`` is 'AI' or 'Human' and
        ``prob`` is the model output converted to a Python float.

    Raises:
        ValueError: If neither ``model`` nor ``model_path`` is provided.
    """
    if model is None:
        # Fail early with a clear message instead of handing None to the
        # Keras loader, which reports a far less helpful error.
        if model_path is None:
            raise ValueError("predict_image requires either `model` or `model_path`")
        model = tf.keras.models.load_model(model_path)

    arr = preprocess_single_image(path_or_pil)
    # The model has a single sigmoid unit, so take the one value of the one row.
    prob = float(model.predict(arr, verbose=0)[0][0])
    label = 'AI' if prob >= threshold else 'Human'
    return label, prob

In [None]:
# Train: load the saved sample and label arrays, then fit the classifier.
X, y = (np.load(array_file) for array_file in ('X_data.npy', 'y_data.npy'))

model, history = train_model(X, y)

In [None]:
# Predict
PATH = "sample.png"

# Prefer the checkpointed best model and fall back to the final saved model.
mp = next((p for p in (BEST_MODEL_FILE, MODEL_FILE) if os.path.exists(p)), None)
if mp is None:
    # Stop here with an actionable message rather than passing None on to
    # predict_image, which would fail inside the model loader.
    raise FileNotFoundError(
        f"No trained model found (looked for {BEST_MODEL_FILE!r} and {MODEL_FILE!r}); "
        "run the training cell first."
    )
print(f"Using model file: {mp}")
label, prob = predict_image(PATH, model=None, model_path=mp)
print(f"Prediction: {label} (AI prob = {prob:.4f})")