# Model A — Baseline CNN (Instructional)

**Goal:** Train a simple baseline CNN on Rock–Paper–Scissors to establish a reference point.  
**Pipeline summary:** We reuse the `tf.data` input pipeline defined earlier (CSV → decode → pad/resize → augment (train only) → normalize).  
**Why baseline?** A compact model helps verify the pipeline, detect bugs (e.g., label mismatches), and set a minimum bar for accuracy.


## 1) Imports, Paths & Config

We load the preprocessing configuration (`preprocess.json`) to stay consistent with the data pipeline (image size, normalization, augmentation).  
We also read the class list from CSVs to keep a fixed label ordering across train/val/test.


In [None]:
import csv
import json
import math
from collections import Counter
from pathlib import Path

import tensorflow as tf
from tensorflow import keras
from keras import layers, regularizers
import numpy as np
from sklearn.metrics import confusion_matrix, classification_report
import matplotlib.pyplot as plt

## 2) Input Pipeline

- **Decode**: `tf.io.read_file` → `tf.io.decode_image` (RGB)  
- **Geometry**: `pad_to_square` (preserves aspect), then `tf.image.resize` to `IMG_SIZE×IMG_SIZE`  
- **Normalize**: rescale to `[0, 1]` (or standardize, per config)  
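- **Augment (train only)**: horizontal flip, slight zoom and contrast jitter, applied before normalization (a `rotation` value is read from the config but `augment_step` does not currently apply it)  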
- **tf.data**: `shuffle` (train), `map`, `batch`, `prefetch(AUTOTUNE)`  

> This ensures identical preprocessing between all models.


In [None]:
# Resolve every artifact location relative to the notebook's working directory.
ROOT = Path(".").resolve()
RPS = ROOT / "rps_outputs"
TRAIN_CSV = RPS / "train.csv"
VAL_CSV = RPS / "val.csv"
TEST_CSV = RPS / "test.csv"
PREPROC_JSON = RPS / "preprocess.json"

# Report the required inputs up front so a missing file is visible immediately.
for p in [RPS, TRAIN_CSV, VAL_CSV]:
    print(p, "OK" if p.exists() else "MISSING")

# Prefer the saved preprocessing config; otherwise fall back to built-in
# defaults that use the same keys the rest of the notebook reads.
if PREPROC_JSON.exists():
    PREPROC = json.loads(PREPROC_JSON.read_text())
else:
    PREPROC = {
        "seed": 42,
        "img_size": 128,
        "resize": {"mode": "pad", "width": 128, "height": 128, "pad_color": [0, 0, 0]},
        "normalize": {"type": "rescale", "scale": 1/255.0},
        "augment": {"flip_horizontal": True, "rotation": 0.08, "zoom": 0.10, "contrast": 0.10},
    }

# Only the first 400 characters are printed to keep the cell output short.
print("PREPROC:", json.dumps(PREPROC, indent=2)[:400], "...")

# Tolerate a config file that omits these keys instead of raising KeyError.
IMG_SIZE = int(PREPROC.get("img_size", 128))
SEED = int(PREPROC.get("seed", 42))

def collect_classes(*csv_paths):
    """Build a stable class list and label-to-index mapping from CSV files.

    Each path that exists is read with ``csv.DictReader`` and the values of
    its ``label`` column are gathered; paths that do not exist are skipped.

    Args:
        *csv_paths: ``pathlib.Path``-like objects (must support ``.exists()``).

    Returns:
        A ``(classes, label2id)`` tuple: ``classes`` is the sorted list of
        distinct labels and ``label2id`` maps each label to its position
        in that list.

    Raises:
        KeyError: if an existing CSV has rows without a ``label`` column.
    """
    seen = set()
    for csv_path in csv_paths:
        if not csv_path.exists():
            continue
        with open(csv_path, newline="") as handle:
            seen.update(row["label"] for row in csv.DictReader(handle))

    # Sorting makes the index assignment independent of file/row order.
    classes = sorted(seen)
    label2id = dict(zip(classes, range(len(classes))))
    return classes, label2id

# Label vocabulary shared by every split, so class ids agree across
# train / val / test.
CLASSES, LABEL2ID = collect_classes(TRAIN_CSV, VAL_CSV, TEST_CSV)
NUM_CLASSES = len(CLASSES)
print("classes:", CLASSES)
print("label2id:", LABEL2ID)

# --- Geometry: target size and how to reach it ---
IMG_SIZE = int(PREPROC.get("img_size", 128))
RESIZE = PREPROC.get("resize", {})
MODE = RESIZE.get("mode", "pad")
# Height/width default to the square IMG_SIZE when the config omits them.
TARGET_H, TARGET_W = (int(RESIZE.get(side, IMG_SIZE)) for side in ("height", "width"))
PAD_COLOR = tuple(RESIZE.get("pad_color", [0, 0, 0]))

# --- Normalization ---
NORM = PREPROC.get("normalize", {"type": "rescale", "scale":1/255.0})
NORM_TYPE = NORM.get("type", "rescale")
SCALE = float(NORM.get("scale", 1/255.0))

# --- Augmentation strengths (0 / False disables the corresponding step) ---
AUG = PREPROC.get("augment", {})
FLIP = bool(AUG.get("flip_horizontal", False))
ROT, ZOOM, CONTR = (float(AUG.get(key, 0.0)) for key in ("rotation", "zoom", "contrast"))

def decode_image(path):
    """Read an image file and decode it into a 3-channel tensor.

    Args:
        path: scalar string tensor (or str) with the file location.

    Returns:
        The decoded image with static shape ``[None, None, 3]``.
    """
    raw_bytes = tf.io.read_file(path)
    # expand_animations=False is passed so the result is not expanded into
    # a stack of frames for animated inputs.
    pixels = tf.io.decode_image(raw_bytes, channels=3, expand_animations=False)
    # Pin rank and channel count so downstream ops see a known static shape.
    pixels.set_shape([None, None, 3])
    return pixels

def pad_to_square(img):
    """Pad an ``[H, W, 3]`` image to a centred square filled with PAD_COLOR.

    The shorter side is padded symmetrically (any odd pixel goes to the
    bottom/right) until height equals width. The image dtype is preserved.

    Args:
        img: rank-3 image tensor with 3 channels.

    Returns:
        A ``[D, D, 3]`` tensor where ``D = max(H, W)``.
    """
    shape = tf.shape(img)
    h, w = shape[0], shape[1]
    dim = tf.maximum(h, w)

    pad_top = (dim - h) // 2
    pad_bottom = dim - h - pad_top
    pad_left = (dim - w) // 2
    pad_right = dim - w - pad_left
    paddings = [[pad_top, pad_bottom], [pad_left, pad_right], [0, 0]]

    padded = tf.pad(img, paddings, constant_values=0)

    if PAD_COLOR != (0, 0, 0):
        # Mark original pixels with 1 and the new border with 0, then select
        # per pixel. Selecting with tf.where (instead of arithmetic blending
        # with a float literal) works for integer images such as the uint8
        # output of decode_image.
        inside = tf.pad(
            tf.ones_like(img[:, :, 0:1], dtype=tf.int32), paddings, constant_values=0
        )
        color = tf.reshape(tf.cast(tf.constant(PAD_COLOR), img.dtype), [1, 1, 3])
        padded = tf.where(inside > 0, padded, color)
    return padded

def resize_step(img):
    """Bring an image to ``[TARGET_H, TARGET_W]``.

    When MODE is ``"pad"`` the image is first padded to a square so the
    resize does not distort its aspect ratio; any other mode resizes directly.
    """
    squared = pad_to_square(img) if MODE == "pad" else img
    return tf.image.resize(squared, [TARGET_H, TARGET_W])

def normalize_step(img):
    """Cast to float32 and normalize according to NORM_TYPE.

    ``"standardize"`` applies per-image standardization; ``"rescale"`` and
    any unrecognised type multiply the pixels by SCALE.
    """
    as_float = tf.cast(img, tf.float32)
    if NORM_TYPE == "standardize":
        return tf.image.per_image_standardization(as_float)
    # "rescale" and the fallback for unknown types share the same behaviour.
    return as_float * SCALE

def augment_step(img):
    """Apply the configured random augmentations to one image.

    Steps, each enabled by its config value: horizontal flip (FLIP),
    random zoom in ``[1 - ZOOM, 1 + ZOOM]`` with the output cropped/padded
    back to the input size, and random contrast in
    ``[1 - CONTR, 1 + CONTR]``.

    NOTE(review): ROT is read from the config but is not referenced here,
    so no rotation is applied.
    """
    if FLIP:
        img = tf.image.random_flip_left_right(img)

    if ZOOM > 0.0:
        dims = tf.shape(img)
        height, width = dims[0], dims[1]
        factor = 1.0 + tf.random.uniform([], -ZOOM, ZOOM)
        zoomed_hw = [
            tf.cast(tf.cast(height, tf.float32) * factor, tf.int32),
            tf.cast(tf.cast(width, tf.float32) * factor, tf.int32),
        ]
        # Resize by the zoom factor, then crop or pad back to the original
        # size so the output shape is unchanged.
        img = tf.image.resize_with_crop_or_pad(
            tf.image.resize(img, zoomed_hw), height, width
        )

    if CONTR > 0.0:
        img = tf.image.random_contrast(img, lower=1.0-CONTR, upper=1.0+CONTR)
    return img

# Shorthand used for num_parallel_calls and prefetch in the tf.data pipeline.
AUTOTUNE = tf.data.AUTOTUNE

# In-graph lookup table mapping a class-name string to its integer id
# (its position in CLASSES). Labels that are not in CLASSES map to -1.
CLASSES_T = tf.constant(CLASSES)
IDS_T = tf.constant(list(range(NUM_CLASSES)), dtype=tf.int32)
LABEL_TABLE = tf.lookup.StaticHashTable(
    tf.lookup.KeyValueTensorInitializer(keys=CLASSES_T, values=IDS_T), default_value=-1
)

def parse_row(path_str, label_str, training: bool):
    """Turn one (image path, label string) pair into a model-ready example.

    Pipeline: decode -> resize -> augment (only when ``training``) ->
    normalize. The label is converted to a one-hot float32 vector of
    length NUM_CLASSES via LABEL_TABLE.

    Returns:
        ``(image, one_hot_label)``.
    """
    pixels = resize_step(decode_image(path_str))
    if training:
        pixels = augment_step(pixels)
    pixels = normalize_step(pixels)

    # Labels missing from the table come back as -1 (the table's default).
    class_id = LABEL_TABLE.lookup(label_str)
    target = tf.one_hot(class_id, depth=NUM_CLASSES, dtype=tf.float32)
    return pixels, target

def read_csv_dataset(csv_path, training: bool, batch_size=32, shuffle_buffer=2048):
    """Build a batched, prefetched ``tf.data`` dataset from a CSV file.

    The first line is skipped as a header. Each remaining line is split on
    ``","``; field 0 is passed to ``parse_row`` as the image path and
    field 1 as the label.
    NOTE(review): this assumes the columns are ordered path,label and that
    no field contains a comma - confirm against the CSV writer.

    Args:
        csv_path: location of the CSV file.
        training: when True, rows are shuffled (seeded with SEED, reshuffled
            every iteration) and augmentation is enabled in ``parse_row``.
        batch_size: number of examples per batch.
        shuffle_buffer: shuffle buffer size used when ``training`` is True.

    Returns:
        A dataset yielding ``(images, one_hot_labels)`` batches.
    """
    def _split(line):
        fields = tf.strings.split(line, sep=",")
        return fields[0], fields[1]

    def _load(path, label):
        return parse_row(path, label, training)

    rows = tf.data.TextLineDataset(str(csv_path)).skip(1)
    rows = rows.map(_split, num_parallel_calls=AUTOTUNE)
    if training:
        # Shuffle the cheap (path, label) strings before any image is decoded.
        rows = rows.shuffle(shuffle_buffer, seed=SEED, reshuffle_each_iteration=True)
    examples = rows.map(_load, num_parallel_calls=AUTOTUNE)
    return examples.batch(batch_size).prefetch(AUTOTUNE)

# One batch size for every split; only the training split is built with
# training=True (shuffling and augmentation).
BATCH_SIZE = 32
train_ds = read_csv_dataset(TRAIN_CSV, training=True, batch_size=BATCH_SIZE)
val_ds = read_csv_dataset(VAL_CSV, training=False, batch_size=BATCH_SIZE)
# The test split is optional: test_ds is None when test.csv does not exist.
test_ds = read_csv_dataset(TEST_CSV, training=False, batch_size=BATCH_SIZE) if TEST_CSV.exists() else None

# Steps per epoch (full epochs)
def count_rows(csv_path):
    """Return the number of data rows (header excluded) in a CSV file."""
    n_rows = 0
    with open(csv_path, newline="") as handle:
        for _ in csv.DictReader(handle):
            n_rows += 1
    return n_rows

# Round up so the final, possibly smaller, batch still counts as a step and
# one epoch covers every row of the split.
steps_per_epoch = math.ceil(count_rows(TRAIN_CSV) / BATCH_SIZE)
val_steps = math.ceil(count_rows(VAL_CSV) / BATCH_SIZE)

# (Optional) class weights, but cap to avoid instability
def class_counts(csv_path):
    """Count how many rows of a CSV carry each value of the ``label`` column.

    Returns:
        A ``Counter`` mapping label -> number of rows.
    """
    with open(csv_path, newline="") as handle:
        return Counter(row["label"] for row in csv.DictReader(handle))

# Inverse-frequency class weights: total / (NUM_CLASSES * count), keyed by
# class id. max(1, ...) avoids division by zero for a class that has no
# training rows.
cnts = class_counts(TRAIN_CSV)
total = sum(cnts.values())
raw_w = {LABEL2ID[c]: total / (NUM_CLASSES * max(1, cnts.get(c, 0))) for c in CLASSES}
# Cap each weight at 2.0 so a rare class cannot dominate the loss.
class_weights = {k: min(v, 2.0) for k, v in raw_w.items()}
print("Class counts:", cnts)
print("Class weights (capped):", class_weights)
# NOTE(review): class_weights is only printed in the code shown here; it is
# not passed to MODEL_A.fit - confirm whether that is intentional.

# Best-effort mixed precision ("mixed_float16" global policy).
# FINAL_DTYPE is the dtype intended for the model's output layer: "float32"
# when mixed precision is active (keeps the softmax numerically stable),
# None (layer default) otherwise.
try:
    from keras import mixed_precision
    mixed_precision.set_global_policy("mixed_float16")
    FINAL_DTYPE = "float32"
except Exception as exc:
    # Stay best-effort, but report the reason instead of failing silently.
    print("Mixed precision not enabled:", exc)
    FINAL_DTYPE = None

## 3) Model A Architecture (Why this design?)

- **3 conv stages** (32→64→128) with **BatchNorm** + **ReLU**  
- **MaxPool** after first two stages (downsample); **GAP** before the head (parameter-efficient)  
- **Dropout** grows with depth (0.10 → 0.15 → 0.30 at the dense head)  
- **Softmax** outputs `num_classes` probabilities

This is intentionally lightweight to give a quick, stable baseline on small, clean datasets.


In [None]:
def _conv_bn_relu(x, filters):
    """Apply Conv2D(3x3, same padding) -> BatchNormalization -> ReLU."""
    x = layers.Conv2D(filters, 3, padding="same")(x)
    x = layers.BatchNormalization()(x)
    return layers.Activation("relu")(x)


def build_model_a(input_shape, num_classes):
    """Build the baseline CNN (Model A).

    Architecture: three conv stages (32 -> 64 -> 128 filters); the first two
    are followed by max pooling and dropout (0.10, 0.15), the third by
    global average pooling; then Dense(128, relu), Dropout(0.30) and a
    softmax head with ``num_classes`` units.

    Args:
        input_shape: shape of one input image, e.g. ``(H, W, 3)``.
        num_classes: number of output classes.

    Returns:
        An uncompiled ``keras.Model`` named ``"ModelA_Baseline"``.
    """
    inputs = keras.Input(shape=input_shape)

    x = _conv_bn_relu(inputs, 32)
    x = layers.MaxPooling2D()(x)
    x = layers.Dropout(0.10)(x)

    x = _conv_bn_relu(x, 64)
    x = layers.MaxPooling2D()(x)
    x = layers.Dropout(0.15)(x)

    x = _conv_bn_relu(x, 128)
    x = layers.GlobalAveragePooling2D()(x)

    x = layers.Dense(128, activation="relu")(x)
    x = layers.Dropout(0.30)(x)

    # Under a mixed-precision policy the softmax should be computed in
    # float32. FINAL_DTYPE carries that choice when it is defined; None
    # (or an undefined FINAL_DTYPE) keeps the layer's default dtype.
    out_dtype = globals().get("FINAL_DTYPE")
    outputs = layers.Dense(num_classes, activation="softmax", dtype=out_dtype)(x)
    return keras.Model(inputs, outputs, name="ModelA_Baseline")


## 4) Compile & Training Setup

- **Loss**: Categorical Crossentropy  
- **Optimizer**: Adam (default lr works well for this baseline)  
- **Metrics**: Accuracy  
- **Callbacks**: 
  - `ModelCheckpoint(save_best_only=True, monitor="val_accuracy")`
  - `EarlyStopping(patience=5, restore_best_weights=True)`

We use `.repeat()` on datasets + fixed `steps_per_epoch`/`validation_steps` to enforce **full epochs**.  
> Tip: Use `steps_per_epoch = ceil(train_rows / batch_size)` to cover all batches exactly once per epoch.


In [None]:
# Build the network for square IMG_SIZE RGB inputs and compile it with the
# loss that matches the one-hot labels produced by the input pipeline.
MODEL_A = build_model_a((IMG_SIZE, IMG_SIZE, 3), NUM_CLASSES)
MODEL_A.compile(
    optimizer=keras.optimizers.Adam(),
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)

# Keep only the checkpoint with the best validation accuracy.
checkpoint_cb = keras.callbacks.ModelCheckpoint(
    filepath=str(RPS / "checkpoints/model_a_best.keras"),
    monitor="val_accuracy",
    mode="max",
    save_best_only=True,
)
# Stop after 5 epochs without improvement and restore the best weights.
early_stop_cb = keras.callbacks.EarlyStopping(
    monitor="val_accuracy",
    patience=5,
    restore_best_weights=True,
)
callbacks = [checkpoint_cb, early_stop_cb]

In [None]:
EPOCHS = 40
# Both datasets are repeated indefinitely, so epoch boundaries come from the
# step counts. Using the values computed from the CSV row counts makes each
# epoch exactly one full pass over the training and validation data.
history = MODEL_A.fit(
    train_ds.repeat(),
    validation_data=val_ds.repeat(),
    epochs=EPOCHS,
    steps_per_epoch=steps_per_epoch,
    validation_steps=val_steps,
    callbacks=callbacks,
    verbose=1,
)

## 5) Evaluation & Diagnostics

- Run predictions on **test** (or val if test is absent)
- Report **confusion matrix** and **classification report** (precision/recall/F1 by class)
- Save final **test metrics** to JSON for the report


In [None]:
# Prefer the held-out test split; fall back to validation when it is absent.
eval_ds = test_ds if test_ds is not None else val_ds

# Collect integer class ids for ground truth and predictions, batch by batch.
y_true, y_pred = [], []
for x, y in eval_ds:
    p = MODEL_A.predict(x, verbose=0)
    y_true.extend(np.argmax(y.numpy(), axis=1))
    y_pred.extend(np.argmax(p, axis=1))

# Passing the full id list keeps the matrix NUM_CLASSES x NUM_CLASSES and
# keeps target_names aligned even if a class never appears in this split.
label_ids = list(range(NUM_CLASSES))
print(confusion_matrix(y_true, y_pred, labels=label_ids))
print(classification_report(y_true, y_pred, labels=label_ids, target_names=CLASSES))

In [None]:
# Persist the test metrics (when a test split exists) for the report.
if test_ds is not None:
    test_metrics = MODEL_A.evaluate(test_ds, return_dict=True)
    print("Test metrics:", test_metrics)
    with open(RPS / "model_a_test_metrics.json", "w") as f:
        # Cast to plain floats so the values are JSON-serialisable.
        json.dump({k: float(v) for k, v in test_metrics.items()}, f, indent=2)

# Save the trained model alongside the other run outputs.
MODEL_A.save(str(RPS / "model_a_final.keras"))

## 6) Learning Curves

We plot accuracy and loss for train vs. validation to:
- Spot **overfitting** (train ↑, val ↔/↓)
- Verify **convergence** (both curves stabilize)
- Compare across experiments quickly


In [None]:
# Accuracy per epoch: train vs. validation.
plt.figure()
plt.plot(history.history["accuracy"], label="train_acc")
plt.plot(history.history["val_accuracy"], label="val_acc")
plt.xlabel("Epoch"); plt.ylabel("Accuracy"); plt.legend(); plt.title("Model A Accuracy")
plt.show()

# Loss per epoch: train vs. validation (legend labels name the loss curves).
plt.figure()
plt.plot(history.history["loss"], label="train_loss")
plt.plot(history.history["val_loss"], label="val_loss")
plt.xlabel("Epoch"); plt.ylabel("Loss"); plt.legend(); plt.title("Model A Loss")
plt.show()