## 03. Model Training - GreenSpace CNN

This notebook trains a multi-task CNN using the manifests produced in 02:
- Inputs: `data/processed/splits/{train,val,test}.csv` (paths point to `data/cache/images/`)
- Backbone: EfficientNetB0 (ImageNet weights)
- Heads:
  - Binary features: sigmoid
  - Shade: 2-class softmax (minimal/abundant)
  - Structured–Unstructured: 5-class softmax
  - Vegetation cover distribution: 5-class softmax

Baseline in this notebook: **no augmentation** (we can add it back after the first clean run).


In [1]:
# Imports and paths
import os
import random
import pandas as pd
import numpy as np
import tensorflow as tf
from pathlib import Path

# Global reproducibility controls
GLOBAL_SEED = 123
RNG_STATE_AUG = 123

# Set seeds
random.seed(GLOBAL_SEED)
np.random.seed(GLOBAL_SEED)
tf.random.set_seed(GLOBAL_SEED)

train_csv = Path('../data/processed/splits/train.csv')
val_csv   = Path('../data/processed/splits/val.csv')
test_csv  = Path('../data/processed/splits/test.csv')

assert train_csv.exists() and val_csv.exists() and test_csv.exists(), 'Missing split manifests. Run 02 first.'

train_df = pd.read_csv(train_csv)
val_df   = pd.read_csv(val_csv)
test_df  = pd.read_csv(test_csv)

print('Loaded splits:', len(train_df), len(val_df), len(test_df))


  if not hasattr(np, "object"):


Loaded splits: 1137 379 379


In [2]:
# Build tf.data datasets from manifests
IMG_SIZE = (512, 512)
BATCH_SIZE = 8

# Binary labels are stored as probabilities in *_p columns
binary_cols = [c for c in train_df.columns if c.endswith('_p')]

# Shade/Score/Veg in the current split manifests are stored as integer class columns
# (not one-hot probability columns like shade_p_*/score_p_*/veg_p_*)
HAS_SHADE_CLASS = 'shade_class' in train_df.columns
HAS_SCORE_CLASS = 'score_class' in train_df.columns
HAS_VEG_CLASS   = 'veg_class' in train_df.columns

assert 'image_path' in train_df.columns, "Missing image_path in split manifests"
assert len(binary_cols) > 0, "No binary *_p columns found in split manifests"
assert HAS_SHADE_CLASS, "Missing shade_class in split manifests"
assert HAS_SCORE_CLASS, "Missing score_class in split manifests"
assert HAS_VEG_CLASS, "Missing veg_class in split manifests"

print('Binary labels:', binary_cols)
print('Using class cols:', {'shade_class': HAS_SHADE_CLASS, 'score_class': HAS_SCORE_CLASS, 'veg_class': HAS_VEG_CLASS})

# Configure head sizes + loss modes
NUM_SHADE = 2  # minimal vs abundant
NUM_SCORE = 5  # 1..5
NUM_VEG   = 5  # 1..5

SHADE_LOSS_MODE = 'sparse'  # uses shade_class
SCORE_LOSS_MODE = 'sparse'  # uses score_class
VEG_LOSS_MODE   = 'sparse'  # uses veg_class

# Map a row to (image, label dict)
def decode_image(path):
    img = tf.io.read_file(path)
    img = tf.io.decode_jpeg(img, channels=3)
    img = tf.cast(img, tf.float32) / 255.0
    return img

# Build a dataset from a DataFrame (baseline: no augmentation)
def make_ds(df, shuffle=True):
    paths = df['image_path'].astype(str).tolist()

    # IMPORTANT: fill NaNs to avoid NaN loss during training
    y_bin = df[binary_cols].fillna(0.0).astype(np.float32).values

    # shade_class should be 0/1; clip defensively
    y_shade = df['shade_class'].fillna(0).astype(np.int32).values
    y_shade = np.clip(y_shade, 0, NUM_SHADE - 1)

    # score_class and veg_class are expected 1..5; convert to 0..4 for sparse CE
    y_score = df['score_class'].fillna(1).astype(np.int32).values - 1
    y_score = np.clip(y_score, 0, NUM_SCORE - 1)

    y_veg = df['veg_class'].fillna(1).astype(np.int32).values - 1
    y_veg = np.clip(y_veg, 0, NUM_VEG - 1)

    ds_paths = tf.data.Dataset.from_tensor_slices(paths)
    ds_imgs = ds_paths.map(decode_image, num_parallel_calls=tf.data.AUTOTUNE)

    ds_labels = tf.data.Dataset.from_tensor_slices({
        'bin_head': y_bin,
        'shade_head': y_shade,
        'score_head': y_score,
        'veg_head': y_veg,
    })

    ds = tf.data.Dataset.zip((ds_imgs, ds_labels))
    if shuffle and len(paths) > 1:
        ds = ds.shuffle(buffer_size=len(paths), seed=GLOBAL_SEED, reshuffle_each_iteration=True)
    ds = ds.batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)
    return ds

train_ds = make_ds(train_df, shuffle=True)
val_ds   = make_ds(val_df, shuffle=False)
test_ds  = make_ds(test_df, shuffle=False)

print('Datasets ready.')


Binary labels: ['sports_field_p', 'multipurpose_open_area_p', 'children_s_playground_p', 'water_feature_p', 'gardens_p', 'walking_paths_p', 'built_structures_p', 'parking_lots_p']
Using class cols: {'shade_class': True, 'score_class': True, 'veg_class': True}
Datasets ready.


In [3]:
# Define a multi-head model (EfficientNetB0 backbone)
from tensorflow.keras import layers, models, applications, optimizers

# IMPORTANT:
# Your error occurs *inside* EfficientNetB0(weights='imagenet') before it returns a model,
# so prints placed after that line will never run.
# We'll do a two-step build:
# 1) Build with weights=None and verify the model really expects 3-channel input.
# 2) Try to load ImageNet weights; if it fails, we fall back to weights=None so you can keep training.

tf.keras.backend.clear_session()

INPUT_SHAPE = (512, 512, 3)
print('INPUT_SHAPE used for backbone:', INPUT_SHAPE)
assert INPUT_SHAPE[-1] == 3, f"Expected 3-channel RGB input for imagenet weights, got {INPUT_SHAPE}"

NUM_BIN = len(binary_cols)

# These are configured in the dataset cell (based on the current manifest schema)
assert NUM_SHADE == 2 and NUM_SCORE == 5 and NUM_VEG == 5, (NUM_SHADE, NUM_SCORE, NUM_VEG)

# Explicit input tensor (forces 3-channel model build)
inputs = layers.Input(shape=INPUT_SHAPE, name='img')

# Step 1: sanity-build WITHOUT weights (cannot fail during weight loading)
backbone_no_weights = applications.EfficientNetB0(include_top=False, weights=None, input_tensor=inputs)
stem0 = backbone_no_weights.get_layer('stem_conv')
print('Sanity stem_conv kernel shape (weights=None):', tuple(stem0.kernel.shape))
assert int(stem0.kernel.shape[2]) == 3, (
    "Backbone was built with 1 input channel even though INPUT_SHAPE says 3. "
    "This usually means a global Keras/TensorFlow config is forcing grayscale/channels."
)

# Step 2: try ImageNet weights
try:
    backbone = applications.EfficientNetB0(include_top=False, weights='imagenet', input_tensor=inputs)
    stem = backbone.get_layer('stem_conv')
    print('Loaded ImageNet weights. stem_conv kernel shape:', tuple(stem.kernel.shape))
except Exception as e:
    print('FAILED to load ImageNet weights for EfficientNetB0:', repr(e))
    print('Falling back to weights=None so you can proceed with training.')
    print('If you want to fix ImageNet weights loading, try:')
    print('  - Delete cached EfficientNet weights in ~/.keras/models/ (files with efficientnetb0...) and rerun')
    print('  - Or align package versions: TensorFlow + Keras (mismatched installs can cause weird weight loading)')
    backbone = backbone_no_weights

x = layers.GlobalAveragePooling2D()(backbone.output)

# Heads
bin_out = layers.Dense(NUM_BIN, activation='sigmoid', name='bin_head')(x)
shade_out = layers.Dense(NUM_SHADE, activation='softmax', name='shade_head')(x)
score_out = layers.Dense(NUM_SCORE, activation='softmax', name='score_head')(x)
veg_out = layers.Dense(NUM_VEG, activation='softmax', name='veg_head')(x)

model = models.Model(inputs=inputs, outputs=[bin_out, shade_out, score_out, veg_out])

# Compile (class heads use sparse targets from *_class columns)
losses = {
    'bin_head': 'binary_crossentropy',
    'shade_head': 'sparse_categorical_crossentropy' if SHADE_LOSS_MODE == 'sparse' else 'categorical_crossentropy',
    'score_head': 'sparse_categorical_crossentropy' if SCORE_LOSS_MODE == 'sparse' else 'categorical_crossentropy',
    'veg_head': 'sparse_categorical_crossentropy' if VEG_LOSS_MODE == 'sparse' else 'categorical_crossentropy',
}
metrics = {
    'bin_head': ['binary_accuracy'],
    'shade_head': ['sparse_categorical_accuracy' if SHADE_LOSS_MODE == 'sparse' else 'accuracy'],
    'score_head': ['sparse_categorical_accuracy' if SCORE_LOSS_MODE == 'sparse' else 'accuracy'],
    'veg_head': ['sparse_categorical_accuracy' if VEG_LOSS_MODE == 'sparse' else 'accuracy'],
}
model.compile(optimizer=optimizers.Adam(1e-3), loss=losses, metrics=metrics)

model.summary()



INPUT_SHAPE used for backbone: (512, 512, 3)
Sanity stem_conv kernel shape (weights=None): (3, 3, 3, 32)
Loaded ImageNet weights. stem_conv kernel shape: (3, 3, 3, 32)


In [4]:
# Train (warm-up then fine-tune)
from datetime import datetime

# One tag per run so artifacts don't overwrite each other.

RUN_TAG = globals().get('RUN_TAG', None) or datetime.now().strftime('%Y%m%d_%H%M%S')
print('RUN_TAG:', RUN_TAG)

EPOCHS_WARMUP = 5
EPOCHS_FINETUNE = 10

# Warm-up: freeze backbone, train heads
for layer in model.layers:
    if isinstance(layer, tf.keras.Model) or layer.name.startswith('efficientnet'):
        layer.trainable = False

ckpt_dir = Path('../models'); ckpt_dir.mkdir(parents=True, exist_ok=True)
ckpt_path = ckpt_dir / f'best_{RUN_TAG}.keras'

callbacks = [
    tf.keras.callbacks.ModelCheckpoint(filepath=str(ckpt_path), monitor='val_loss', save_best_only=True, save_weights_only=False),
    tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True),
    tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=2),
]

history_warmup = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=EPOCHS_WARMUP,
    callbacks=callbacks,
    verbose=1,
)

# Fine-tune: unfreeze top backbone blocks
for layer in model.layers:
    layer.trainable = True

model.compile(optimizer=tf.keras.optimizers.Adam(1e-4), loss=losses, metrics=metrics)

history_finetune = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=EPOCHS_FINETUNE,
    callbacks=callbacks,
    verbose=1,
)

print('Training complete.')


RUN_TAG: 20260129_074737
Epoch 1/5
[1m143/143[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m332s[0m 2s/step - bin_head_binary_accuracy: 0.8033 - bin_head_loss: 0.4353 - loss: 3.7926 - score_head_loss: 1.4201 - score_head_sparse_categorical_accuracy: 0.3879 - shade_head_loss: 0.6388 - shade_head_sparse_categorical_accuracy: 0.6755 - veg_head_loss: 1.2965 - veg_head_sparse_categorical_accuracy: 0.4222 - val_bin_head_binary_accuracy: 0.6534 - val_bin_head_loss: 0.7077 - val_loss: 5.2942 - val_score_head_loss: 2.0913 - val_score_head_sparse_categorical_accuracy: 0.1847 - val_shade_head_loss: 0.6524 - val_shade_head_sparse_categorical_accuracy: 0.6834 - val_veg_head_loss: 1.8594 - val_veg_head_sparse_categorical_accuracy: 0.3509 - learning_rate: 0.0010
Epoch 2/5
[1m143/143[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m347s[0m 2s/step - bin_head_binary_accuracy: 0.8355 - bin_head_loss: 0.3761 - loss: 3.3044 - score_head_loss: 1.2781 - score_head_sparse_categorical_accuracy: 0.4512 - sh

In [5]:
# Evaluation moved to 04_model_evaluation.ipynb
print('Note: Evaluation and threshold calibration are now in 04_model_evaluation.ipynb')


Note: Evaluation and threshold calibration are now in 04_model_evaluation.ipynb


In [6]:
# Save final artifacts: trained model and config
from datetime import datetime
import json

# Reuse the same RUN_TAG used for checkpoints (or make one if needed)
RUN_TAG = globals().get('RUN_TAG', None) or datetime.now().strftime('%Y%m%d_%H%M%S')

save_dir = Path('../models'); save_dir.mkdir(parents=True, exist_ok=True)

# 1) Save final model (may differ from best checkpoint if last epoch improved)
final_path = save_dir / f"final_{RUN_TAG}.keras"
model.save(str(final_path))
print('Saved final model to', final_path)

# 2) Save weights separately (also tagged)
weights_path = save_dir / f"final_{RUN_TAG}.weights.h5"
model.save_weights(str(weights_path))
print('Saved weights to', weights_path)

# 3) Save label/config metadata for inference (also tagged)
# NOTE: split manifests store class targets as integer columns:
# - shade_class: 0/1
# - score_class: 1..5
# - veg_class  : 1..5
# In training we convert score/veg to 0..4 for sparse categorical losses.
config = {
    'run_tag': RUN_TAG,
    'img_size': IMG_SIZE,
    'binary_cols': binary_cols,
    'shade_class_col': 'shade_class',
    'score_class_col': 'score_class',
    'veg_class_col': 'veg_class',
    'num_shade': int(NUM_SHADE),
    'num_score': int(NUM_SCORE),
    'num_veg': int(NUM_VEG),
    'score_veg_training_zero_based': True,
}

config_path = save_dir / f"model_config_{RUN_TAG}.json"
with open(config_path, 'w') as f:
    json.dump(config, f, indent=2)
print('Saved', config_path.name)



Saved final model to ../models/final_20260129_074737.keras
Saved weights to ../models/final_20260129_074737.weights.h5
Saved model_config_20260129_074737.json
