In [None]:
import os
import numpy as np
import pandas as pd 
from PIL import Image
import tensorflow as tf
import matplotlib.pyplot as plt


# Read Data

In [None]:
# Root folder of the competition data. Paths are built by plain string
# concatenation (e.g. BASE_PATH + 'train.csv'), so the trailing slash is required.
BASE_PATH = '/kaggle/input/csiro-biomass/'

In [None]:
# Long-format labels: one row per (image, target_name) pair.
train_targets = pd.read_csv(BASE_PATH + 'train.csv')[['image_path', 'target_name', 'target']]

n_images = train_targets['image_path'].nunique()

# Collapse to one row per image. groupby sorts by image_path and each list
# keeps the CSV row order of that image's targets.
train_targets = train_targets.groupby('image_path').agg({
    'target_name': list,
    'target': list
})

# The target matrix below has no column labels, so column j only means
# "target j" if every image lists its targets in exactly the same order.
# Fail loudly here instead of silently training on misaligned columns.
reference_names = train_targets['target_name'].iloc[0]
assert all(names == reference_names for names in train_targets['target_name']), (
    'target_name order differs between images; columns of `targets` would be misaligned'
)

# Shape: (n_images, number of targets per image); rows follow the sorted image_path index.
targets = np.vstack(train_targets['target'].values)

# Not used by any visible cell; kept so that nothing relying on the name breaks.
cls = []




# Dataset using tf.data (memory-friendly)

In [None]:
# Unbatched, unshuffled stream of training images resized to 1000x2000.
# With labels=None only images are yielded; shuffle=False keeps the
# directory listing order so images can be paired with targets by position.
images_ds = tf.keras.utils.image_dataset_from_directory(
    BASE_PATH + 'train',
    labels=None,
    image_size=(1000, 2000),
    batch_size=None,
    shuffle=False
)


assert len(images_ds) == n_images, 'Number of images not match'

# Images and targets are later zipped purely by position, so a matching count
# is not enough: the i-th file must be the i-th row of the grouped targets.
# Compare file names when the dataset exposes them (skipped otherwise).
_image_files = getattr(images_ds, 'file_paths', None)
if _image_files is not None:
    _found = [os.path.basename(p) for p in _image_files]
    _expected = [os.path.basename(p) for p in train_targets.index]
    assert _found == _expected, (
        'Image files and target rows are not in the same order; '
        'zipping them would pair images with the wrong targets'
    )



In [None]:
BATCH_SIZE = 8
VAL_FRACTION = 0.2

targets_ds = tf.data.Dataset.from_tensor_slices(targets)

# Pair each image with its target row, then shuffle ONCE.
# reshuffle_each_iteration=False is essential: by default shuffle() produces a
# new order on every pass over the dataset, so take()/skip() below would pick
# different elements each epoch (and on every repeat()), leaking validation
# images into training and making val_loss meaningless.
# Side effect: training batches come in the same order every epoch; add a
# separate shuffle after skip() if per-epoch reshuffling is wanted.
dataset = tf.data.Dataset.zip((images_ds, targets_ds)).shuffle(
    100, seed=42, reshuffle_each_iteration=False
)

# Absolute sizes of the two splits (val_size is a sample count from here on).
val_size = int(VAL_FRACTION * n_images)
train_size = n_images - val_size

# First val_size shuffled samples -> validation, the rest -> training.
# The training stream repeats forever, hence steps_per_epoch below.
train_ds = dataset.skip(val_size).repeat().batch(BATCH_SIZE).prefetch(buffer_size=tf.data.AUTOTUNE)
val_ds = dataset.take(val_size).batch(BATCH_SIZE)

# Whole batches only; a trailing partial batch is not counted.
steps_per_epoch = train_size // BATCH_SIZE
validation_steps = val_size // BATCH_SIZE








# Simple CNN model with TensorFlow

In [None]:
tf.random.set_seed(42)

# Augmentation pipeline applied to the raw input images inside the model.
data_augmentation = tf.keras.Sequential(layers=[
    # Mirror left/right and/or top/bottom at random.
    tf.keras.layers.RandomFlip(mode="horizontal_and_vertical"),
    # Rotation factor is a fraction of a full turn: up to 0.2 * 360 = 72 degrees either way.
    tf.keras.layers.RandomRotation(factor=0.2),
    # Zoom in or out by up to 10%.
    tf.keras.layers.RandomZoom(height_factor=0.1),
    # Contrast factor 0.15, i.e. up to 15% either way.
    tf.keras.layers.RandomContrast(factor=0.15),
    # Brightness factor 0.2, i.e. up to 20% either way.
    tf.keras.layers.RandomBrightness(factor=0.2),
])


In [None]:
tf.random.set_seed(42)


def conv_block(tensor, n_filters, drop_rate):
    """One feature-extraction stage: 3x3 same-padded ReLU conv (He-normal init),
    batch normalisation, max pooling with the layer's default window, dropout."""
    tensor = tf.keras.layers.Conv2D(
        filters=n_filters,
        kernel_size=3,
        padding='same',
        activation='relu',
        kernel_initializer='he_normal',
    )(tensor)
    tensor = tf.keras.layers.BatchNormalization()(tensor)
    tensor = tf.keras.layers.MaxPooling2D()(tensor)
    return tf.keras.layers.Dropout(drop_rate)(tensor)


inputs = tf.keras.layers.Input(shape=[1000, 2000, 3])

# Augmentation is part of the graph, so it sees the raw input tensor.
x = data_augmentation(inputs)

# Three stages with doubling width and slightly increasing dropout.
for n_filters, drop_rate in [(32, 0.1), (64, 0.125), (128, 0.15)]:
    x = conv_block(x, n_filters, drop_rate)

# Summarise the final feature map with both its average and its maximum,
# then concatenate the two descriptors.
ga = tf.keras.layers.GlobalAveragePooling2D()(x)
gm = tf.keras.layers.GlobalMaxPooling2D()(x)
merged = tf.keras.layers.Concatenate()([ga, gm])

# Regression head: one hidden layer, then 5 linear outputs (no activation).
x = tf.keras.layers.Dense(512, activation='relu')(merged)
x = tf.keras.layers.Dropout(0.1)(x)
outputs = tf.keras.layers.Dense(5)(x)

model = tf.keras.Model(inputs=inputs, outputs=outputs)

model.summary()



In [None]:
# Regression setup: minimise mean squared error with Adam (lr 1e-4) and
# report RMSE alongside the loss.
adam = tf.keras.optimizers.Adam(1e-4)
rmse_metric = tf.keras.metrics.RootMeanSquaredError(name='rmse')

model.compile(optimizer=adam, loss='mse', metrics=[rmse_metric])

In [None]:
# Keep only the weights of the epoch with the lowest validation loss on disk.
checkpoint_cb = tf.keras.callbacks.ModelCheckpoint(
    filepath='best_model.keras',
    monitor='val_loss',
    mode='min',
    save_best_only=True,
    verbose=1,
)

# Give up after 8 epochs without a better val_loss and roll back to the best weights.
earlystop_cb = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=8,
    restore_best_weights=True,
    verbose=0,
)

# Halve the learning rate after 3 stagnant epochs, never going below 1e-6.
reduce_lr_cb = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.5,
    patience=3,
    min_lr=1e-6,
    verbose=0,
)

training_callbacks = [checkpoint_cb, earlystop_cb, reduce_lr_cb]

# train_ds repeats indefinitely, so the epoch length is given explicitly.
history = model.fit(
    train_ds,
    epochs=1,
    steps_per_epoch=steps_per_epoch,
    validation_data=val_ds,
    validation_steps=validation_steps,
    callbacks=training_callbacks,
)


#loss: 319.4160 - mae: 12.2534 - rmse: 17.7127 - val_loss: 318.9702 - val_mae: 11.5043 - val_rmse: 17.8597 - learning_rate: 5.0000e-04

# 35/35 ━━━━━━━━━━━━━━━━━━━━ 33s 944ms/step - loss: 414.6152 - mae: 14.1044 - rmse: 20.3407 - val_loss: 863.7164 - val_mae: 17.3120 - val_rmse: 29.3891 - learning_rate: 5.0000e-05


# Submission

In [None]:
# Reload the best checkpoint from the same relative path ModelCheckpoint wrote
# it to ('best_model.keras' in the working directory). The previous absolute
# '/kaggle/working/...' path only worked when the notebook ran from that folder.
model = tf.keras.models.load_model('best_model.keras')

In [None]:
# Test images, resized exactly like the training images and kept in directory
# listing order (shuffle=False) so predictions can be mapped back to files.
# Paths are derived from BASE_PATH, like the training data, instead of
# repeating the absolute location.
test_ds = tf.keras.utils.image_dataset_from_directory(
    directory=BASE_PATH + 'test',
    labels=None,
    image_size=(1000, 2000),
    batch_size=16,
    shuffle=False
)

# Rows to be predicted; the submission takes its sample_id column from here.
test_df = pd.read_csv(BASE_PATH + 'test.csv')





In [None]:
# Predict every test image and flatten the (images x outputs) matrix row by
# row into a single 1-D vector.
y_pred = np.ravel(model.predict(test_ds))

In [None]:
# Column j of the model output follows the target order of the grouped
# training rows, so recover that order and restore the (images x targets) shape.
target_names = list(train_targets['target_name'].iloc[0])
pred_matrix = np.asarray(y_pred).reshape(-1, len(target_names))

if {'image_path', 'target_name'}.issubset(test_df.columns):
    # Look each requested (image, target) pair up explicitly rather than
    # trusting that test.csv happens to be sorted like the flattened
    # predictions. Unknown images or targets raise KeyError.
    image_files = [os.path.basename(p) for p in test_ds.file_paths]
    assert len(image_files) == len(pred_matrix), (
        'Number of predicted rows does not match number of test images'
    )
    row_of = {name: i for i, name in enumerate(image_files)}
    col_of = {name: j for j, name in enumerate(target_names)}
    rows = [row_of[os.path.basename(p)] for p in test_df['image_path']]
    cols = [col_of[t] for t in test_df['target_name']]
    aligned_pred = pred_matrix[rows, cols]
else:
    # No key columns available: fall back to positional assignment, but
    # verify the lengths first.
    aligned_pred = pred_matrix.ravel()
    assert len(aligned_pred) == len(test_df), (
        'Number of predictions does not match number of rows in test.csv'
    )

submission = pd.DataFrame({
    'sample_id': test_df['sample_id'],
    'target': aligned_pred
})

In [None]:
# Preview the first five rows of the submission table.
submission.head(n=5)

In [None]:
# Write the submission file to the working directory, without the index column.
submission.to_csv(path_or_buf='submission.csv', index=False)