In [1]:
import os , shutil, pathlib
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'   # hides INFO/WARNING logs from TF C++
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, models
from tensorflow.keras.utils import image_dataset_from_directory
import matplotlib.pyplot as plt
import numpy as np
from tensorflow.keras.preprocessing import image
import pandas as pd
from sklearn.model_selection import train_test_split

In [2]:
folder_dir = r"/home/hamid/ML/Datasets/csiro-biomass/"
file_path = os.path.join(folder_dir, "train.csv")
df = pd.read_csv(file_path)

In [3]:
df["image_path"][1]

'train/ID1011485656.jpg'

In [4]:
df["image_path"][1]
im_dir = os.path.join(folder_dir, df["image_path"][1])

In [5]:
df_gdm = df[df['target_name'].str.contains('GDM')]

In [6]:
df_gdm = df[df['target_name'].str.contains('GDM')].copy()

# 2. Define your base directory (remember to set this correctly!)
BASE_DIR = r"/home/hamid/ML/Datasets/csiro-biomass/" 

# 3. Use .loc when setting the new column on the independent copy
# This is the standard, recommended Pandas way to assign values to a new column.
df_gdm.loc[:, 'full_path'] = BASE_DIR + df_gdm['image_path']

# Verification
print(df_gdm[['image_path', 'full_path']].head())

                image_path                                          full_path
4   train/ID1011485656.jpg  /home/hamid/ML/Datasets/csiro-biomass/train/ID...
9   train/ID1012260530.jpg  /home/hamid/ML/Datasets/csiro-biomass/train/ID...
14  train/ID1025234388.jpg  /home/hamid/ML/Datasets/csiro-biomass/train/ID...
19  train/ID1028611175.jpg  /home/hamid/ML/Datasets/csiro-biomass/train/ID...
24  train/ID1035947949.jpg  /home/hamid/ML/Datasets/csiro-biomass/train/ID...


In [7]:
df_gdm

Unnamed: 0,sample_id,image_path,Sampling_Date,State,Species,Pre_GSHH_NDVI,Height_Ave_cm,target_name,target,full_path
4,ID1011485656__GDM_g,train/ID1011485656.jpg,2015/9/4,Tas,Ryegrass_Clover,0.62,4.6667,GDM_g,16.2750,/home/hamid/ML/Datasets/csiro-biomass/train/ID...
9,ID1012260530__GDM_g,train/ID1012260530.jpg,2015/4/1,NSW,Lucerne,0.55,16.0000,GDM_g,7.6000,/home/hamid/ML/Datasets/csiro-biomass/train/ID...
14,ID1025234388__GDM_g,train/ID1025234388.jpg,2015/9/1,WA,SubcloverDalkeith,0.38,1.0000,GDM_g,6.0500,/home/hamid/ML/Datasets/csiro-biomass/train/ID...
19,ID1028611175__GDM_g,train/ID1028611175.jpg,2015/5/18,Tas,Ryegrass,0.66,5.0000,GDM_g,24.2376,/home/hamid/ML/Datasets/csiro-biomass/train/ID...
24,ID1035947949__GDM_g,train/ID1035947949.jpg,2015/9/11,Tas,Ryegrass,0.54,3.5000,GDM_g,10.9605,/home/hamid/ML/Datasets/csiro-biomass/train/ID...
...,...,...,...,...,...,...,...,...,...,...
1764,ID975115267__GDM_g,train/ID975115267.jpg,2015/7/8,WA,Clover,0.73,3.0000,GDM_g,40.8300,/home/hamid/ML/Datasets/csiro-biomass/train/ID...
1769,ID978026131__GDM_g,train/ID978026131.jpg,2015/9/4,Tas,Clover,0.83,3.1667,GDM_g,36.7046,/home/hamid/ML/Datasets/csiro-biomass/train/ID...
1774,ID980538882__GDM_g,train/ID980538882.jpg,2015/2/24,NSW,Phalaris,0.69,29.0000,GDM_g,91.6543,/home/hamid/ML/Datasets/csiro-biomass/train/ID...
1779,ID980878870__GDM_g,train/ID980878870.jpg,2015/7/8,WA,Clover,0.74,2.0000,GDM_g,34.3900,/home/hamid/ML/Datasets/csiro-biomass/train/ID...


In [8]:
IMG_SIZE = (224, 224)
X = [] # List to store image arrays
y = df_gdm['target'].values # Target values for the model

for path in df_gdm['full_path']:
    try:
        # Load image and resize it
        img = image.load_img(path, target_size=IMG_SIZE)
        
        # Convert image to array
        img_array = image.img_to_array(img)
        
        # Normalize the pixel values (e.g., to 0-1 range)
        img_array /= 255.0
        
        X.append(img_array)
    except FileNotFoundError:
        print(f"File not found: {path}. Skipping.")
        
X = np.array(X)

In [9]:
X.shape

(357, 224, 224, 3)

In [10]:
# Standardize the targets (z-score). Save mean/std for inverse transform later.
y_mean = y.mean()
y_std = y.std()
y_scaled = (y - y_mean) / y_std

# then use y_scaled in splits
X_train, X_temp, y_train, y_temp = train_test_split(X, y_scaled, test_size=0.2, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)


# Split into 80% training and 20% temporary test/validation
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.2, random_state=42)

# Split temporary data into 10% validation and 10% test
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

print(f"Train Set Size: {X_train.shape[0]}")
print(f"Validation Set Size: {X_val.shape[0]}")
print(f"Test Set Size: {X_test.shape[0]}")

Train Set Size: 285
Validation Set Size: 36
Test Set Size: 36


In [11]:
data_augmentation = keras.Sequential(
    [
        layers.RandomFlip("horizontal"),
        layers.RandomRotation(0.1),
        layers.RandomZoom(0.2),
    ]
)

I0000 00:00:1765482287.774912  173956 gpu_device.cc:2020] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 5409 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 4060, pci bus id: 0000:01:00.0, compute capability: 8.9


In [12]:
inputs = keras.Input(shape=(224, 224, 3))
x = data_augmentation(inputs)
# x = layers.Rescaling(1.0 / 255)(x)
x = layers.Conv2D(filters=32, kernel_size=3, activation="relu")(x)
x = layers.MaxPooling2D(pool_size=2)(x)
x = layers.Conv2D(filters=64, kernel_size=3, activation="relu")(x)
x = layers.MaxPooling2D(pool_size=2)(x)
x = layers.Conv2D(filters=128, kernel_size=3, activation="relu")(x)
x = layers.MaxPooling2D(pool_size=2)(x)
x = layers.Conv2D(filters=256, kernel_size=3, activation="relu")(x)
x = layers.MaxPooling2D(pool_size=2)(x)
x = layers.Conv2D(filters=512, kernel_size=3, activation="relu")(x)
x = layers.GlobalAveragePooling2D()(x)
outputs = layers.Dense(1, activation="linear")(x)
model = keras.Model(inputs=inputs, outputs=outputs)
model.summary()



In [13]:
model.compile(
    loss=tf.keras.losses.MeanSquaredError(),  # or use "mse"
    optimizer=tf.keras.optimizers.Adam(),     # explicit instance
    metrics=[tf.keras.metrics.MeanAbsoluteError(), tf.keras.metrics.MeanSquaredError()]
)

In [14]:
history = model.fit(
    X_train,
    y_train,
    epochs=30,
    validation_data=(X_val, y_val),
    verbose=2
)


Epoch 1/30
9/9 - 4s - 452ms/step - loss: 1098.9503 - mean_absolute_error: 24.6479 - mean_squared_error: 1098.9503 - val_loss: 442.3658 - val_mean_absolute_error: 16.4733 - val_mean_squared_error: 442.3658
Epoch 2/30
9/9 - 1s - 56ms/step - loss: 685.5711 - mean_absolute_error: 19.3214 - mean_squared_error: 685.5711 - val_loss: 538.0067 - val_mean_absolute_error: 20.1136 - val_mean_squared_error: 538.0067
Epoch 3/30
9/9 - 0s - 54ms/step - loss: 718.3572 - mean_absolute_error: 19.3610 - mean_squared_error: 718.3572 - val_loss: 735.8586 - val_mean_absolute_error: 24.1331 - val_mean_squared_error: 735.8586
Epoch 4/30
9/9 - 0s - 55ms/step - loss: 664.8353 - mean_absolute_error: 19.3085 - mean_squared_error: 664.8353 - val_loss: 789.8641 - val_mean_absolute_error: 24.8638 - val_mean_squared_error: 789.8641
Epoch 5/30
9/9 - 0s - 49ms/step - loss: 619.5000 - mean_absolute_error: 18.8584 - mean_squared_error: 619.5000 - val_loss: 698.6802 - val_mean_absolute_error: 23.0655 - val_mean_squared_err