Published on October 28, 2025. By Prata, Marília (mpwolke)

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found.
for root, _subdirs, names in os.walk('/kaggle/input'):
    for path in (os.path.join(root, name) for name in names):
        print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

## Biomass from Top-View Images - task of pasture biomass estimation

" Accurate estimation of pasture biomass is important for decision-making in livestock production
 systems. Estimates of pasture biomass can be used to manage stocking rates to maximise pasture
 utilisation, while minimising the risk of overgrazing and promoting overall system health. The authors presented a comprehensive dataset of 1,162 annotated top-view images of pastures collected across 19 locations in Australia. The images were taken across multiple seasons and include a range of temperate pasture species."
 
"Each image is paired with on-ground measurements including biomass sorted by component (green, dead, and legume fraction), vegetation height, and Normalized Difference Vegetation Index (NDVI) from Active Optical Sensors (AOS). The multidimensional nature of the data, which combines visual, spectral, and structural information."

"Reflecting real-world complexity, the dataset presents technical challenges such as extreme biomass **density variation, occlusion in dense canopies, and complex spatial patterns in mixed species** swards. While comprehensive in scope, limitations include a geographic focus on Australian temperate systems, an emphasis on six major pasture species, and a fixed quadrat size, which may not capture broader landscape-scale patterns." 

**Citation:**

@misc{liao2025estimatingpasturebiomasstopview,

      title={Estimating Pasture Biomass from Top-View Images: A Dataset for Precision Agriculture},

      author={Qiyu Liao and Dadong Wang and Rebecca Haling and Jiajun Liu and Xun Li and Martyna Plomecka and Andrew Robson and Matthew Pringle and Rhys Pirie and Megan Walker and Joshua Whelan},

      year={2025},

      eprint={2510.22916},

      archivePrefix={arXiv},

      primaryClass={cs.CV},

      url={https://arxiv.org/abs/2510.22916},

}

https://arxiv.org/pdf/2510.22916

## Load submission file

In [None]:
# Read the competition's sample submission CSV and preview its last rows.
sub = pd.read_csv('/kaggle/input/csiro-biomass/sample_submission.csv')
sub.tail()

## Test images (only one image)

In [None]:
from PIL import Image

# Folder holding the test image(s); the one sample file is opened as the
# cell's final expression so the notebook displays it.
imgs_dir = '../input/csiro-biomass/test/'
Image.open(os.path.join(imgs_dir, 'ID1001187975.jpg'))

## About Competition

"This competition challenges you to bring greener solutions to the field: build a model that predicts pasture biomass from images, ground-truth measures, and publicly available datasets. You’ll work with a professionally annotated dataset covering Australian pastures across different seasons, regions, and species mixes, along with NDVI values to enhance your models."

https://www.kaggle.com/competitions/csiro-biomass

In [None]:
import cv2
import glob
from glob import glob

In [None]:
import matplotlib.pyplot as plt

def plotImages(tools,directory):
    """Show up to 25 images matching a glob pattern in a 5x5 grid.

    Args:
        tools (str): Caption printed above the grid.
        directory (str): Glob pattern selecting the image files to show.

    Paths are shown in sorted order so the grid is the same on every run.
    Matches that OpenCV cannot decode (sub-folders, non-image files, corrupt
    images) are skipped instead of aborting the whole plot.
    """
    print(tools)
    # glob() returns paths in filesystem order; sort for a reproducible grid.
    image_paths = sorted(glob(directory))
    # NOTE: this changes the global default figure size, so it also applies to
    # later figures that do not pass their own figsize.
    plt.rcParams['figure.figsize'] = (8, 8)
    plt.subplots_adjust(wspace=0, hspace=0)
    shown = 0
    for path in image_paths:
        if shown == 25:  # the 5x5 grid is full
            break
        im = cv2.imread(path)
        if im is None:
            # cv2.imread signals failure by returning None rather than raising;
            # passing that on to cv2.resize would crash the cell.
            continue
        im = cv2.resize(im, (256, 256))
        plt.subplot(5, 5, shown + 1)
        # OpenCV loads pixels as BGR; matplotlib expects RGB.
        plt.imshow(cv2.cvtColor(im, cv2.COLOR_BGR2RGB))
        plt.axis('off')
        shown += 1

# Draw the grid for the training images.
# NOTE(review): plotImages calls glob() without recursive=True, so the trailing
# '**' presumably matches like '*' (direct children of train/ only) — confirm.
plotImages("Biomass train images","../input/csiro-biomass/train/**")

## Competition citation

@misc{csiro-biomass,

author = {Qiyu Liao and Dadong Wang and Rhys Pirie and Joshua Whelan and Rebecca Haling and Jiajun Liu and Rizwan Khokher and Xun Li and Martyna Plomecka and Addison Howard},

title = {CSIRO - Image2Biomass Prediction},
year = {2025},

howpublished = {\url{https://kaggle.com/competitions/csiro-biomass}},
note = {Kaggle}
}

## Load test file

In [None]:
# Read the test metadata CSV and preview its last rows.
test = pd.read_csv('/kaggle/input/csiro-biomass/test.csv')
test.tail()

## Load train file

In [None]:
# Read the training CSV (used by the analysis cells that follow) and preview its last rows.
train = pd.read_csv('/kaggle/input/csiro-biomass/train.csv')
train.tail()

## info() method

In [None]:
# Print a summary of the training frame's columns.
train.info()

## Five key biomass components

"Your task is to use pasture images to predict five key biomass components critical for grazing and feed management:"

* Dry green vegetation (excluding clover)

* Dry dead material

* Dry clover biomass

* Green dry matter (GDM)

* Total dry biomass

"Accurately predicting these quantities will help farmers and researchers monitor pasture growth, optimize feed availability, and improve the sustainability of livestock systems."

https://www.kaggle.com/competitions/csiro-biomass

In [None]:
# Number of training rows for each distinct target_name value.
train['target_name'].value_counts()

## Australian state where sample was collected.

Tasmania (Tas), Victoria (Vic), New South Wales (NSW) and Western Australia (WA)

In [None]:
# Number of training rows for each distinct State value.
train['State'].value_counts()

In [None]:
# Share of training rows per Australian state (Tas = Tasmania, Vic = Victoria,
# NSW = New South Wales, WA = Western Australia).
# The slices are computed from the data instead of being typed in by hand, so
# the chart cannot drift out of sync with train.csv.
state_counts = train['State'].value_counts()  # ordered from most to least frequent
labels = list(state_counts.index)
sizes = list(state_counts.values)
# pie() needs one offset per slice; pull out only the 2nd slice (if there is one).
explode = [0.2 if position == 1 else 0 for position in range(len(sizes))]

fig1, ax1 = plt.subplots(figsize=(6,6))
ax1.pie(sizes, explode=explode, labels=labels, autopct='%1.1f%%',
        shadow=True, startangle=90)
ax1.axis('equal')  # Equal aspect ratio ensures that pie is drawn as a circle.

plt.show()

### Species — Pasture species present, ordered by biomass (underscore-separated).

We have 15 pasture species

In [None]:
import seaborn as sns

#By Pedro Andrade https://www.kaggle.com/code/pbizil/datahackers-managers-radiografia-dos-gestores

# Row counts for the 15 most frequent Species values (try other head() sizes).
species_counts = train["Species"].value_counts().head(15)
sns.set(style="white")
plt.figure(figsize=(8, 6))
ax = sns.barplot(x=species_counts.index, y=species_counts.values, color=sns.color_palette("Greens", n_colors=5)[3])
plt.title("Distribution of Pasture Species", fontsize=16)
# The x axis shows Species values; the previous label "Tags" was a leftover
# from the notebook this plot was adapted from.
plt.xlabel("Species", fontsize=12)
plt.ylabel("Frequency", fontsize=12)
plt.xticks(rotation=60, fontsize=11)
plt.yticks(fontsize=11)
sns.despine()

# Write each count just above its bar.
#+2 is good if chart is vertical. +20 worked for horizontal
for i, v in enumerate(species_counts.values):
    ax.text(i, v + 2, str(v), ha='center', va='bottom', fontsize=10)

plt.tight_layout()
plt.show()

In [None]:
# Columns of train that are plotted as histograms further down.
numerical_cols = ['Pre_GSHH_NDVI', 'Height_Ave_cm', 'target']

**Pre_GSHH_NDVI** — Normalized Difference Vegetation Index (GreenSeeker) reading.

**Height_Ave_cm** — Average pasture height measured by falling plate (cm).

In [None]:
# One histogram per column listed in numerical_cols.
# (The notebook this was adapted from used figsize 15,10.)
hist_options = dict(figsize=(10,8), bins=30, color='Green', edgecolor='black')
train[numerical_cols].hist(**hist_options)
plt.suptitle("Histogram of Numeric Features")
plt.show()

#### `target` and `Height_Ave_cm` are right-skewed

In [None]:
import random
from PIL import Image  # Pillow library for image handling
import tensorflow as tf
from tensorflow.keras import layers, models
from sklearn.model_selection import train_test_split

In [None]:
#By @jocelyndumlao

# Settings for the image-loading cells that follow.
DATA_DIR = "../input/csiro-biomass"  # dataset root folder
IMAGE_SIZE = (128, 128)  # Reduced size for faster training; adjust as needed
# NOTE(review): BATCH_SIZE and SEED are not referenced by any cell visible in
# this notebook — presumably kept for a training step that was removed.
BATCH_SIZE = 32
SEED = 42  # For reproducibility

In [None]:
#By @jocelyndumlao

def load_and_preprocess_data(data_dir, image_size, extensions=(".jpg",)):
    """
    Load the image files located directly inside ``data_dir`` as normalised arrays.

    Only files whose name ends with one of ``extensions`` (compared
    case-insensitively) are read; sub-directories are not searched. Files are
    processed in sorted name order so the result is the same on every run.
    A file that cannot be opened or converted is reported and skipped.

    Args:
        data_dir (str): Path to the directory containing the images.
        image_size (tuple): The desired size of the images (width, height).
        extensions (str or tuple of str): File-name suffix(es) to load.
            Defaults to ``(".jpg",)``.

    Returns:
        tuple: A tuple containing:
            - A numpy array of image data scaled to [0, 1]
              (shape: (num_images, height, width, 3)).
            - A numpy array of zeros with one entry per image (placeholder
              labels; every image is treated as the same class).

    Raises:
        ValueError: If no image could be loaded from ``data_dir``.
    """
    if isinstance(extensions, str):
        extensions = (extensions,)
    allowed_suffixes = tuple(ext.lower() for ext in extensions)

    # os.listdir() returns names in arbitrary order; sort for reproducibility.
    image_paths = [
        os.path.join(data_dir, name)
        for name in sorted(os.listdir(data_dir))
        if name.lower().endswith(allowed_suffixes)
    ]

    images = []
    for image_path in image_paths:
        try:
            # The context manager closes the underlying file handle even when
            # conversion fails, so long runs do not leak file descriptors.
            with Image.open(image_path) as img:
                resized = img.convert('RGB').resize(image_size)
                images.append(np.array(resized) / 255.0)  # Normalize pixel values
        except Exception as e:
            # Best effort: report the bad file and carry on with the rest.
            print(f"Error processing {image_path}: {e}")
            continue

    if not images:
        raise ValueError("No images were loaded from the directory.")

    images = np.array(images)
    labels = np.zeros(len(images))  # placeholder labels: a single class

    return images, labels

In [None]:
#By @jocelyndumlao

# The .jpg files sit in the train/ and test/ sub-folders of the dataset (see
# the paths used by the image cells earlier in this notebook), not directly in
# DATA_DIR, and the loader does not search sub-folders — so point it at train/.
TRAIN_IMAGE_DIR = os.path.join(DATA_DIR, "train")

# If nothing can be loaded, let the ValueError surface as a normal cell error.
# The previous handler called exit(), which ends the notebook's kernel session
# and discards every variable computed so far.
images, labels = load_and_preprocess_data(TRAIN_IMAGE_DIR, IMAGE_SIZE)

### The initial notebook froze and the draft kept restarting, so I deleted it and started again : )

## Okey,dokey! Done

![](https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcQdbcwSYH4Vmy6qgtRw5zAmcA4_v7X2upq8qw&s)EcoMENA

# Acknowledgements:

Pedro Andrade https://www.kaggle.com/code/pbizil/datahackers-managers-radiografia-dos-gestores