# Example: Training a Model

This notebook processes a satellite image data cube stored in NetCDF format, prepares it for modeling, and trains a machine learning model using band data.

In [1]:
import xarray as xr
from pathlib import Path
import joblib
import pickle
from sklearn.decomposition import PCA
import numpy as np
import netCDF4

Define the model class you want to train.

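**Note:** Replace `MODEL_CLASS` with a specific model class such as `PCA`, `Isomap`, or any other scikit-learn-compatible model. `MODEL_PARAMS` must contain only keyword arguments that the chosen class accepts (for example, `Isomap` has no `random_state` parameter), and the class must accept `n_components`, which `train_model` passes to it.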

In [7]:
# Estimator class that will be instantiated and fitted on the band data.
MODEL_CLASS = PCA

# Extra keyword arguments forwarded to the estimator's constructor.
# The fixed seed is there so repeated runs give the same result.
MODEL_PARAMS = dict(random_state=42)


We define a base directory where results can be stored and ensure it exists:

In [3]:
# Paths
# Directory (relative to the current working directory) where results are stored.
base_path = Path("results")
# Create it in one idempotent call: no check-then-create race, missing parent
# directories are created too, and re-running the cell is a no-op.
base_path.mkdir(parents=True, exist_ok=True)


We also define the input path to the satellite cube:


In [4]:

# Location of the input satellite cube (NetCDF file).
# Expressed relative to the working directory instead of a user-specific
# absolute path so the notebook can run on other machines.
# NOTE(review): assumes the kernel is started in the notebook's folder and the
# cube lives in its "results" sub-folder — adjust if it is stored elsewhere.
input_cube_path = str(Path("results") / "master_S2_cube.nc")

The train_model function prepares the data and fits a dimensionality reduction model:


In [5]:
def train_model(cube, n_components=3, model_class=None, model_params=None):
    """Flatten a (bands, y, x) cube into a pixel table and fit a model on it.

    Parameters
    ----------
    cube : array-like with named dims
        Object exposing ``fillna``, ``astype``, ``transpose``, ``shape`` and
        ``values`` (e.g. an ``xarray.DataArray``) with exactly the dimensions
        'bands', 'y' and 'x', in any order.
    n_components : int, optional
        Number of components requested from the model. Defaults to 3. An
        ``'n_components'`` entry in the model parameters takes precedence.
    model_class : type, optional
        Estimator class to instantiate. Defaults to the notebook-level
        ``MODEL_CLASS``.
    model_params : dict, optional
        Keyword arguments for the estimator constructor. Defaults to the
        notebook-level ``MODEL_PARAMS``.

    Returns
    -------
    The fitted model instance.
    """
    if model_class is None:
        model_class = MODEL_CLASS
    if model_params is None:
        model_params = MODEL_PARAMS

    # Merge instead of passing n_components alongside **params: the latter
    # raises "got multiple values for keyword argument" as soon as the
    # parameter dict also defines n_components.
    init_kwargs = {"n_components": n_components, **model_params}

    # Missing values are replaced by 0 so the estimator receives finite input.
    cube = cube.fillna(0).astype("float32")
    cube = cube.transpose('bands', 'y', 'x')
    n_bands, height, width = cube.shape
    # One row per pixel, one column per band.
    data = cube.values.reshape((n_bands, height * width)).T  # (pixels, bands)
    print(f"Training data shape: {data.shape}")

    model = model_class(**init_kwargs)
    model.fit(data)
    return model

We identify and stack all float-type variables with dimensions ('y', 'x') and then train the model using the preprocessed cube:

In [8]:
# Load dataset
# load_dataset reads everything into memory and closes the file immediately,
# so no NetCDF handle stays open (and locked) for the lifetime of the kernel.
ds = xr.load_dataset(input_cube_path)

# Extract band variables: dtype kind 'f' or 'c' and exactly the dims ('y', 'x')
band_vars = [
    name
    for name, variable in ds.data_vars.items()
    if variable.dtype.kind in 'fc' and set(variable.dims) == {'y', 'x'}
]
if not band_vars:
    # Fail early with a clear message instead of an obscure error from xr.concat.
    raise ValueError(
        f"No float variables with dims ('y', 'x') found in {input_cube_path!r}"
    )

# Stack bands along a new "bands" dimension labelled with the variable names
cube = xr.concat([ds[name] for name in band_vars], dim="bands")
cube = cube.assign_coords(bands=band_vars)

# Train model
model = train_model(cube)
print(f"Trained model type: {type(model)}")




Training data shape: (25921, 104)
Trained model type: <class 'sklearn.decomposition._pca.PCA'>
