# Example: Training a Dimensionality-Reduction Model

In [None]:
import xarray as xr
from pathlib import Path
import joblib
import pickle
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE, Isomap as Isomap, SpectralEmbedding as SE, LocallyLinearEmbedding as LLE
from umap import  UMAP
import numpy as np

In [2]:
## Create the output directory if it doesn't exist yet.
base_path = Path("results")
# exist_ok=True makes this a single race-free call; it still raises
# FileExistsError when "results" exists but is not a directory.
base_path.mkdir(exist_ok=True)

In [None]:
# Reducer class that train_model() instantiates and fits. Replace it with one
# of the other classes imported at the top of the notebook to try a different
# method.
# NOTE(review): the class must accept the keyword arguments train_model()
# passes to its constructor - confirm this when switching models.
MODEL_CLASS = PCA

def train_model(cube):
    """
    Fit the configured dimensionality-reduction model (``MODEL_CLASS``) on a data cube.

    Missing values are replaced by 0, the data is cast to float32 and flattened to a
    ``(pixels, bands)`` matrix before the model is fitted with 3 components.

    A fixed ``random_state`` (42) is passed for reproducibility, but only when the
    model's constructor accepts it: some reducers (e.g. scikit-learn's ``Isomap``)
    have no such parameter and would otherwise fail with a ``TypeError``.

    Note: The function name and arguments are defined by the UDF API.
    More information can be found here:
    https://open-eo.github.io/openeo-python-client/udf.html#udf-function-names-and-signatures

    :param cube: The data cube on which dimensionality reduction will be applied.
        It must be an `xr.DataArray` with exactly the dimensions 'bands', 'y' and 'x'.
    :return: A fitted dimensionality reduction model
    """
    # Local import keeps the function self-contained when it is copied into a UDF.
    import inspect

    # Fill NaNs and cast to float32
    cube = cube.fillna(0).astype("float32")

    # Ensure dimension order ('bands', 'y', 'x')
    cube = cube.transpose('bands', 'y', 'x')

    # Reshape to (pixels, bands)
    bands, y, x = cube.shape
    data = cube.values.reshape((bands, y * x)).T  # shape: (pixels, bands)

    # Build the constructor arguments
    n_components = 3
    model_kwargs = {"n_components": n_components}

    try:
        init_params = inspect.signature(MODEL_CLASS).parameters.values()
    except (TypeError, ValueError):
        # Constructor cannot be introspected: keep the historical behaviour
        # of always passing the seed.
        accepts_seed = True
    else:
        accepts_seed = any(
            param.name == "random_state"
            or param.kind is inspect.Parameter.VAR_KEYWORD
            for param in init_params
        )
    if accepts_seed:
        model_kwargs["random_state"] = 42

    # Apply Dimensionality reduction
    model = MODEL_CLASS(**model_kwargs)
    model.fit(data)

    return model


# Load the data cube
# NOTE(review): this rebinds base_path, so the model below is written next to
# the input cube rather than into the "results" directory created earlier -
# confirm this is intended.
base_path = Path(".")  # or your actual path

# Open the dataset in a context manager so the file handle is released as soon
# as the bands have been read into memory.
with xr.open_dataset(base_path / "master_S2_cube.nc") as ds:
    # Extract valid band variables (float/complex, purely spatial) ...
    band_vars = [
        var for var in ds.data_vars
        if ds[var].dtype.kind in 'fc' and set(ds[var].dims) == {'y', 'x'}
    ]
    if not band_vars:
        # Fail early with a clear message instead of an opaque xr.concat error.
        raise ValueError(
            "No float/complex variables with dims ('y', 'x') found in master_S2_cube.nc"
        )

    # ... and stack them into a DataArray along a new 'bands' dimension
    cube = xr.concat([ds[var] for var in band_vars], dim="bands")
    # .load() pulls the values into memory before the file is closed.
    cube = cube.assign_coords(bands=band_vars).load()

# Train the dimensionality reduction model
model = train_model(cube)
print(type(model))

# Save the trained dimensionality reduction model
model_path = base_path / "dim_reduction.pkl"
with open(model_path, "wb") as f:
    pickle.dump(model, f)

<class 'sklearn.manifold._spectral_embedding.SpectralEmbedding'>


In [5]:
!python -c "import site; print(site.getsitepackages())"


['c:\\Users\\wannijnj\\AppData\\Local\\anaconda3\\envs\\weed', 'c:\\Users\\wannijnj\\AppData\\Local\\anaconda3\\envs\\weed\\Lib\\site-packages']


In [3]:
import fnmatch
import os
import zipfile


def zip_dir(folder_path, output_path, exclude_patterns=None):
    """
    Zip the contents of a directory tree, skipping files that match exclude patterns.

    Files are stored under their path relative to ``folder_path``. A file is
    skipped when any pattern matches either its full filesystem path or its
    archive-relative path, so relative patterns such as ``"pip*"`` or
    ``"easy_install.py"`` apply to entries at the top of the archive.

    :param folder_path: Directory whose files are added to the archive.
    :param output_path: Path of the zip file to create (overwritten if it exists).
    :param exclude_patterns: Optional iterable of ``fnmatch``-style patterns.
    """
    exclude_patterns = exclude_patterns or []

    # The archive is created before the walk starts; remember its location so
    # it is never added to itself when it lives inside folder_path.
    archive_key = os.path.normcase(os.path.abspath(output_path))

    with zipfile.ZipFile(output_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
        for root, _dirs, files in os.walk(folder_path):
            for file in files:
                file_path = os.path.join(root, file)
                if os.path.normcase(os.path.abspath(file_path)) == archive_key:
                    continue
                arcname = os.path.relpath(file_path, folder_path)
                # Check both the full path and the archive-relative path
                if any(
                    fnmatch.fnmatch(file_path, pattern)
                    or fnmatch.fnmatch(arcname, pattern)
                    for pattern in exclude_patterns
                ):
                    continue
                zipf.write(file_path, arcname)

# fnmatch-style patterns for files to leave out of the dependency archive.
# NOTE(review): "*/dist-info/*" only matches a directory named exactly
# "dist-info"; package metadata directories are presumably named
# "<name>-<version>.dist-info", so this pattern may never match - confirm.
exclude_patterns = [
    "*/dist-info/*", "*.pyc", "*/__pycache__/*",
    "easy_install.py", "pip*", "isympy.py",
]

# site-packages directory of the environment holding the UDF dependencies
# (machine-specific path; adjust for your setup).
folder = r'C:\Users\wannijnj\AppData\Local\anaconda3\envs\env-dim-reduction-models\Lib\site-packages'

# Bundle the dependencies into a single zip archive.
zip_dir(
    folder_path=folder,
    output_path=r"C:\Users\wannijnj\Documents\Projects\dim-reduction-udf-deps-v01.zip",
    exclude_patterns=exclude_patterns,
)