## Setup

Load libraries

In [1]:
import os
import numpy as np
import polars as pl
import pandas as pd
import seaborn as sns
import scipy.io as sio
from matplotlib import pyplot as plt


from typing import Dict, Union
from sklearn.pipeline import Pipeline
from sklearn.ensemble import ExtraTreesRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler, FunctionTransformer
from sklearn.model_selection import train_test_split, PredefinedSplit, GridSearchCV

# Fixed seed shared by the notebook (presumably passed as random_state to the
# sklearn split/estimator steps further down — confirm against later cells).
seed = 10

## Load Data

In [3]:
# Read the MATLAB file; the path is relative to the notebook's working directory.
# The cells below index the result by the variable names 'r' and 'r2'.
raster = sio.loadmat(file_name='UAVSAR-ASO-WMesa.mat')

## Data Preprocessing

* Let's extract all useful variables

In [5]:
# Both MATLAB variables are indexed as [0][k][f]: k selects an entry, f a field
# of that entry. Field 0 is what the notebook uses as the raster itself.
_r_entries = raster['r'][0]
_r2_entries = raster['r2'][0]

# Entries 0-4 of 'r' (names as assigned by the original author).
(
    feb8_snow_depth,
    feb21_snow_depth,
    feb25_snow_depth,
    sept26_bare_DEM,
    sept26_veg_ht,
) = (_r_entries[k][0] for k in range(5))

# Entries 0-5 of 'r2'.
(
    amplitude,
    unwrapped_phase,
    coherence,
    incidence_angle,
    elevation,
    wrapped_phase,
) = (_r2_entries[k][0] for k in range(6))

# Fields 1 and 2 of the first 'r' entry (presumably the coordinate grids
# matching the rasters — verify against the .mat layout).
easting = _r_entries[0][1]
northing = _r_entries[0][2]


# Build one long table: each raster is flattened with ravel() and becomes a column.
# The insertion order of this mapping is the column order of the frame.
# Note that 'snow_depth' is taken from the feb8 raster only.
_columns = {
    'amplitude': amplitude,
    'unwrapped_phase': unwrapped_phase,
    'coherence': coherence,
    'incidence_angle': incidence_angle,
    'bare_earth_dem': sept26_bare_DEM,
    'vegetation_ht': sept26_veg_ht,
    'wrapped_phase': wrapped_phase,
    'snow_depth': feb8_snow_depth,
    'elevation': elevation,
}
df = pl.DataFrame({name: grid.ravel() for name, grid in _columns.items()})

df.head()

amplitude,unwrapped_phase,coherence,incidence_angle,bare_earth_dem,vegetation_ht,wrapped_phase,snow_depth,elevation
f64,f64,f64,f64,f32,f32,f64,f32,f64
0.424673,-10.83723,0.639889,0.929004,-9999.0,-9999.0,1.805211,-9999.0,2456.656494
0.424673,-10.83723,0.639889,0.929004,-9999.0,-9999.0,1.805211,-9999.0,2456.656494
0.38162,-10.83723,0.594642,0.929004,-9999.0,-9999.0,1.824319,-9999.0,2457.156494
0.38162,-10.83723,0.594642,0.926934,-9999.0,-9999.0,1.746802,-9999.0,2457.656494
0.369751,-10.83723,0.526593,0.924989,-9999.0,-9999.0,1.824319,-9999.0,2458.656616


* Data Cleaning

    - First `with_columns`: Process "snow_depth" column.
       * Replace values of 0 with `None`.
       * Replace values of 253 with `None`.
       * Replace values greater than 2 with `None` (this rule also covers the 253 case above).
       * Keep all other values as they are.
    - Second `with_columns`: Process all columns.
       * Replace any value of -9999.0 with `None`.
    - `fill_nan`: Replace any remaining NaN values with `None`.

In [6]:
# Clean the raw frame in three passes, leaving `df` itself untouched:
#   1. snow_depth: null out 0, 253 and anything above 2.
#   2. every column: null out the -9999.0 placeholder.
#   3. every column: turn remaining NaN into null.
#
# Each pass is written as "keep the column when the value is valid, else null".
# With the column in the `then` branch the result inherits the column's name,
# so Expr.keep_name() is not needed. That method was deprecated in polars
# 0.19.12 (replaced by Expr.name.keep()) and is missing from recent releases,
# while this form runs on both old and new versions.
_depth = pl.col('snow_depth')

# 253 is already covered by "> 2"; it stays explicit to mirror the cleaning
# rules listed in the markdown above.
_invalid_depth = (_depth == 0) | (_depth == 253) | (_depth > 2)

clean_data = (
    df
    .with_columns(
        pl.when(~_invalid_depth).then(_depth).otherwise(None)
    )
    .with_columns(
        # '*' expands per column, so each column is compared with itself.
        pl.when(pl.col('*') != -9999.0).then(pl.col('*')).otherwise(None)
    )
    .fill_nan(None)
)

clean_data.head()

amplitude,unwrapped_phase,coherence,incidence_angle,bare_earth_dem,vegetation_ht,wrapped_phase,snow_depth,elevation
f64,f64,f64,f64,f32,f32,f64,f32,f64
0.424673,-10.83723,0.639889,0.929004,,,1.805211,,2456.656494
0.424673,-10.83723,0.639889,0.929004,,,1.805211,,2456.656494
0.38162,-10.83723,0.594642,0.929004,,,1.824319,,2457.156494
0.38162,-10.83723,0.594642,0.926934,,,1.746802,,2457.656494
0.369751,-10.83723,0.526593,0.924989,,,1.824319,,2458.656616
