## Setup

Load libraries

In [2]:
import shap
import pickle
import numpy as np
import pandas as pd
import xgboost as xgb
from utils.features import UAVSAR_FEATURES, UAVSAR_AND_VEGETATION_HEIGHT_FEATURES

## Load Data

In [3]:
# Read the three pickled split objects that the sections below index into.
# NOTE: unpickling can execute arbitrary code — only load files from a trusted source.
def _load_pickle(path):
    """Return the object stored in the pickle file at ``path``."""
    with open(path, 'rb') as handle:
        return pickle.load(handle)


full_splits = _load_pickle('../data/full_splits.pkl')
vegetation_splits = _load_pickle('../data/vegetation_splits.pkl')
no_vegetation_splits = _load_pickle('../data/no_vegetation_splits.pkl')

## Combined Dataset

* InSAR + Vegetation Height Model Training

In [5]:
# Hyperparameters reused by every XGBoost model trained in this notebook.
boosting_params = dict(
    sampling_method="gradient_based",
    objective="reg:squarederror",
    min_child_weight=30,
    learning_rate=0.05,
    tree_method="hist",
    booster="gbtree",
    device="cuda",       # training runs on the GPU
    max_depth=0,         # XGBoost treats 0 as "no depth limit"
    subsample=1,
    max_bin=5096,
    seed=42,
)

# Training matrix: InSAR + vegetation-height features from the full 'X_temp' split.
dtrain_combined = xgb.DMatrix(
    full_splits['X_temp'][UAVSAR_AND_VEGETATION_HEIGHT_FEATURES],
    label=full_splits['y_temp'],
)

# Train the model.
# 50 boosting rounds are used because SHAP importances showed no difference beyond 50 rounds.
model = xgb.train(
    boosting_params,
    dtrain_combined,
    num_boost_round=50,
)

* SHAP Analysis

In [7]:
# Compute SHAP values for the combined model on the frame it was trained on.
# NOTE(review): the open/vegetated sections explain 'X_test' instead of 'X_temp' —
# confirm that using 'X_temp' here is intended.
explainer = shap.TreeExplainer(model)
shap_values = explainer.shap_values(
    full_splits['X_temp'][UAVSAR_AND_VEGETATION_HEIGHT_FEATURES]
)

* Importance

In [None]:
# Rank the combined model's features by mean absolute SHAP value (largest first).
shap_importance_combined = pd.DataFrame(
    {
        "Feature": model.feature_names,
        "Importance": np.abs(shap_values).mean(axis=0),
    }
).sort_values(by="Importance", ascending=False)

# Persist the ranking without the DataFrame index.
shap_importance_combined.to_csv(
    '../results/XGBoost/AllCombined/all/shap_feature_importances.csv',
    index=False,
)

## Open Areas

* InSAR Model Training

In [None]:
# Training matrix for open areas: InSAR features only, from the 'X_temp' split.
dtrain_open = xgb.DMatrix(
    no_vegetation_splits['X_temp'][UAVSAR_FEATURES],
    label=no_vegetation_splits['y_temp'],
)

# Same hyperparameters and number of boosting rounds as the combined model.
model_open = xgb.train(
    boosting_params,
    dtrain_open,
    num_boost_round=50,
)

* SHAP Analysis

In [None]:
# Compute SHAP values for the open-area model on its 'X_test' split.
explainer = shap.TreeExplainer(model_open)
shap_values_open = explainer.shap_values(
    no_vegetation_splits['X_test'][UAVSAR_FEATURES]
)

* Importance

In [None]:
# Rank the open-area model's features by mean absolute SHAP value (largest first).
shap_importance_open = pd.DataFrame(
    {
        "Feature": model_open.feature_names,
        "Importance": np.abs(shap_values_open).mean(axis=0),
    }
).sort_values(by="Importance", ascending=False)

# Persist the ranking without the DataFrame index.
shap_importance_open.to_csv(
    '../results/XGBoost/NoVegetation/uavsar/shap_feature_importances.csv',
    index=False,
)

## Vegetated Areas

* InSAR + Vegetation Height Model Training

In [None]:
# Training matrix for vegetated areas: InSAR + vegetation-height features from 'X_temp'.
dtrain_vegetated = xgb.DMatrix(
    vegetation_splits['X_temp'][UAVSAR_AND_VEGETATION_HEIGHT_FEATURES],
    label=vegetation_splits['y_temp'],
)

# Same hyperparameters and number of boosting rounds as the other models.
model_veg = xgb.train(
    boosting_params,
    dtrain_vegetated,
    num_boost_round=50,
)

In [None]:
# Compute SHAP values for the vegetated-area model on its 'X_test' split.
explainer = shap.TreeExplainer(model_veg)
shap_values_veg = explainer.shap_values(
    vegetation_splits['X_test'][UAVSAR_AND_VEGETATION_HEIGHT_FEATURES]
)

In [None]:
# Rank the vegetated-area model's features by mean absolute SHAP value (largest first).
# Feature names are read from the trained booster, as in the other sections:
# `explainer.shap_values(...)` returns an array of SHAP values, which has no
# `feature_names` attribute.
shap_importance_vegetated=(
    pd.DataFrame(
        data={
            "Feature": model_veg.feature_names,
            "Importance": np.abs(shap_values_veg).mean(axis=0)
        }
    )
    .sort_values(by="Importance", ascending=False)
)

# Save the vegetated-area ranking (not the open-area table) to the Vegetation results folder.
shap_importance_vegetated.to_csv('../results/XGBoost/Vegetation/all/shap_feature_importances.csv', index=False)