In [3]:
import pandas as pd
import geopandas as gpd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
import estimators as est

##### Data preprocessing

In [4]:
# Load the processed sample dataset from its GeoPackage file
df = gpd.read_file("processed_sample_data.gpkg", driver="gpkg")

# Build a cleaned frame without touching `df`: rows containing any missing
# value are removed and the index is renumbered from 0
df_copy = df.dropna().reset_index(drop=True)

# One-hot encode landuse_type so the model receives indicator columns
# instead of category labels
df_copy = pd.get_dummies(df_copy, columns=["landuse_type"])

##### Separate variables into X and Y

In [5]:
# Predictor variables (X): every column of the cleaned frame except SOC
X = df_copy.drop(columns=['soc'])

# Target variable (Y): the SOC column
y = df_copy['soc']

#### 5-fold cross validation method

In [6]:
# 5-fold cross-validation splitter; rows are shuffled before splitting and
# the seed is fixed so the same folds are produced on every run
from sklearn.model_selection import KFold

kf = KFold(
    n_splits=5,
    shuffle=True,
    random_state=1,
)

#### Build Random Forest with Regression Kriging

In [7]:
# Use the optimal hyperparameters found by Kmoch et al. 2021
# NOTE(review): no arguments are passed here, so those hyperparameters are
# presumably the defaults of est.OrdinaryKrigingRF — TODO confirm in estimators.py
rf_ok = est.OrdinaryKrigingRF()

#### Check the performance of Random Forest with Regression Kriging

In [8]:
from sklearn.model_selection import cross_val_score, cross_validate
from sklearn.metrics import mean_absolute_error, r2_score, make_scorer


def r2_scorer(y_true, y_pred):
    """Return the R-squared (R2) score of `y_pred` against `y_true`."""
    return r2_score(y_true, y_pred)


# Evaluate the model with 5-fold CV in a single pass. cross_validate fits the
# estimator once per fold and scores every metric on that same fitted model,
# instead of refitting all folds separately for each metric.
rf_cv_results = cross_validate(
    rf_ok,
    X,
    y,
    cv=kf,
    scoring={
        'rmse': 'neg_root_mean_squared_error',
        'mae': 'neg_mean_absolute_error',
        'r2': make_scorer(r2_scorer),
    },
)

# Root mean squared error (RMSE); the built-in scorer is negated, so flip the sign
rf_rmse_scores = -rf_cv_results['test_rmse']
mean_rf_rmse = rf_rmse_scores.mean()

# Mean absolute error (MAE); the built-in scorer is negated, so flip the sign
rf_mae_scores = -rf_cv_results['test_mae']
mean_rf_mae = rf_mae_scores.mean()

# R-squared (R2)
rf_r2_scorers = rf_cv_results['test_r2']
mean_rf_r2 = rf_r2_scorers.mean()

In [9]:
# Summarise the mean cross-validated scores in a two-column table
rf_performance_df = pd.DataFrame(
    {
        'Metrics': ["Mean R2", "Mean RMSE", "Mean MAE"],
        'Score': [mean_rf_r2, mean_rf_rmse, mean_rf_mae],
    }
)

rf_performance_df

Unnamed: 0,Metrics,Score
0,Mean R2,0.613528
1,Mean RMSE,7.440005
2,Mean MAE,4.370407
