In [1]:
import ee
import os
import time
import joblib
import geemap
import datetime
import pandas as pd
from geemap import ml
from sklearn import ensemble

In [2]:
# ee.Authenticate()  # left disabled; uncomment to run the authentication flow

# Connect to Earth Engine through the high-volume endpoint.
# On failure the error is reported and re-raised so later cells do not run
# against an uninitialised session.
try:
    ee.Initialize(opt_url='https://earthengine-highvolume.googleapis.com')
except ee.EEException as e:
    print('Google Earth Engine failed to initialize!', e)
    raise
else:
    print('Google Earth Engine initialized successfully!')

Google Earth Engine initialized successfully!


In [47]:
# --- Run configuration -------------------------------------------------------
INPUT_FOLDER = "./rfr_joblib"  # folder the fitted model is read from
OUTPUT_FOLDER = "./rfr_csv"    # folder the exported tree CSV is written to
AEZ = 15
PRED_VARIABLE = "N"
# Date stamp embedded in both the model and the CSV file names.
date = "2025-06-29" # datetime.datetime.today().date()

# --- Load the fitted model ---------------------------------------------------
MODEL_FILE_NAME = f"rfr_model_{date}_AEZ_{AEZ}_{PRED_VARIABLE}.joblib"
model_path = os.path.join(INPUT_FOLDER, MODEL_FILE_NAME)
# NOTE: joblib files are pickle-based; only load files from a trusted source.
rf = joblib.load(model_path)

# --- Derive the output file name and the feature list ------------------------
# The CSV name records the estimator's n_estimators and max_depth settings.
rf_params = rf.get_params()
CSV_FILE_NAME = (
    f"rf_trees_t{rf_params['n_estimators']}_d{rf_params['max_depth']}"
    f"_{date}_AEZ_{AEZ}_{PRED_VARIABLE}.csv"
)
feature_names = rf.feature_names_in_.tolist()

print(feature_names)

['temp', 'RI', 'elevation', 'precipitation', 'clay05', 'sand05', 'silt05', 'sand515', 'longitude', 'latitude']


In [48]:
# Convert the fitted estimator into a list of strings and report how long the
# conversion took (wall-clock, measured with a monotonic counter).
start_time = time.perf_counter()
trees = ml.rf_to_strings(
    rf,
    feature_names,
    processes=8,
    output_mode="REGRESSION",
)
elapsed_time = time.perf_counter() - start_time

# timedelta renders the duration as H:MM:SS.ffffff
print("Time taken : " + str(datetime.timedelta(seconds=elapsed_time)))

Time taken : 0:03:20.113050


In [49]:
# Character count of every serialised tree, in the order they appear in `trees`.
sizes = [len(tree) for tree in trees]
print(sizes)

[615226, 658509, 737003, 681238, 662853, 645488, 610532, 701449, 621817, 747864]


In [50]:
# Write the serialised trees to OUTPUT_FOLDER/CSV_FILE_NAME.
# The output folder is created first (no-op when it already exists) so the
# export does not fail with FileNotFoundError on a fresh checkout.
os.makedirs(OUTPUT_FOLDER, exist_ok=True)
file_path = os.path.join(OUTPUT_FOLDER, CSV_FILE_NAME)
ml.trees_to_csv(trees, file_path)

In [37]:
# Read the exported file back for a sanity check. header=None makes pandas
# treat the first line as data instead of column names.
df = pd.read_csv(
    file_path,
    header=None,
)

In [38]:
# Number of rows read back from the CSV, reported as the number of trees.
n = len(df)
print(f"Number of trees: {n}")

# Length of the value stored in the first column of each row.
sizes = [len(tree_text) for tree_text in df.iloc[:, 0]]
print(f"Tree sizes : {sizes}")

Number of trees: 10
Tree sizes : [1111144, 1025098, 975822, 1028173, 1053202, 1062707, 991867, 955893, 1147119, 1082272]
