<a href="https://colab.research.google.com/github/StratagemGIS/notebooks/blob/main/projects/39_performing_rf_regression.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**Regression Analysis: Performing Random Forest Regression Using ArcGIS Pro**

Vaasudevan Srinivasan 🧑🏻‍💻  
StratagemGIS Solutions

Reference: www.esri.com/training/catalog/6410be434d750615175b2b53/

In [9]:
import fiona
import geopandas as gpd
import pandas as pd
import pooch
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split

In [None]:
zip_file = pooch.retrieve(
    'https://github.com/StratagemGIS/datasets/raw/main/esri/zip/RegressForestRandom.zip',
    known_hash='84b4c8fafdb3043ed2aaaf49f3ff5da2d5a65f35dc168ddcfec2ac46b7e688e2',
    processor=pooch.Unzip(extract_dir='/content')
)

gdb_file = 'RegressForestRandom/RegressionModelMap.gdb'

In [5]:
stations = gpd.read_file(gdb_file, layer=2)

In [7]:
variables = (
    stations.drop(
        columns=['WBANNO', 'LST_YRMO', 'LST_YRMO_Converted',
                 'T_MONTHLY', 'geometry']
    ).columns
)

x, y = stations[variables].values, stations['T_MONTHLY'].values
x_train, x_test, y_train, y_test  = train_test_split(x, y, test_size=0.1)

In [8]:
rf = RandomForestRegressor(
    n_estimators=100,
    max_depth=31,
    oob_score=True,
    random_state=42,
    n_jobs=-1,
)

rf.fit(x_train, y_train)

In [13]:
pd.DataFrame(
    zip(variables, rf.feature_importances_),
    columns=['Variable', 'Importance']
).sort_values('Importance', ascending=False).head(5)

Unnamed: 0,Variable,Importance
27,tasmax,0.833192
19,rlutcs,0.02755
14,ps,0.022914
22,rsdt,0.009484
31,tauv,0.008439


In [14]:
rf.score(x_test, y_test)

0.9226043878884124