# Random Forest

### Load data & split

In [6]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error
from sklearn.ensemble import RandomForestRegressor
import joblib

# -----------------------------
# LOAD DATA
# -----------------------------
# Read the training table. The "PV" column is the regression target and
# every remaining column is used as a predictor.
df = pd.read_csv(r"D:\WiDS\Project_sun\excelData\trainingData.csv")

y = df["PV"]
X = df.drop(columns=["PV"])

# -----------------------------
# TRAIN–VALIDATION SPLIT
# -----------------------------
# Hold out 20% of the rows for validation; the fixed random_state makes the
# shuffle (and therefore the split) repeatable between runs.
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.20, random_state=42)

print(f"RF Training samples: {len(X_train)}")
print(f"RF Validation samples: {len(X_val)}")


RF Training samples: 891
RF Validation samples: 223


### Train Random Forest

In [7]:
# Hyperparameters of the forest, gathered in one mapping so they can be
# reviewed (or logged) as a unit.
rf_params = {
    "n_estimators": 500,
    "max_depth": 12,
    "min_samples_leaf": 3,
    "min_samples_split": 8,
    "random_state": 42,
    "n_jobs": -1,
}

# fit() returns the fitted estimator, so construction and training chain.
rf_model = RandomForestRegressor(**rf_params).fit(X_train, y_train)

# Persist the fitted model next to the notebook so it can be reloaded later.
joblib.dump(rf_model, "rf_model.pkl")

print("Random Forest model saved")


Random Forest model saved


### Validate & metrics

In [8]:
# Predict on the held-out rows and score the predictions against y_val.
rf_pred = rf_model.predict(X_val)

rf_mse = mean_squared_error(y_val, rf_pred)
rf_rmse = np.sqrt(rf_mse)          # RMSE is the square root of the MSE
rf_r2 = r2_score(y_val, rf_pred)

print("Random Forest Results")
for label, score in (("R²   :", rf_r2), ("RMSE :", rf_rmse)):
    print(label, round(score, 3))


Random Forest Results
R²   : 0.869
RMSE : 0.662


### Feature Importance

In [9]:
# Label the forest's feature_importances_ with the training column names and
# list them from most to least important.
importances = rf_model.feature_importances_
rf_importance = pd.Series(importances, index=X.columns).sort_values(ascending=False)

print(rf_importance)


GHI         0.506412
Temp        0.186516
Cloud       0.183173
Rainfall    0.037476
Railways    0.036155
Settl       0.030073
Road        0.008974
Slope       0.007393
Aspect      0.002345
LULC        0.001483
dtype: float64


# Predicting the Solar Suitability Index

In [11]:
import rasterio
import numpy as np
import pandas as pd
import joblib

# -----------------------------
# LOAD TRAINED MODEL
# -----------------------------
# NOTE: joblib.load unpickles arbitrary objects; only load model files that
# were produced by this notebook (or another trusted source).
rf_model = joblib.load("rf_model.pkl")

# -----------------------------
# INPUT RASTERS (KEYED BY TRAINING COLUMN)
# -----------------------------
# Keys are the training column names as printed by the feature-importance
# cell. Keying by name lets the prediction matrix be built in the exact
# column order the model was fitted on instead of relying on a hand-kept list.
# NOTE(review): the column-name -> file pairing is inferred from the file
# names; confirm each raster really holds the matching training variable.
raster_by_feature = {
    "GHI": r"D:\WiDS\Project_sun\normalized_aligned\norm_GHI.tif",
    "Temp": r"D:\WiDS\Project_sun\normalized_aligned\norm_Temperature.tif",
    "Rainfall": r"D:\WiDS\Project_sun\normalized_aligned\norm_Rainfall.tif",
    "Cloud": r"D:\WiDS\Project_sun\normalized_aligned\norm_CloudCover.tif",
    "Settl": r"D:\WiDS\Project_sun\normalized_aligned\norm_SettlementProximity.tif",
    "Road": r"D:\WiDS\Project_sun\normalized_aligned\norm_Road_Proximity.tif",
    "Railways": r"D:\WiDS\Project_sun\normalized_aligned\norm_Railway_Proximity.tif",
    "Slope": r"D:\WiDS\Project_sun\normalized_aligned\norm_Slope.tif",
    "Aspect": r"D:\WiDS\Project_sun\normalized_aligned\norm_Aspect.tif",
    "LULC": r"D:\WiDS\Project_sun\normalized_aligned\norm_LULC.tif",
}

# scikit-learn stores the fitted column order in feature_names_in_ when fit()
# received a DataFrame. If the attribute is missing, fall back to the order
# listed above.
feature_order = [
    str(name) for name in getattr(rf_model, "feature_names_in_", raster_by_feature)
]

missing = [name for name in feature_order if name not in raster_by_feature]
if missing:
    raise KeyError(f"No raster configured for model feature(s): {missing}")

raster_files = [raster_by_feature[name] for name in feature_order]

# Value written to output pixels for which no prediction can be made.
OUT_NODATA = -9999.0

# -----------------------------
# READ REFERENCE RASTER
# -----------------------------
# The first raster supplies the output grid and the GeoTIFF profile.
with rasterio.open(raster_files[0]) as src:
    profile = src.profile
    rows, cols = src.height, src.width

# -----------------------------
# STACK RASTERS
# -----------------------------
bands = []
valid = np.ones((rows, cols), dtype=bool)   # True where every band has data

for path in raster_files:
    with rasterio.open(path) as src:
        # Pixels are matched by array position, so every raster must share
        # the reference grid's dimensions.
        if (src.height, src.width) != (rows, cols):
            raise ValueError(
                f"{path} is {src.height}x{src.width}, expected {rows}x{cols}; "
                "all input rasters must share one grid"
            )
        band = src.read(1)
        band_nodata = src.nodata

    # A pixel is usable only if no band flags it as nodata or holds NaN/inf.
    if band_nodata is not None:
        valid &= band != band_nodata
    valid &= np.isfinite(band)
    bands.append(band)

stack = np.stack(bands, axis=-1)   # (rows, cols, n_features)

# -----------------------------
# RESHAPE FOR ML
# -----------------------------
X_pred = stack.reshape(-1, stack.shape[-1])   # one row per pixel
valid_flat = valid.ravel()

# -----------------------------
# PREDICT
# -----------------------------
# Only valid pixels are sent to the model; the rest keep OUT_NODATA.
ssi_pred = np.full(X_pred.shape[0], OUT_NODATA, dtype="float64")

if valid_flat.any():
    X_valid = X_pred[valid_flat]
    if hasattr(rf_model, "feature_names_in_"):
        # Passing named columns lets scikit-learn verify them against the
        # names seen during fit instead of trusting positional order.
        X_valid = pd.DataFrame(X_valid, columns=feature_order)
    ssi_pred[valid_flat] = rf_model.predict(X_valid)

# -----------------------------
# BACK TO RASTER
# -----------------------------
ssi_raster = ssi_pred.reshape(rows, cols)

# -----------------------------
# SAVE OUTPUT
# -----------------------------
# Single float32 band; declare the nodata value that was actually written so
# GIS tools do not treat masked pixels as predictions.
profile.update(dtype="float32", count=1, nodata=OUT_NODATA)

with rasterio.open(
    r"D:\WiDS\Project_sun\model\Predicted_Solar_Suitability_RF.tif",
    "w",
    **profile
) as dst:
    dst.write(ssi_raster.astype("float32"), 1)

print("Final solar suitability raster created")




Final solar suitability raster created
