# In [None]:
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
import matplotlib.pyplot as plt

# ----------------------------
# 1. Generate Synthetic Data
# ----------------------------

# Seed NumPy's legacy global RNG so every run draws the same samples.
np.random.seed(42)
n_samples = 1000

# (low, high) bounds of each simulated remote-sensing feature. Every feature,
# NDVI and EVI included, is drawn independently and uniformly; the dict order
# fixes the order in which the random draws happen.
feature_ranges = {
    "Red": (0.05, 0.5),
    "NIR": (0.1, 0.8),
    "SWIR": (0.05, 0.6),
    "Elevation": (0, 500),
    "Slope": (0, 45),
    "NDVI": (0.2, 0.9),
    "EVI": (0.1, 0.8),
}
data = {
    feature: np.random.uniform(low, high, n_samples)
    for feature, (low, high) in feature_ranges.items()
}
df = pd.DataFrame(data)

# Synthetic carbon stock (tons/ha): a linear combination of NDVI, EVI and
# Elevation plus uniform noise in [0, 5). Red, NIR, SWIR and Slope do not
# enter the target.
signal = 10 + 30 * df["NDVI"] + 20 * df["EVI"] - 0.01 * df["Elevation"]
noise = 5 * np.random.rand(n_samples)
df["Carbon_Stock"] = signal + noise

# ----------------------------
# 2. Train-Test Split
# ----------------------------

# Features are every column except the target.
y = df["Carbon_Stock"]
X = df.drop(columns="Carbon_Stock")

# Hold out 20% of the rows for testing; the fixed seed keeps the split stable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# ----------------------------
# 3. Train Ensemble Models
# ----------------------------

# ``fit`` returns the estimator itself, so each model is built and trained in
# a single expression.
rf = RandomForestRegressor(
    n_estimators=100,
    random_state=42,
).fit(X_train, y_train)
gb = GradientBoostingRegressor(
    n_estimators=100,
    learning_rate=0.1,
    random_state=42,
).fit(X_train, y_train)

# ----------------------------
# 4. Evaluate Models
# ----------------------------

def evaluate(model, name):
    """Score a fitted regressor on the held-out test split and print the result.

    Reads the module-level ``X_test`` and ``y_test``.

    Args:
        model: Fitted estimator exposing ``predict``.
        name: Label used as the prefix of the printed summary line.

    Returns:
        The predictions ``model.predict(X_test)``.
    """
    y_pred = model.predict(X_test)
    # Take the square root explicitly instead of passing ``squared=False``:
    # that keyword was deprecated in scikit-learn 1.4 and removed in 1.6,
    # whereas this form works on every version.
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    r2 = r2_score(y_test, y_pred)
    print(f"{name} - RMSE: {rmse:.2f}, R²: {r2:.2f}")
    return y_pred

# Print test metrics for each model and keep the predictions for plotting.
y_rf = evaluate(rf, "Random Forest")
y_gb = evaluate(gb, "Gradient Boosting")

# ----------------------------
# 5. Visualization
# ----------------------------

# Predicted-vs-actual scatter for both models on a single set of axes.
fig, ax = plt.subplots(figsize=(10, 5))
for predictions, label, color in (
    (y_rf, "Random Forest", "green"),
    (y_gb, "Gradient Boosting", "orange"),
):
    ax.scatter(y_test, predictions, alpha=0.6, label=label, color=color)

# Dashed 1:1 reference line spanning the range of the full target column.
lo, hi = y.min(), y.max()
ax.plot([lo, hi], [lo, hi], "--", color="black")

ax.set_xlabel("Actual Carbon Stock")
ax.set_ylabel("Predicted Carbon Stock")
ax.set_title("Carbon Stock Prediction: Ensemble Models")
ax.legend()
ax.grid(True)
fig.tight_layout()
plt.show()
