In [None]:
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

# -----------------------
# Step 1: Generate Synthetic Data
# -----------------------
n_samples = 1000

# Seed for the synthetic-data generator. Without a seed every run draws a
# different dataset, so the reported metrics and the exported spreadsheet
# cannot be reproduced.
RANDOM_SEED = 42
rng = np.random.default_rng(RANDOM_SEED)

# Simulated features (drawn in a fixed order so the stream is reproducible)
ndvi = rng.uniform(0, 1, n_samples)  # NDVI ranges from 0 (no vegetation) to 1 (dense vegetation)
lulc_classes = rng.integers(0, 5, n_samples)  # Simulated LULC categories 0..4 (upper bound exclusive): 0 = Urban, 1 = Water, etc.
elevation = rng.uniform(0, 500, n_samples)  # Elevation in meters

# Simulated target: Land Surface Temperature (in °C).
# Base of 45, minus 15 per unit NDVI (inverse relation), plus 2 per LULC class
# index and 0.01 per meter of elevation (direct relation), plus Gaussian noise
# with mean 0 and standard deviation 1.
lst = 45 - (ndvi * 15) + (lulc_classes * 2) + (elevation * 0.01) + rng.normal(0, 1, n_samples)

# -----------------------
# Step 2: Prepare Dataset
# -----------------------
# Collect the simulated arrays into one table, one row per sample.
feature_table = pd.DataFrame(
    data={'NDVI': ndvi, 'LULC': lulc_classes, 'Elevation': elevation, 'LST': lst}
)

# LULC is a category label, not a magnitude: expand it into one indicator
# column per class value found in the data (named with the 'LULC' prefix).
df = pd.get_dummies(feature_table, prefix='LULC', columns=['LULC'])

# Features are every column except the target; the target is LST.
target_column = 'LST'
X = df.loc[:, df.columns != target_column]
y = df[target_column]

# -----------------------
# Step 3: Train-Test Split
# -----------------------
# Hold out 20% of the rows for evaluation. The fixed random_state keeps the
# partition of a given X/y identical from run to run.
split_parts = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)
X_train, X_test, y_train, y_test = split_parts

# -----------------------
# Step 4: Train ML Model
# -----------------------
# Random forest of 100 trees with a fixed seed; fit() returns the estimator
# itself, so construction and training are chained into one expression.
model = RandomForestRegressor(
    n_estimators=100,
    random_state=42,
).fit(X_train, y_train)

# -----------------------
# Step 5: Evaluate Model
# -----------------------
# Score the model on the held-out rows only.
y_pred = model.predict(X_test)

# RMSE is the square root of the mean squared error, so it is expressed in
# the same unit as the target.
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)

# Coefficient of determination of the predictions against the true values.
r2 = r2_score(y_test, y_pred)

print(f"RMSE: {rmse:.2f}")
print(f"R² Score: {r2:.2f}")

# -----------------------
# Step 6: Save Results to Excel
# -----------------------
# assign() returns a new frame, leaving X_test untouched. The true values are
# passed as a plain array (.values) so they are attached by position rather
# than by index label.
results_df = X_test.assign(
    True_LST=y_test.values,
    Predicted_LST=y_pred,
)

# One row per test sample; the DataFrame index is not written to the sheet.
results_df.to_excel("lst_modeling_results.xlsx", index=False)

# Optional: Plot true vs predicted
# Points close to the diagonal indicate accurate predictions; alpha=0.5 keeps
# overlapping points distinguishable.
fig, ax = plt.subplots()
ax.scatter(y_test, y_pred, alpha=0.5)
ax.set_xlabel("True LST (°C)")
ax.set_ylabel("Predicted LST (°C)")
ax.set_title("LST Prediction: True vs Predicted")
ax.grid(True)
plt.show()
