In [None]:
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

# ----------------------------
# 1. Generate Synthetic Data
# ----------------------------

# Seed NumPy's global RNG so every run produces the same synthetic sample.
np.random.seed(42)
n_samples = 1000

# Remote-sensing proxies and meteorological predictors, each drawn
# independently from a uniform range.
# NOTE: the order of these draws determines the generated values, so it must
# not be rearranged.
aod = np.random.uniform(low=0.1, high=1.5, size=n_samples)        # Aerosol Optical Depth
lst = np.random.uniform(low=20, high=45, size=n_samples)          # Land Surface Temperature (°C)
ndvi = np.random.uniform(low=0, high=0.8, size=n_samples)         # Vegetation Index
wind_speed = np.random.uniform(low=0, high=10, size=n_samples)    # Wind speed (m/s)

# Gaussian noise (mean 0, standard deviation 10), drawn after the predictors.
noise = np.random.normal(loc=0, scale=10, size=n_samples)

# Simulated AQI: a linear mix of the predictors plus noise.
# AOD and LST push the index up; NDVI and wind speed pull it down.
aqi = 50 + aod * 80
aqi = aqi + lst * 1.5
aqi = aqi - ndvi * 30
aqi = aqi - wind_speed * 3
aqi = aqi + noise

# Assemble predictors and target into one table, preserving column order.
column_data = [
    ('AOD', aod),
    ('LST', lst),
    ('NDVI', ndvi),
    ('WindSpeed', wind_speed),
    ('AQI', aqi),
]
df = pd.DataFrame(dict(column_data))

# ----------------------------
# 2. Train/Test Split
# ----------------------------

# Predictor columns on one side, the AQI target on the other.
feature_columns = ['AOD', 'LST', 'NDVI', 'WindSpeed']
X = df[feature_columns]
y = df['AQI']

# Hold out 30% of the rows for evaluation; the fixed random_state keeps the
# partition reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# ----------------------------
# 3. Train Model
# ----------------------------

# Random forest of 100 trees with a fixed seed; fit() returns the estimator
# itself, so construction and training are chained into one expression.
model = RandomForestRegressor(
    n_estimators=100,
    random_state=42,
).fit(X_train, y_train)

# ----------------------------
# 4. Predict and Evaluate
# ----------------------------

# Score the fitted model on the held-out rows only.
y_pred = model.predict(X_test)

print("R² Score:", r2_score(y_test, y_pred))

# RMSE is computed as sqrt(MSE) rather than via the `squared=False` keyword:
# that keyword was deprecated in scikit-learn 1.4 and removed in 1.6, where it
# raises TypeError. Taking the square root works on every version.
rmse = float(np.sqrt(mean_squared_error(y_test, y_pred)))
print("RMSE:", rmse)

# ----------------------------
# 5. Plot Results
# ----------------------------

# Endpoints of the y = x reference line, spanning the observed AQI range.
aqi_lo = min(y_test)
aqi_hi = max(y_test)
diagonal = [aqi_lo, aqi_hi]

plt.figure(figsize=(8, 5))

# One point per held-out sample: actual value on x, model prediction on y.
plt.scatter(y_test, y_pred, alpha=0.6, edgecolors='k')

# Dashed red diagonal: points on this line are perfect predictions.
plt.plot(diagonal, diagonal, 'r--')

plt.xlabel("Actual AQI")
plt.ylabel("Predicted AQI")
plt.title("Actual vs Predicted AQI")
plt.grid(True)
plt.tight_layout()
plt.show()
