# SentinelGov Crisis Intelligence - ML Model Trainer

This Jupyter notebook trains the **Random Forest Regressor** that the SentinelGov `ml-service` needs in order to run real model inference. Executing the cells in order generates synthetic telemetry, fits the model, and writes it to `models/flood_rf_model.joblib`. The model captures the non-linear relationship between rainfall, drainage capacity, population density, social-signal spikes, and the resulting flood risk score (0–100).

In [None]:
!pip install scikit-learn pandas numpy joblib

In [None]:
import os
import numpy as np
import pandas as pd
import joblib
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, r2_score

def generate_synthetic_data(num_samples=15000, seed=None):
    """Build a shuffled DataFrame of synthetic flood-telemetry records.

    The samples are split across three regimes (40% nominal, 30% moderate,
    and the remainder critical). Within each regime every feature is drawn
    uniformly from a regime-specific range. The target risk is a fixed
    non-linear blend of the features, scaled to 0-100, perturbed with
    Gaussian noise (standard deviation 3.5) and clipped back to [0, 100].

    Args:
        num_samples: Total number of rows to generate.
        seed: Optional seed for the random draws. When ``None`` (the
            default) the global ``np.random`` state is used, so results vary
            between runs unless the caller seeded NumPy. When given, a
            private ``RandomState(seed)`` makes the output fully reproducible
            without touching the global state.

    Returns:
        A ``pandas.DataFrame`` with columns ``rainfall``,
        ``drainageCapacity``, ``populationDensity``, ``socialSpike`` and
        ``targetRisk``, with rows shuffled using a fixed ``random_state=42``.
    """
    print(f"Generating {num_samples} synthetic telemetry records...")

    # The module-level functions and RandomState expose the same
    # uniform/normal API, so the default path behaves as it always has.
    rng = np.random if seed is None else np.random.RandomState(seed)

    nom_len = int(num_samples * 0.4)
    mod_len = int(num_samples * 0.3)
    # Whatever is left after truncation goes to the critical regime, so the
    # three parts always add up to num_samples.
    crit_len = num_samples - nom_len - mod_len

    # One row per regime:
    # (size, rainfall range, drainage range, population range, social range)
    regimes = [
        (nom_len, (0, 30), (0.7, 1.0), (0.1, 0.4), (0.0, 0.2)),
        (mod_len, (30, 80), (0.5, 0.9), (0.2, 0.6), (0.1, 0.5)),
        (crit_len, (80, 150), (0.0, 0.4), (0.5, 0.9), (0.6, 1.0)),
    ]

    # Draw regime by regime, feature by feature, so the sequence of RNG
    # calls stays the same for every regime: rain, drain, pop, social.
    feature_parts = [[], [], [], []]
    for size, *ranges in regimes:
        for parts, (low, high) in zip(feature_parts, ranges):
            parts.append(rng.uniform(low, high, size))

    rainfall, drainage, population, social = (
        np.concatenate(parts) for parts in feature_parts
    )

    # Normalise rainfall by the 150 upper bound used above and express
    # drainage as a failure fraction (1 - capacity).
    rain_norm = np.clip(rainfall / 150.0, 0, 1.0)
    drain_fail = 1.0 - drainage

    # Non-linear geo term: rainfall and drainage failure each contribute on
    # their own, plus an interaction term when both are high.
    combined_geo_impact = (
        (rain_norm ** 1.3) * 0.5
        + (drain_fail ** 1.2) * 0.25
        + (rain_norm * drain_fail) * 0.1
    )
    social_impact = social * 0.15
    pop_impact = population * 0.10

    base_risk = combined_geo_impact + social_impact + pop_impact
    true_risk = np.clip(base_risk * 100, 0, 100)

    # Add measurement-style noise, then clip again so the target stays in
    # the 0-100 range.
    noise = rng.normal(0, 3.5, num_samples)
    final_risk = np.clip(true_risk + noise, 0, 100)

    df = pd.DataFrame({
        'rainfall': rainfall,
        'drainageCapacity': drainage,
        'populationDensity': population,
        'socialSpike': social,
        'targetRisk': final_risk
    })

    # The rows were built regime by regime; shuffle so they are interleaved.
    df = df.sample(frac=1, random_state=42).reset_index(drop=True)
    return df

In [None]:
# Generate the full training set and preview its first five rows.
df = generate_synthetic_data(num_samples=15000)
display(df.head(5))

In [None]:
# Feature matrix (the four telemetry inputs) and regression target.
X = df[['rainfall', 'drainageCapacity', 'populationDensity', 'socialSpike']]
y = df['targetRisk']

# Hold out 20% of the rows for evaluation; the split is pinned for repeatability.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Hyperparameters live in one place so the log line cannot drift from the
# values actually passed to the estimator.
n_estimators = 100
max_depth = 12

print(f"Training Random Forest Regressor (n_estimators={n_estimators}, max_depth={max_depth})...")
model = RandomForestRegressor(
    n_estimators=n_estimators,
    max_depth=max_depth,
    random_state=42,
    n_jobs=-1,  # use every available core
)
model.fit(X_train, y_train)

# Score the model on the held-out rows.
y_pred = model.predict(X_test)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

# The status strings were mis-encoded (UTF-8 bytes decoded as cp1252);
# the intended characters are restored here.
print("\n✅ Training Complete!")
print(f"Model MAE: {mae:.2f}")
print(f"Model R²: {r2:.4f}")

In [None]:
# Report how much each input feature contributes to the fitted forest,
# expressed as a percentage.
importances = model.feature_importances_
print("Feature Importances:")
for feature_name, weight in zip(X.columns, importances):
    percent = weight * 100
    print(f"  - {feature_name}: {percent:.1f}%")

In [None]:
# Persist the trained model; the path is relative to the notebook's
# current working directory.
joblib_path = 'models/flood_rf_model.joblib'

# Derive the directory from the path so the two cannot get out of sync.
os.makedirs(os.path.dirname(joblib_path), exist_ok=True)
joblib.dump(model, joblib_path)

# The status string was mis-encoded (UTF-8 bytes decoded as cp1252);
# the intended character is restored here.
print(f"\n💾 Model exported to: {joblib_path}")
print("The SentinelGov FastAPI Server will automatically load this at runtime.")