# EWRI - Wildfire Risk Calculation
**`risk_enhanced` = EWRI score (Embeddings × Vulnerability)**

In [None]:
# SETTINGS & IMPORTS
import glob
import os
import warnings

import geopandas as gpd
import h3
import numpy as np
import pandas as pd
from shapely.geometry import Point
from sklearn.metrics import roc_auc_score
from sklearn.metrics.pairwise import cosine_similarity
from tqdm import tqdm

# Suppress every warning for the remainder of the session.
warnings.filterwarnings('ignore')

# County to process. Options: los_angeles, napa, suffolk, maricopa
COUNTY = "napa"
# Embedding columns are later restricted to names ending in one of these years.
PRE_FIRE_YEARS = [2017, 2018, 2019]

# Directory layout: <BASE>/{raw,processed,outputs}/<COUNTY>
BASE = "/home/network-lab/Desktop/EWRI"
RAW = f"{BASE}/raw/{COUNTY}"
PROCESSED = f"{BASE}/processed/{COUNTY}"
OUTPUT = f"{BASE}/outputs/{COUNTY}"

# Only the output directory is created; raw/processed inputs must already exist.
os.makedirs(OUTPUT, exist_ok=True)

print(f"County: {COUNTY.upper()}")

In [None]:
# LOAD DATA (4 parquet files)
# All four tables are read from the county's processed directory.
emb = pd.read_parquet(f"{PROCESSED}/embeddings_h3.parquet")
sat = pd.read_parquet(f"{PROCESSED}/satellite_h3.parquet")
exposure = pd.read_parquet(f"{PROCESSED}/exposure_h3.parquet")

# From the FEMA table only the hexagon key and the SOVI score are kept.
fema = pd.read_parquet(f"{PROCESSED}/fema_h3.parquet")
fema = fema[["h3_index", "SOVI_SCORE"]]

# Report (rows, columns) for each table.
for table_label, table in (
    ("Embeddings", emb),
    ("Satellite", sat),
    ("FEMA", fema),
    ("Exposure", exposure),
):
    print(f"{table_label}: {table.shape}")

In [None]:
# FILTER EMBEDDINGS TO PRE-FIRE YEARS & MERGE ALL
# Keep `h3_index` plus every embedding column whose name ends in "_<year>"
# for one of the configured pre-fire years.
year_suffixes = tuple(f"_{y}" for y in PRE_FIRE_YEARS)
emb_cols = ["h3_index"] + [c for c in emb.columns if c.endswith(year_suffixes)]
emb = emb[emb_cols]
print(f"Embeddings filtered: {len(emb_cols)-1} features")

# Merge: inner(emb+sat), left(exposure), left(fema)
# The inner join keeps only hexagons present in both embeddings and satellite;
# the left joins leave NaN where exposure / FEMA have no row for a hexagon.
df = (
    emb.merge(sat, on="h3_index", how="inner")
    .merge(exposure, on="h3_index", how="left")
    .merge(fema, on="h3_index", how="left")
)
print(f"Merged: {len(df):,} hexagons")

In [None]:
# LABEL BURNED HEXAGONS
# A hexagon is labelled burned (1) when the point returned by
# h3.cell_to_latlng for its cell lies within the union of the fire
# geometries, otherwise 0.
#
# Shapefiles are still preferred over GeoJSON, but each glob result is sorted
# so the chosen file does not depend on filesystem listing order.
fire_files = (
    sorted(glob.glob(f"{RAW}/fire_data/*.shp"))
    + sorted(glob.glob(f"{RAW}/fire_data/*.geojson"))
)
if not fire_files:
    # Fail with an actionable message instead of a bare IndexError below.
    raise FileNotFoundError(f"No .shp or .geojson fire file found in {RAW}/fire_data")
if len(fire_files) > 1:
    # Only one file is read; make that visible instead of silently ignoring the rest.
    print(f"Note: {len(fire_files)} fire files found; using only {fire_files[0]}")

fire = gpd.read_file(fire_files[0]).to_crs("EPSG:4326")
fire_union = fire.union_all()

# cell_to_latlng yields (lat, lng); reversing gives the (x=lng, y=lat) order Point expects.
df["geometry"] = [Point(*h3.cell_to_latlng(h)[::-1]) for h in tqdm(df["h3_index"], desc="H3→Point")]
gdf = gpd.GeoDataFrame(df, geometry="geometry", crs="EPSG:4326")
df["burned"] = gdf.within(fire_union).astype(int)

# The point geometry was only needed for the spatial test.
df = df.drop(columns=["geometry"])

print(f"Burned: {df['burned'].sum():,} ({df['burned'].mean()*100:.2f}%)")

In [None]:
# NORMALIZE FEATURES & CREATE VULNERABILITY
# 1. Min-max scale every embedding and satellite feature (NaN -> 0 first).
# 2. Build a vulnerability score from population, built-up area and SOVI.


def _minmax_eps(values):
    """Min-max scale *values*; the 1e-9 in the denominator keeps a constant column finite."""
    lo, hi = values.min(), values.max()
    return (values - lo) / (hi - lo + 1e-9)


# Embedding columns are recognised by name: they start with "A" and contain "_".
emb_cols = [c for c in df.columns if c.startswith("A") and "_" in c]

# Columns that are never satellite features: keys, labels, exposure inputs, and
# every column this notebook derives. Excluding the derived ones keeps the cell
# idempotent: without it, re-running would treat e.g. `vulnerability` or
# `hazard_baseline` as satellite features.
non_feature_cols = {
    "h3_index", "burned", "SOVI_SCORE", "pop_2017", "pop_2018", "pop_2019", "built_up",
    "pop_avg", "pop_norm", "building_norm", "sovi_norm", "vulnerability",
    "hazard_baseline", "hazard_enhanced", "risk_baseline", "risk_enhanced", "risk_category",
}
emb_col_set = set(emb_cols)
sat_cols = [c for c in df.columns if c not in emb_col_set and c not in non_feature_cols]

for col in emb_cols + sat_cols:
    filled = df[col].fillna(0)
    lo, hi = filled.min(), filled.max()
    # Constant columns are left as-is (only NaN-filled) to avoid dividing by zero.
    df[col] = (filled - lo) / (hi - lo) if hi > lo else filled

# Population: missing years count as 0, then average the three pre-fire years.
df['pop_avg'] = (df['pop_2017'].fillna(0) + df['pop_2018'].fillna(0) + df['pop_2019'].fillna(0)) / 3
df['pop_norm'] = _minmax_eps(df['pop_avg'])

# Built-up area: fill BEFORE taking min/max. Taking min/max of the unfilled
# column made NaN-filled hexagons negative whenever the observed minimum was > 0.
df['building_norm'] = _minmax_eps(df['built_up'].fillna(0))

# SOVI: scale the observed values, then give hexagons without a score the median.
df['sovi_norm'] = _minmax_eps(df['SOVI_SCORE'])
df['sovi_norm'] = df['sovi_norm'].fillna(df['sovi_norm'].median())

# Weighted sum: 45% population, 45% built-up, 10% social vulnerability.
df['vulnerability'] = 0.45*df['pop_norm'] + 0.45*df['building_norm'] + 0.10*df['sovi_norm']
print(f"Vulnerability: min={df['vulnerability'].min():.4f}, max={df['vulnerability'].max():.4f}")

In [None]:
# FIRE SIGNATURES & HAZARD SCORES
# The "signature" of a feature set is the mean feature vector over burned
# hexagons; a hexagon's hazard is its cosine similarity to that signature.
# NOTE(review): the signature is built from the same `burned` labels that the
# validation cell scores against, so those metrics are in-sample. Confirm
# this is intended.
burned_df = df.loc[df["burned"].eq(1)]
print(f"Using {len(burned_df):,} burned hexagons for signature")

# Shape (1, n_features) so each can be passed as the second matrix below.
sat_sig = burned_df[sat_cols].mean().to_numpy()[np.newaxis, :]
emb_sig = burned_df[emb_cols].mean().to_numpy()[np.newaxis, :]

# cosine_similarity returns an (n_hexagons, 1) matrix; flatten it to a column.
baseline_sim = cosine_similarity(df[sat_cols].to_numpy(), sat_sig)
enhanced_sim = cosine_similarity(df[emb_cols].to_numpy(), emb_sig)
df["hazard_baseline"] = baseline_sim.ravel()
df["hazard_enhanced"] = enhanced_sim.ravel()

for hazard_col in ("hazard_baseline", "hazard_enhanced"):
    print(f"{hazard_col}: {df[hazard_col].min():.4f} to {df[hazard_col].max():.4f}")

In [None]:
# RISK SCORES
# Risk = hazard x vulnerability, for the baseline and enhanced hazard alike.
vulnerability = df["vulnerability"]
df["risk_baseline"] = df["hazard_baseline"].mul(vulnerability)
df["risk_enhanced"] = df["hazard_enhanced"].mul(vulnerability)

# Categories come from the percentile rank of the enhanced risk:
# (0, 0.5] Low, (0.5, 0.7] Moderate, (0.7, 0.9] High, (0.9, 1] Very High.
enhanced_pct_rank = df["risk_enhanced"].rank(pct=True)
df["risk_category"] = pd.cut(
    enhanced_pct_rank,
    bins=[0, 0.5, 0.7, 0.9, 1],
    labels=["Low", "Moderate", "High", "Very High"],
)
print(df["risk_category"].value_counts())

In [None]:
# VALIDATION (AUC + Fire Capture @ 20%)
# AUC-ROC: how well each hazard score ranks burned above unburned hexagons.
# FC@20%: percentage of all burned hexagons whose hazard score is at or above
# the score's 0.8 quantile.
banner = '=' * 50
print(banner)
print(f"VALIDATION - {COUNTY.upper()}")
print(banner)

burned = df['burned']
total_burned = burned.sum()

auc_s = roc_auc_score(burned, df['hazard_baseline'])
auc_e = roc_auc_score(burned, df['hazard_enhanced'])

top_s = df['hazard_baseline'] >= df['hazard_baseline'].quantile(0.8)
top_e = df['hazard_enhanced'] >= df['hazard_enhanced'].quantile(0.8)
fc_s = df.loc[top_s, 'burned'].sum() / total_burned * 100
fc_e = df.loc[top_e, 'burned'].sum() / total_burned * 100

# Relative AUC change; the `+` format flag prints the real sign, so a
# regression shows as "-5%" rather than the previous hard-coded "+-5%".
auc_gain = (auc_e - auc_s) / auc_s * 100
print(f"AUC-ROC:  Satellite={auc_s:.4f}  Embeddings={auc_e:.4f}  (Δ {auc_gain:+.0f}%)")
print(f"FC@20%:   Satellite={fc_s:.1f}%     Embeddings={fc_e:.1f}%")

In [None]:
# SAVE OUTPUT
# Write one CSV per county holding the hexagon id, both hazard scores, the
# vulnerability, both risk scores, the risk category and the burned label.
output_cols = [
    "h3_index",
    "hazard_baseline",
    "hazard_enhanced",
    "vulnerability",
    "risk_baseline",
    "risk_enhanced",
    "risk_category",
    "burned",
]
output_path = f"{OUTPUT}/{COUNTY}_EWRI_final.csv"
output = df[output_cols]
output.to_csv(output_path, index=False)
print(f"✓ Saved: {output_path} ({len(output):,} hexagons)")

In [84]:
# ALL COUNTIES SUMMARY (Run after processing all 4)
# Re-reads each county's saved CSV and recomputes AUC-ROC and fire capture @ 20%
# for both hazard scores; counties without a saved CSV get a row of dashes.


def _fire_capture_pct(table, score_col):
    """Percent of burned rows whose *score_col* is at or above its 0.8 quantile."""
    in_top = table[score_col] >= table[score_col].quantile(0.8)
    return table.loc[in_top, 'burned'].sum() / table['burned'].sum() * 100


def _relative_gain(baseline, enhanced):
    """Signed, rounded percent change of *enhanced* relative to *baseline*.

    Uses the same rounding as the per-county validation cell and prints the
    true sign (the previous '+' + str(int(...)) truncated and showed "+-5%"
    for a decrease).
    """
    return f"{(enhanced - baseline) / baseline * 100:+.0f}%"


print("="*75)
print("THESIS RESULTS: AUC-ROC & Fire Capture @ 20%")
print("="*75)
print(f"{'County':<12} {'Fire%':>6} {'AUC(Sat)':>9} {'AUC(Emb)':>9} {'Δ':>6} {'FC@20(S)':>9} {'FC@20(E)':>9}")
print("-"*75)

aucs_s, aucs_e, fcs_s, fcs_e = [], [], [], []
for county in ["los_angeles", "napa", "suffolk", "maricopa"]:
    path = f"{BASE}/outputs/{county}/{county}_EWRI_final.csv"
    if not os.path.exists(path):
        # County not processed yet: keep its row so the table layout is stable.
        print(f"{county:<12} {'—':>6} {'—':>9} {'—':>9} {'—':>6} {'—':>9} {'—':>9}")
        continue

    d = pd.read_csv(path)
    a_s = roc_auc_score(d['burned'], d['hazard_baseline'])
    a_e = roc_auc_score(d['burned'], d['hazard_enhanced'])
    f_s = _fire_capture_pct(d, 'hazard_baseline')
    f_e = _fire_capture_pct(d, 'hazard_enhanced')
    delta = _relative_gain(a_s, a_e)
    print(f"{county:<12} {d['burned'].mean()*100:>5.1f}% {a_s:>9.2f} {a_e:>9.2f} {delta:>6} {f_s:>8.0f}% {f_e:>8.0f}%")
    aucs_s.append(a_s)
    aucs_e.append(a_e)
    fcs_s.append(f_s)
    fcs_e.append(f_e)

print("-"*75)
if aucs_s:
    # The average Δ is the relative gain between the two mean AUCs,
    # not the mean of the per-county gains.
    mean_s, mean_e = np.mean(aucs_s), np.mean(aucs_e)
    delta = _relative_gain(mean_s, mean_e)
    print(f"{'AVERAGE':<12} {'':>6} {mean_s:>9.2f} {mean_e:>9.2f} {delta:>6} {np.mean(fcs_s):>8.0f}% {np.mean(fcs_e):>8.0f}%")
print("="*75)

THESIS RESULTS: AUC-ROC & Fire Capture @ 20%
County        Fire%  AUC(Sat)  AUC(Emb)      Δ  FC@20(S)  FC@20(E)
---------------------------------------------------------------------------
los_angeles    2.4%      0.87      0.91    +5%       71%       89%
napa           2.2%      0.68      0.82   +20%       44%       68%
suffolk        2.3%      0.65      0.77   +19%       44%       52%
maricopa       1.8%      0.69      0.93   +34%       57%       89%
---------------------------------------------------------------------------
AVERAGE                  0.72      0.86   +19%       54%       75%
