# Historical Validation

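This notebook checks which historical storms would have met the Scenario 2 and Scenario 3 thresholds, and compares the result against CERF allocations and the number of people affected.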

In [1]:
%load_ext jupyter_black
%load_ext autoreload
%autoreload 2

In [2]:
import geopandas as gpd
import pandas as pd
from pathlib import Path
from shapely.geometry import LineString
import os
from datetime import datetime
import matplotlib.pyplot as plt
import numpy as np
from src.datasources import codab, rsmc

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Load the historical RSMC forecasts together with their distance columns.
df_rsmc = rsmc.load_historical_forecast_distances()

# Collect every column whose label contains "_distance_km" and keep the
# row-wise minimum of those columns as "any_distance_km".
distance_cols = df_rsmc.filter(like="_distance_km").columns.tolist()
df_rsmc["any_distance_km"] = df_rsmc[distance_cols].min(axis="columns")

In [None]:
# Quick-look plot: with no arguments, DataFrame.plot() draws each numeric
# column as a line against the row index.
df_rsmc.plot()

<Axes: >

In [6]:
# Root data directory, read from the environment. The value is None when the
# variable is unset, in which case Path(AA_DATA_DIR) below raises a TypeError.
AA_DATA_DIR = os.environ.get("AA_DATA_DIR")

In [7]:
# Read the combined CERF / EM-DAT / BNGRC table from the shared data directory.
cerf_emdat_df = pd.read_csv(
    Path(AA_DATA_DIR).joinpath(
        "public/exploration/mdg/cerf_emdat_bngrc_data.csv"
    )
)

In [8]:
# Spot-check a single storm: show the row(s) whose "Nom" is BATSIRAI.
cerf_emdat_df.loc[cerf_emdat_df["Nom"].eq("BATSIRAI")]

Unnamed: 0,Nom,Total Affected - EMDAT,Sinistres,CERF Allocations
56,BATSIRAI,112115.0,166671.0,4476918.0


In [9]:
# Location of the admin-0 boundary shapefile inside the shared data directory.
adm0_path = Path(AA_DATA_DIR).joinpath(
    "public",
    "raw",
    "mdg",
    "cod_ab",
    "mdg_admbnda_adm0_BNGRC_OCHA_20181031.shp",
)

# Load the boundary as a GeoDataFrame.
gdf_adm0 = gpd.read_file(adm0_path)

In [10]:
# Create a buffer of BUFFER_KM kilometres around the admin-0 boundary.
#
# The buffer distance is expressed in metres, so the geometry must first be
# projected to a CRS whose unit is metres. Web Mercator (EPSG:3857) is NOT
# suitable for this: its scale error grows with latitude, so a "100 km" buffer
# drawn there is shorter than 100 km on the ground away from the equator.
# A UTM zone estimated from the data keeps the distortion small.
BUFFER_KM = 100

metric_crs = gdf_adm0.estimate_utm_crs()
gdf_adm0_buffer = gdf_adm0.to_crs(metric_crs)

# Apply the buffer (kilometres converted to metres).
gdf_adm0_buffer["geometry"] = gdf_adm0_buffer.geometry.buffer(BUFFER_KM * 1000)

# Reproject back to the original CRS so it lines up with gdf_adm0.
gdf_adm0_buffer = gdf_adm0_buffer.to_crs(gdf_adm0.crs)

In [25]:
# List the columns available in the forecast table.
df_rsmc.columns

Index(['valid_time', 'lt_hour', 'latitude', 'longitude', 'max_wind_kt',
       'season', 'name', 'numberseason', 'issue_time', 'MG_distance_km',
       'any_distance_km'],
      dtype='object')

In [26]:
# Names of all storms with at least one forecast point of 48 kt or more.
df_rsmc.loc[df_rsmc["max_wind_kt"].ge(48), "name"].unique()

array(['0120102011', 'ALENGA', 'ANAIS', 'ADJALI', 'ANNABELLE', 'AVA',
       'ALICIA', 'ANA', 'ALVARO', 'ABELE', 'AMARA', 'BRANSBY', 'IRVING',
       'ALCIDE', 'BELNA', 'BATSIRAI', 'BELAL', 'BOLDWIN', 'BRUCE',
       'CORENTIN', 'BERGUITTA', 'BOUCHRA', 'AMBALI', 'BONGOYO', 'CANDICE',
       'BENILDE', 'CLAUDIA', 'BEJISA', 'KATE', 'CARLOS', 'KENANGA',
       'CALVINIA', 'CHALANE', 'DUMAKO', 'DARIAN', 'ANGGREK', 'BINGIZA',
       'CHANDA', 'DUMILE', 'BANSI', 'URIAH', 'DINEO', 'CEBILE', 'CILIDA',
       '0520202021', 'EMNATI', 'CHENESO', '0520232024', 'EMANG', 'COLIN',
       'CHEDZA', 'EMERAUDE', 'ENAWO', 'DUMAZILE', 'DIANE', 'DANILO',
       'FEZILE', 'DINGANI', 'DJOUNGOU', 'CHERONO', 'ETHEL', 'FELLENG',
       'DIAMONDRA', 'ELIAKIM', 'ELOISE', 'VERNON', 'FREDDY', 'ELEANOR',
       '0820102011', 'FUNSO', 'GINO', 'EUNICE', 'FANTALA', 'FAKIR',
       'FUNANI', 'ENALA', 'FILIPO', '0920102011', 'GIOVANNA', 'HARUNA',
       'FUNDI', 'FLAMBOYAN', 'GELENA', 'GABEKILE', 'GOMBE', 'GAMANE',
     

Which storms meet the thresholds for Scenario 2 and Scenario 3?

In [29]:
# Scenario 2: forecast points with max_wind_kt of at least 64.
# NOTE(review): no distance filter is applied here, so "any_distance_km" is
# not part of the trigger - confirm this is intended.
scenario2_wind = df_rsmc["max_wind_kt"].ge(64)

# Readiness: lead time between 72 and 120 hours (both bounds inclusive).
scenario2_storms_readiness = df_rsmc.loc[
    scenario2_wind & df_rsmc["lt_hour"].between(72, 120), "name"
].unique()

# Action: lead time between 0 and 72 hours (both bounds inclusive, so a
# forecast at exactly 72 hours counts towards readiness and action alike).
scenario2_storms_action = df_rsmc.loc[
    scenario2_wind & df_rsmc["lt_hour"].between(0, 72), "name"
].unique()

In [30]:
# Scenario 3: forecast points with max_wind_kt of at least 90.
# NOTE(review): no distance filter is applied here, so "any_distance_km" is
# not part of the trigger - confirm this is intended.
scenario3_wind = df_rsmc["max_wind_kt"].ge(90)

# Readiness: lead time between 72 and 120 hours (both bounds inclusive).
scenario3_storms_readiness = df_rsmc.loc[
    scenario3_wind & df_rsmc["lt_hour"].between(72, 120), "name"
].unique()

# Action: lead time between 0 and 72 hours (both bounds inclusive, so a
# forecast at exactly 72 hours counts towards readiness and action alike).
scenario3_storms_action = df_rsmc.loc[
    scenario3_wind & df_rsmc["lt_hour"].between(0, 72), "name"
].unique()

In [31]:
# List the columns available in the impact table before adding scenario flags.
cerf_emdat_df.columns

Index(['Nom', 'Total Affected - EMDAT', 'Sinistres', 'CERF Allocations',
       'Scenario 2', 'Scenario 3'],
      dtype='object')

In [33]:
# Flag, for every storm in the impact table, whether its name appears among the
# storms that met each scenario / window combination in the forecast record.
# NOTE(review): matching is by name only, and a storm that is absent from
# df_rsmc altogether is also flagged False - i.e. "no forecast data" cannot be
# told apart from "did not trigger". Confirm the forecast record covers every
# storm listed here.
scenario_storms = {
    "Scenario 2 - Readiness": scenario2_storms_readiness,
    "Scenario 2 - Action": scenario2_storms_action,
    "Scenario 3 - Readiness": scenario3_storms_readiness,
    "Scenario 3 - Action": scenario3_storms_action,
}
for flag_col, storm_names in scenario_storms.items():
    # Vectorized membership test instead of a Python-level loop per column.
    cerf_emdat_df[flag_col] = cerf_emdat_df["Nom"].isin(storm_names)

# Keep the storm name, the four scenario flags and the three impact measures.
df = cerf_emdat_df[
    [
        "Nom",
        *scenario_storms,
        "Total Affected - EMDAT",
        "Sinistres",
        "CERF Allocations",
    ]
]

# Sort by 'Total Affected - EMDAT' in descending order. Values are not rounded
# here; number formatting is applied later by the Styler.
df_sorted = df.sort_values(by="Total Affected - EMDAT", ascending=False)


# Define functions for highlighting and coloring bars
def highlight_true(val):
    """Return a CSS background declaration: red for truthy cells, empty otherwise."""
    if val:
        return "background-color: red"
    return "background-color: "


def _gradient_bar(val, column, color):
    """Return CSS drawing a horizontal bar proportional to ``val``.

    The bar fills ``val / df_sorted[column].max()`` of the cell width with
    ``color`` and leaves the remainder transparent.

    Parameters
    ----------
    val : cell value passed in by ``Styler.map``.
    column : name of the ``df_sorted`` column used to scale the bar.
    color : CSS colour of the filled part of the bar.

    Returns
    -------
    str
        A ``background: linear-gradient(...)`` declaration, or ``""`` when
        ``val`` is not a number, is NaN, or the column maximum is NaN or zero
        (which would otherwise produce an invalid "nan%" / "inf%" width).
    """
    if not isinstance(val, (int, float)) or pd.isna(val):
        return ""
    # Computed once per call; df_sorted is looked up at call time so the bar
    # always scales against the frame currently being styled.
    col_max = df_sorted[column].max()
    if pd.isna(col_max) or col_max == 0:
        return ""
    pct = val / col_max * 100
    return f"background: linear-gradient(90deg, {color} {pct}%, transparent {pct}%);"


def color_bar_affected(val):
    """Orange bar scaled to the maximum of 'Total Affected - EMDAT'."""
    return _gradient_bar(val, "Total Affected - EMDAT", "orange")


def color_bar_sinistres(val):
    """Gold (#FFD700) bar scaled to the maximum of 'Sinistres'."""
    return _gradient_bar(val, "Sinistres", "#FFD700")


def color_bar_cerf(val):
    """Green bar scaled to the maximum of 'CERF Allocations'."""
    return _gradient_bar(val, "CERF Allocations", "green")


# Apply styling
def _format_count(x):
    """Format a number with thousands separators and no decimals; NaN as blank."""
    return f"{int(x):,}" if pd.notna(x) else ""


styled_df = (
    # Red background on every scenario flag that is True.
    df_sorted.style.map(
        highlight_true,
        subset=[
            "Scenario 2 - Readiness",
            "Scenario 2 - Action",
            "Scenario 3 - Readiness",
            "Scenario 3 - Action",
        ],
    )
    # In-cell bars, each scaled to the maximum of its own column.
    .map(color_bar_affected, subset=["Total Affected - EMDAT"])
    .map(color_bar_sinistres, subset=["Sinistres"])
    .map(color_bar_cerf, subset=["CERF Allocations"])
    .format(
        dict.fromkeys(
            ["Total Affected - EMDAT", "Sinistres", "CERF Allocations"],
            _format_count,
        )
    )
    # White table background. Passing a dict to set_table_styles treats its
    # keys as column labels, so the previous {"": ...} form matched no element
    # and had no effect; setting the attribute on the <table> tag does.
    .set_table_attributes('style="background-color: white;"')
)

# Display the styled DataFrame
styled_df

Unnamed: 0,Nom,Scenario 2 - Readiness,Scenario 2 - Action,Scenario 3 - Readiness,Scenario 3 - Action,Total Affected - EMDAT,Sinistres,CERF Allocations
26,IVAN,False,False,False,False,524153.0,487146.0,4625583.0
46,ENAWO,True,True,True,True,434253.0,437443.0,4999601.0
63,FREDDY,True,True,True,True,299000.0,189352.0,7033283.0
38,GIOVANNA,True,True,True,True,250284.0,247014.0,1999893.0
22,INDLALA,False,False,False,False,215198.0,1740911.0,1230903.0
32,HUBERT,False,False,False,False,192132.0,,
44,CHEDZA,False,False,False,False,174007.0,,
58,EMNATI,True,True,True,True,169000.0,172178.0,1470268.0
47,AVA,True,True,False,False,161318.0,161328.0,
59,ANA,False,False,False,False,131555.0,,
