# Historical Validation

Validates the Scenario 2, 2.5 and 3 triggers against historical storms, using CERF allocations and the number of people affected.

In [24]:
%load_ext jupyter_black
%load_ext autoreload
%autoreload 2

The jupyter_black extension is already loaded. To reload it, use:
  %reload_ext jupyter_black
The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [25]:
import geopandas as gpd
import pandas as pd
from pathlib import Path
from shapely.geometry import LineString
import os
from datetime import datetime
import matplotlib.pyplot as plt
import numpy as np
from src.datasources import codab, rsmc
from src import constants

In [26]:
# Historical RSMC forecasts with pre-computed distance columns.
df_rsmc = rsmc.load_historical_forecast_distances()

# Every column whose name contains "_distance_km"; the row-wise minimum
# across them is stored as `any_distance_km`.
distance_cols = df_rsmc.filter(like="_distance_km").columns.tolist()
df_rsmc["any_distance_km"] = df_rsmc.loc[:, distance_cols].min(axis="columns")

In [27]:
# Storm-level impact table (columns: Nom, Total Affected - EMDAT, Sinistres,
# CERF Allocations).
cerf_emdat_path = Path(constants.AA_DATA_DIR).joinpath(
    "public/exploration/mdg/cerf_emdat_bngrc_data.csv"
)
cerf_emdat_df = pd.read_csv(cerf_emdat_path)

In [28]:
# Spot-check: impact figures recorded for storm BATSIRAI.
cerf_emdat_df.loc[cerf_emdat_df["Nom"].eq("BATSIRAI")]

Unnamed: 0,Nom,Total Affected - EMDAT,Sinistres,CERF Allocations
56,BATSIRAI,112115.0,166671.0,4476918.0


In [29]:
# Admin-1 boundary shapefile under the shared data directory.
adm1_path = Path(constants.AA_DATA_DIR).joinpath(
    "public",
    "raw",
    "mdg",
    "cod_ab",
    "mdg_admbnda_adm1_BNGRC_OCHA_20181031.shp",
)
gdf_adm1 = gpd.read_file(adm1_path)

# Restrict to the admin-1 units listed in constants.ADMS.
is_selected_adm = gdf_adm1["ADM1_EN"].isin(constants.ADMS)
gdf_adm1_sel = gdf_adm1[is_selected_adm]

In [30]:
# Build a 100 km buffer around the selected admin-1 units.
# The buffer distance is expressed in the units of the projected CRS
# (assumed to be metres for constants.mdg_epsg — TODO confirm).
buffer_distance = 100 * 1000

# Reproject to the CRS given by constants.mdg_epsg before buffering; to_crs
# returns a new frame, so gdf_adm1_sel itself is left untouched.
gdf_adm1_projected = gdf_adm1_sel.to_crs(epsg=constants.mdg_epsg)
gdf_adm1_projected["geometry"] = gdf_adm1_projected.geometry.buffer(
    buffer_distance
)

# Return to the CRS of the original admin boundaries.
gdf_adm1_buffer = gdf_adm1_projected.to_crs(gdf_adm1.crs)

Identify which storms meet the trigger conditions for Scenarios 2, 2.5 and 3.

In [31]:
from shapely.geometry import Point

# Turn every RSMC row into a point geometry from its longitude/latitude.
# points_from_xy builds the whole geometry array in one vectorised call
# instead of creating one Point per row in a Python loop.
gdf_rsmc = gpd.GeoDataFrame(
    df_rsmc,
    geometry=gpd.points_from_xy(df_rsmc["longitude"], df_rsmc["latitude"]),
    crs="EPSG:4326",
)

# Names of storms with at least one point inside the buffered admin-1 units.
storms_within_aoi = gpd.sjoin(
    gdf_rsmc, gdf_adm1_buffer, how="inner", predicate="intersects"
)["name"].unique()

# Names of storms with at least one point inside the unbuffered admin-1 units.
storms_onland = gpd.sjoin(
    gdf_rsmc, gdf_adm1_sel, how="inner", predicate="intersects"
)["name"].unique()

In [32]:
# Inspect the storm names that have at least one point inside the buffered area.
storms_within_aoi

array(['ANAIS', '0120132014', 'ABELA', 'AVA', 'ANA', 'ALVARO', 'BELNA',
       'BATSIRAI', 'CHALANE', 'DUMAKO', 'BINGIZA', 'CHANDA', 'EMNATI',
       'CHENESO', 'DANDO', 'CHEDZA', 'ENAWO', 'DIANE', 'FELLENG',
       'ELIAKIM', 'ELOISE', 'FREDDY', 'ELEANOR', '0820132014', 'FANTALA',
       'FRANCISCO', 'FILIPO', 'GIOVANNA', 'HARUNA', 'GOMBE', 'GAMANE',
       'HEROLD', 'IRINA', '1120142015', 'HALIBA', 'JASMINE', 'HELLEN',
       'IMAN'], dtype=object)

In [33]:
# Scenario 2
# Rows that belong to a storm in storms_within_aoi, with
# MG_distance_km <= 100 and max_wind_kt >= 64.
scenario2_base = (
    df_rsmc["name"].isin(storms_within_aoi)
    & df_rsmc["MG_distance_km"].le(100)
    & df_rsmc["max_wind_kt"].ge(64)
)

# NOTE(review): Series.between is inclusive on both ends, so lt_hour == 72
# satisfies both the readiness and the action window — confirm intended.

# Readiness: lt_hour between 72 and 120
scenario2_storms_readiness = df_rsmc.loc[
    scenario2_base & df_rsmc["lt_hour"].between(72, 120), "name"
].unique()

# Action: lt_hour between 0 and 72
scenario2_storms_action = df_rsmc.loc[
    scenario2_base & df_rsmc["lt_hour"].between(0, 72), "name"
].unique()

In [34]:
# Scenario 3
# Rows that belong to a storm in storms_within_aoi, with
# MG_distance_km <= 100 and max_wind_kt >= 90.
scenario3_base = (
    df_rsmc["name"].isin(storms_within_aoi)
    & df_rsmc["MG_distance_km"].le(100)
    & df_rsmc["max_wind_kt"].ge(90)
)

# NOTE(review): Series.between is inclusive on both ends, so lt_hour == 72
# satisfies both the readiness and the action window — confirm intended.

# Readiness: lt_hour between 72 and 120
scenario3_storms_readiness = df_rsmc.loc[
    scenario3_base & df_rsmc["lt_hour"].between(72, 120), "name"
].unique()

# Action: lt_hour between 0 and 72
scenario3_storms_action = df_rsmc.loc[
    scenario3_base & df_rsmc["lt_hour"].between(0, 72), "name"
].unique()

In [35]:
# Scenario 2.5
# A row qualifies if EITHER of these holds:
#   * storm in storms_within_aoi, MG_distance_km <= 100, max_wind_kt >= 90
#   * storm in storms_onland, MG_distance_km == 0, max_wind_kt >= 64
near_and_strong = (
    df_rsmc["name"].isin(storms_within_aoi)
    & df_rsmc["MG_distance_km"].le(100)
    & df_rsmc["max_wind_kt"].ge(90)
)
onland_and_moderate = (
    df_rsmc["name"].isin(storms_onland)
    & df_rsmc["MG_distance_km"].eq(0)
    & df_rsmc["max_wind_kt"].ge(64)
)
scenario2_point_5_base = near_and_strong | onland_and_moderate

# NOTE(review): Series.between is inclusive on both ends, so lt_hour == 72
# satisfies both the readiness and the action window — confirm intended.

# Readiness: lt_hour between 72 and 120
scenario2_point_5_storms_readiness = df_rsmc.loc[
    scenario2_point_5_base & df_rsmc["lt_hour"].between(72, 120), "name"
].unique()

# Action: lt_hour between 0 and 72
scenario2_point_5_storms_action = df_rsmc.loc[
    scenario2_point_5_base & df_rsmc["lt_hour"].between(0, 72), "name"
].unique()

In [36]:
# Check which columns the impact table has before joining the season onto it.
cerf_emdat_df.columns

Index(['Nom', 'Total Affected - EMDAT', 'Sinistres', 'CERF Allocations'], dtype='object')

In [37]:
# Check which columns the RSMC forecast table has ("name" and "season" are joined below).
df_rsmc.columns

Index(['valid_time', 'lt_hour', 'latitude', 'longitude', 'max_wind_kt',
       'season', 'name', 'numberseason', 'issue_time', 'MG_distance_km',
       'any_distance_km'],
      dtype='object')

In [38]:
# Attach each storm's season to the impact table. A left join keeps impact
# rows whose "Nom" has no matching RSMC "name" (season is NaN for those).
# NOTE(review): if a storm name occurs in more than one season in df_rsmc,
# this join duplicates that storm's impact row — confirm names are unique.
storm_seasons = df_rsmc[["name", "season"]].drop_duplicates()

# Drop any "name"/"season" columns left by a previous run of this cell so that
# re-running it does not produce suffixed duplicates (name_x, season_x, ...),
# which would break the later "season" lookup.
cerf_emdat_df = cerf_emdat_df.drop(
    columns=["name", "season"], errors="ignore"
).merge(
    storm_seasons,
    left_on="Nom",
    right_on="name",
    how="left",
)

In [39]:
# Keep only storms that have at least one point inside the buffered area.
# The explicit copy makes the result an independent frame, so adding columns
# to it later does not raise SettingWithCopyWarning (pandas < 3).
cerf_emdat_df = cerf_emdat_df[
    cerf_emdat_df["Nom"].isin(storms_within_aoi)
].copy()
cerf_emdat_df

Unnamed: 0,Nom,Total Affected - EMDAT,Sinistres,CERF Allocations,name,season
34,BINGIZA,115215.0,267099.0,,BINGIZA,20102011
36,CHANDA,,,,CHANDA,20112012
37,DANDO,,,,DANDO,20112012
38,GIOVANNA,250284.0,247014.0,1999893.0,GIOVANNA,20112012
39,IRINA,85015.0,,,IRINA,20112012
40,HARUNA,40281.0,41655.0,3003483.0,HARUNA,20122013
43,HELLEN,1736.0,,,HELLEN,20132014
44,CHEDZA,174007.0,,,CHEDZA,20142015
46,ENAWO,434253.0,437443.0,4999601.0,ENAWO,20162017
47,AVA,161318.0,161328.0,,AVA,20172018


In [40]:
# Output column -> storm names that met the corresponding trigger.
scenario_triggers = {
    "Scenario 2 - Readiness": scenario2_storms_readiness,
    "Scenario 2 - Action": scenario2_storms_action,
    "Scenario 2.5 - Readiness": scenario2_point_5_storms_readiness,
    "Scenario 2.5 - Action": scenario2_point_5_storms_action,
    "Scenario 3 - Readiness": scenario3_storms_readiness,
    "Scenario 3 - Action": scenario3_storms_action,
}

# One boolean column per trigger: True when the storm's name is among the
# storms that met it. `.assign` returns a new frame rather than writing into
# the filtered frame from the previous cell, which avoids
# SettingWithCopyWarning and keeps this cell safe to re-run.
cerf_emdat_df = cerf_emdat_df.assign(
    **{
        column: cerf_emdat_df["Nom"].isin(triggered_storms)
        for column, triggered_storms in scenario_triggers.items()
    }
)

# Column order for the summary table: identifiers, trigger flags, impact.
df = cerf_emdat_df[
    [
        "Nom",
        "season",
        *scenario_triggers,
        "Total Affected - EMDAT",
        "Sinistres",
        "CERF Allocations",
    ]
]

# Largest EM-DAT "Total Affected" first. Number formatting is applied when
# the table is styled, not here.
df_sorted = df.sort_values(by="Total Affected - EMDAT", ascending=False)


# Define functions for highlighting and coloring bars
def highlight_true(val):
    """Return a CSS background declaration: red for truthy values, empty otherwise.

    Parameters
    ----------
    val : Any
        Cell value; only its truthiness is used.

    Returns
    -------
    str
        ``"background-color: red"`` when ``val`` is truthy, else
        ``"background-color: "`` (no colour).
    """
    return "background-color: " + ("red" if val else "")


def gradient_bar_css(val, color, max_val):
    """Return CSS that draws a left-aligned bar filling ``val / max_val`` of a cell.

    Parameters
    ----------
    val : Any
        Cell value. Anything that is not a real number, or is missing,
        produces no bar.
    color : str
        CSS colour of the filled part of the bar.
    max_val : number
        Value that corresponds to a full-width bar.

    Returns
    -------
    str
        A ``background: linear-gradient(...)`` declaration, or ``""`` when no
        bar can be drawn (non-numeric/missing ``val``, or a missing or
        non-positive ``max_val``, which would otherwise yield ``nan%``/``inf%``).
    """
    is_number = isinstance(val, (int, float, np.integer, np.floating))
    if not is_number or pd.isna(val):
        return ""
    if pd.isna(max_val) or max_val <= 0:
        return ""
    pct = val / max_val * 100
    return (
        f"background: linear-gradient(90deg, {color} {pct}%, "
        f"transparent {pct}%);"
    )


def color_bar_affected(val):
    """Orange bar scaled to the largest "Total Affected - EMDAT" in ``df_sorted``."""
    return gradient_bar_css(
        val, "orange", df_sorted["Total Affected - EMDAT"].max()
    )


def color_bar_sinistres(val):
    """Gold (#FFD700) bar scaled to the largest "Sinistres" in ``df_sorted``."""
    return gradient_bar_css(val, "#FFD700", df_sorted["Sinistres"].max())


def color_bar_cerf(val):
    """Green bar scaled to the largest "CERF Allocations" in ``df_sorted``."""
    return gradient_bar_css(val, "green", df_sorted["CERF Allocations"].max())


# Apply styling
# Boolean trigger columns, highlighted when True.
scenario_flag_cols = [
    "Scenario 2 - Readiness",
    "Scenario 2 - Action",
    "Scenario 2.5 - Readiness",
    "Scenario 2.5 - Action",
    "Scenario 3 - Readiness",
    "Scenario 3 - Action",
]


def format_count(value):
    """Format a number with thousands separators and no decimals; blank if missing."""
    return f"{int(value):,}" if pd.notna(value) else ""


styled_df = (
    df_sorted.style.map(highlight_true, subset=scenario_flag_cols)
    # In-cell bars, each scaled to the maximum of its own column
    .map(color_bar_affected, subset=["Total Affected - EMDAT"])
    .map(color_bar_sinistres, subset=["Sinistres"])
    .map(color_bar_cerf, subset=["CERF Allocations"])
    .format(
        {
            "season": lambda x: (x if pd.notna(x) else ""),
            "Total Affected - EMDAT": format_count,
            "Sinistres": format_count,
            "CERF Allocations": format_count,
        }
    )
    # NOTE(review): the dict form of set_table_styles is keyed by column
    # label, and "" is not a column of df_sorted, so this may not style the
    # table at all — confirm the white background is actually applied.
    .set_table_styles(
        {"": [{"selector": "table", "props": "background-color: white;"}]}
    )
)

# Show the styled table as the cell output
styled_df

Unnamed: 0,Nom,season,Scenario 2 - Readiness,Scenario 2 - Action,Scenario 2.5 - Readiness,Scenario 2.5 - Action,Scenario 3 - Readiness,Scenario 3 - Action,Total Affected - EMDAT,Sinistres,CERF Allocations
46,ENAWO,20162017,True,True,True,True,True,True,434253.0,437443.0,4999601.0
63,FREDDY,20222023,True,True,True,True,True,True,299000.0,189352.0,7033283.0
38,GIOVANNA,20112012,True,True,True,True,True,True,250284.0,247014.0,1999893.0
44,CHEDZA,20142015,False,False,False,False,False,False,174007.0,,
58,EMNATI,20212022,True,True,True,True,True,True,169000.0,172178.0,1470268.0
47,AVA,20172018,False,True,False,True,False,False,161318.0,161328.0,
59,ANA,20212022,False,False,False,False,False,False,131555.0,,
34,BINGIZA,20102011,True,True,True,True,False,True,115215.0,267099.0,
56,BATSIRAI,20212022,True,True,True,True,True,True,112115.0,166671.0,4476918.0
62,CHENESO,20222023,False,False,False,False,False,False,90870.0,90519.0,
