# Historical Validation

Validates Scenarios 2 and 3 against historical cyclones, using CERF allocations and the number of people affected.

In [1]:
%load_ext jupyter_black
%load_ext autoreload
%autoreload 2

In [2]:
import geopandas as gpd
import pandas as pd
from pathlib import Path
from shapely.geometry import LineString
import os
from datetime import datetime
import matplotlib.pyplot as plt
import numpy as np

In [3]:
# Root folder of the data directory, taken from the environment.
# The value is None when the AA_DATA_DIR variable is not set.
AA_DATA_DIR = os.environ.get("AA_DATA_DIR")

In [6]:
# Read the full table of observed cyclone tracks from the private raw data
# folder into a DataFrame.
cyclone_tracks = pd.read_csv(
    Path(AA_DATA_DIR).joinpath(
        "private",
        "raw",
        "moz",
        "rsmc",
        "data_cyclone_SWIO_19851986_to_20222023.csv",
    )
)

In [7]:
# Force every latitude to be non-positive: positive values are negated, while
# zero, negative and missing values are left untouched. Done with a vectorised
# mask rather than a row-by-row apply.
# NOTE(review): presumably the source file mixes sign conventions for
# southern-hemisphere latitudes -- confirm against the data provider.
cyclone_tracks["Lat"] = cyclone_tracks["Lat"].mask(
    cyclone_tracks["Lat"] > 0, -cyclone_tracks["Lat"]
)

# One nautical mile is 1.852 km.
# NOTE(review): assumes "RMW (mn)" is expressed in nautical miles -- confirm.
KM_PER_NAUTICAL_MILE = 1.852

# Work on a copy so the extra column does not leak into cyclone_tracks.
cyclone_tracks_buffer = cyclone_tracks.copy()
cyclone_tracks_buffer["RMW_km"] = (
    cyclone_tracks_buffer["RMW (mn)"] * KM_PER_NAUTICAL_MILE
)

In [4]:
# Read the per-storm impact table (columns shown further down: 'Nom',
# 'Total Affected - EMDAT', 'Sinistres', 'CERF Allocations').
cerf_emdat_df = pd.read_csv(
    Path(AA_DATA_DIR).joinpath(
        "public/exploration/mdg/cerf_emdat_bngrc_data.csv"
    )
)

In [5]:
# Show the impact record(s) whose storm name is BATSIRAI.
cerf_emdat_df.loc[cerf_emdat_df["Nom"].eq("BATSIRAI")]

Unnamed: 0,Nom,Total Affected - EMDAT,Sinistres,CERF Allocations
56,BATSIRAI,112115.0,166671.0,4476918.0


In [10]:
# Distinct maximum-wind readings (kt) recorded along the EMNATI track.
cyclone_tracks.loc[cyclone_tracks["Name"].eq("EMNATI"), "Max wind (kt)"].unique()

array([25., 30., 27., 33., 40., 43., 53., 60., 62., 70., 80., 85., 95.,
       78., 75., 50., 45., 48., 35., 20.])

In [11]:
# Distinct maximum-wind readings (kt) recorded along the BATSIRAI track.
cyclone_tracks.loc[
    cyclone_tracks["Name"].eq("BATSIRAI"), "Max wind (kt)"
].unique()

array([ 20.,  22.,  23.,  25.,  28.,  30.,  40.,  50.,  60.,  85.,  nan,
        45.,  65.,  80.,  90.,  95.,  70., 100., 110.,  35.,  32.,  37.,
        43.,  47.,  42.])

In [12]:
# Location of the admin-0 boundary shapefile inside the public raw data folder.
adm0_path = Path(AA_DATA_DIR).joinpath(
    "public",
    "raw",
    "mdg",
    "cod_ab",
    "mdg_admbnda_adm0_BNGRC_OCHA_20181031.shp",
)

# Load the boundary as a GeoDataFrame.
gdf_adm0 = gpd.read_file(adm0_path)

In [13]:
# Build a 100 km buffer around the admin-0 boundary.
#
# The buffer distance is given in the units of the CRS, so the geometry is
# first reprojected to a metric CRS. Web Mercator (EPSG:3857) is not suitable
# for this: it stretches lengths by roughly 1 / cos(latitude), so 100,000 m
# measured there is less than 100 km on the ground away from the equator.
# A UTM zone estimated from the geometry itself keeps that distortion small.
gdf_adm0_buffer = gdf_adm0.to_crs(gdf_adm0.estimate_utm_crs())

# Apply the buffer of 100 km (100,000 meters)
gdf_adm0_buffer["geometry"] = gdf_adm0_buffer.geometry.buffer(100 * 1000)

# Reproject back to the original CRS
gdf_adm0_buffer = gdf_adm0_buffer.to_crs(gdf_adm0.crs)

In [14]:
# Keep only track points whose "Year" value is 2006 or later.
cyclone_tracks_sel_2006 = cyclone_tracks.loc[cyclone_tracks["Year"].ge(2006)]

In [15]:
# Names of storms (2006 onwards) with at least one track point at or above
# 48 kt maximum wind.
cyclones_since_2006 = cyclone_tracks_sel_2006.loc[
    cyclone_tracks_sel_2006["Max wind (kt)"].ge(48), "Name"
].unique()

In [16]:
# Storm names that satisfy each scenario's wind threshold: a storm qualifies
# when any of its track points (2006 onwards) reaches the threshold.
#   Scenario 2: maximum wind >= 64 kt
#   Scenario 3: maximum wind >= 90 kt
# NOTE(review): no spatial filter is applied in this cell, so the threshold
# may be met anywhere along the track -- confirm that is intended.
scenario2_storms = cyclone_tracks_sel_2006.loc[
    cyclone_tracks_sel_2006["Max wind (kt)"].ge(64), "Name"
].unique()
scenario3_storms = cyclone_tracks_sel_2006.loc[
    cyclone_tracks_sel_2006["Max wind (kt)"].ge(90), "Name"
].unique()

In [17]:
# Inspect the column labels of cerf_emdat_df.
cerf_emdat_df.columns

Index(['Nom', 'Total Affected - EMDAT', 'Sinistres', 'CERF Allocations'], dtype='object')

In [18]:
# Restrict the impact table to storms listed in cyclones_since_2006.
cerf_emdat_df = cerf_emdat_df.loc[
    cerf_emdat_df["Nom"].isin(cyclones_since_2006)
]

In [19]:
# Flag, for every storm in the impact table, whether it appears in the
# Scenario 2 / Scenario 3 storm lists.
# `assign` builds a new frame instead of writing columns onto what may be a
# filtered slice of another frame (which triggers SettingWithCopyWarning), and
# `isin` does the membership test in one vectorised call.
cerf_emdat_df = cerf_emdat_df.assign(
    **{
        "Scenario 2": cerf_emdat_df["Nom"].isin(scenario2_storms),
        "Scenario 3": cerf_emdat_df["Nom"].isin(scenario3_storms),
    }
)

# Keep only the columns shown in the summary table, in display order.
df = cerf_emdat_df[
    [
        "Nom",
        "Scenario 2",
        "Scenario 3",
        "Total Affected - EMDAT",
        "Sinistres",
        "CERF Allocations",
    ]
]

# Sort by 'Total Affected - EMDAT', largest first. No rounding is done here;
# number formatting is handled when the table is styled.
df_sorted = df.sort_values(by="Total Affected - EMDAT", ascending=False)


# Define functions for highlighting and coloring bars
def highlight_true(val):
    """Return the CSS background rule for a flag cell.

    Truthy values get a red background; falsy values get an empty colour.
    """
    if val:
        shade = "red"
    else:
        shade = ""
    return f"background-color: {shade}"


def _gradient_bar(val, column, color):
    """Build the CSS for an in-cell bar sized relative to a column's maximum.

    Parameters
    ----------
    val : scalar
        The cell value being styled.
    column : str
        Column of ``df_sorted`` whose maximum corresponds to a full-width bar.
    color : str
        CSS colour used for the filled part of the bar.

    Returns
    -------
    str
        A ``background: linear-gradient(...)`` rule, or ``""`` when ``val`` is
        not a real number, is missing, or the column maximum is zero/missing
        (which would otherwise produce an ``inf%`` / ``nan%`` width).
    """
    # numpy integer/floating scalars are accepted explicitly: np.int64 is not
    # a subclass of the built-in int, so integer columns would get no bar.
    if not isinstance(val, (int, float, np.integer, np.floating)):
        return ""
    if pd.isna(val):
        return ""
    peak = df_sorted[column].max()
    if pd.isna(peak) or peak == 0:
        return ""
    pct = val / peak * 100
    return f"background: linear-gradient(90deg, {color} {pct}%, transparent {pct}%);"


def color_bar_affected(val):
    """Orange bar scaled to the maximum of 'Total Affected - EMDAT'."""
    return _gradient_bar(val, "Total Affected - EMDAT", "orange")


def color_bar_sinistres(val):
    """Gold (#FFD700) bar scaled to the maximum of 'Sinistres'."""
    return _gradient_bar(val, "Sinistres", "#FFD700")


def color_bar_cerf(val):
    """Green bar scaled to the maximum of 'CERF Allocations'."""
    return _gradient_bar(val, "CERF Allocations", "green")


def _format_count(x):
    """Render a number with thousands separators and no decimals.

    Missing values are rendered as an empty string.
    """
    return f"{int(x):,}" if pd.notna(x) else ""


# Apply styling: red flags for the scenario columns, proportional bars for the
# three numeric columns, and comma-grouped integers for display.
styled_df = (
    df_sorted.style.map(
        highlight_true,
        subset=[
            "Scenario 2",
            "Scenario 3",
        ],
    )
    .map(color_bar_affected, subset=["Total Affected - EMDAT"])
    .map(color_bar_sinistres, subset=["Sinistres"])
    .map(color_bar_cerf, subset=["CERF Allocations"])
    .format(
        {
            "Total Affected - EMDAT": _format_count,
            "Sinistres": _format_count,
            "CERF Allocations": _format_count,
        }
    )
    # The dict form of set_table_styles is keyed by column label, and no
    # column is labelled "", so the rule was never applied to the table.
    # The list form with an empty selector targets the table element itself.
    .set_table_styles(
        [{"selector": "", "props": "background-color: white;"}]
    )
)

# Display the styled DataFrame
styled_df

Unnamed: 0,Nom,Scenario 2,Scenario 3,Total Affected - EMDAT,Sinistres,CERF Allocations
26,IVAN,True,True,524153.0,487146.0,4625583.0
46,ENAWO,True,True,434253.0,437443.0,4999601.0
63,FREDDY,True,True,299000.0,189352.0,7033283.0
38,GIOVANNA,True,True,250284.0,247014.0,1999893.0
22,INDLALA,True,True,215198.0,1740911.0,1230903.0
32,HUBERT,False,False,192132.0,,
44,CHEDZA,False,False,174007.0,,
58,EMNATI,True,True,169000.0,172178.0,1470268.0
47,AVA,True,False,161318.0,161328.0,
59,ANA,False,False,131555.0,,
