# Setting some Thresholds for Rainfall

In [52]:
%load_ext jupyter_black
import os
import pandas as pd
import numpy as np
from dotenv import load_dotenv
from pathlib import Path
import geopandas as gpd
from azure.storage.blob import ContainerClient
import warnings
import plotly.express as px
import plotly.graph_objects as go
import datetime
from datetime import timedelta

The jupyter_black extension is already loaded. To reload it, use:
  %reload_ext jupyter_black


In [26]:
# Read variables from a local .env file, if one exists, before looking them up.
# load_dotenv is imported at the top of the notebook but was never called.
load_dotenv()

# NOTE: this silences every warning for the rest of the notebook, including
# pandas/geopandas warnings that may point at real problems.
warnings.filterwarnings("ignore")

# Root folders of the data stores. Indexing os.environ fails with a KeyError
# naming the missing variable instead of an opaque TypeError from Path(None).
AA_DATA_DIR = Path(os.environ["AA_DATA_DIR"])
AA_DATA_DIR_NEW = Path(os.environ["AA_DATA_DIR_NEW"])

# Provinces (matched against ADM1_PT / ADM1 values) that the analysis covers.
ADMS = ["Sofala", "Inhambane", "Nampula", "Zambezia"]

In [5]:
# --- Input file locations ---
# EM-DAT tropical cyclone impacts (private store).
emdat_path = AA_DATA_DIR_NEW.joinpath(
    "private",
    "processed",
    "glb",
    "emdat",
    "emdat-tropicalcyclone-2000-2022-processed-sids.csv",
)

# IBTrACS track points shapefile.
ibtracs_path = Path(AA_DATA_DIR).joinpath(
    "public",
    "raw",
    "glb",
    "ibtracs",
    "IBTrACS.SI.list.v04r01.points/IBTrACS.SI.list.v04r01.points.shp",
)

# Mozambique admin-1 boundaries.
adm1_path = AA_DATA_DIR.joinpath(
    "public",
    "raw",
    "moz",
    "cod_ab",
    "moz_admbnda_adm1_ine_20190607.shp",
)

# --- Read everything into memory ---
df_emdat = pd.read_csv(emdat_path)
gdf_adm1 = gpd.read_file(adm1_path)
gdf_ibtracs = gpd.read_file(ibtracs_path)

# Keep only the provinces listed in ADMS.
gdf_adm1_sel = gdf_adm1.loc[gdf_adm1["ADM1_PT"].isin(ADMS)]

# Keep only the EM-DAT records for Mozambique.
df_emdat = df_emdat.loc[df_emdat["iso3"] == "MOZ"]

In [6]:
# Daily IMERG precipitation per admin-1 unit.
imerg_path = (
    Path(AA_DATA_DIR) / "public" / "processed" / "moz" / "daily_imerg_precip_adm1.csv"
)

# TODO: Take the 2-day rolling sum?
df_imerg = pd.read_csv(imerg_path)
# Take an explicit copy of the filtered rows: the date conversion below then
# writes to an independent frame rather than to a slice of the frame just read
# (chained assignment, which the global warning filter would otherwise hide).
df_imerg = df_imerg[df_imerg.ADM1.isin(ADMS)].copy()
df_imerg["date"] = pd.to_datetime(df_imerg["date"])

## Get landfall dates per cyclone and check dates after landfall

In [177]:
# Selected provinces grown by 50 / 111 (presumably ~50 km expressed in degrees),
# meant to also catch storms that stay just offshore but can still bring heavy rain.
# NOTE(review): this buffered geometry is not passed to the join below, which
# uses the unbuffered gdf_adm1_sel polygons — confirm which one is intended.
province_mask = gdf_adm1["ADM1_PT"].isin(ADMS)
gdf_adm1_sel_buff = gdf_adm1.loc[province_mask].buffer(50 / 111)

# Restrict the track points to 2003 onwards.
gdf_ibtracs_time = gdf_ibtracs.loc[gdf_ibtracs["year"] >= 2003]

# Names of the storms with at least one track point intersecting a selected province.
points_over_provinces = gpd.sjoin(
    gdf_ibtracs_time,
    gdf_adm1_sel,
    how="inner",
    predicate="intersects",
)
landfall_cyclones = points_over_provinces["NAME"].unique()

In [244]:
# Build one table per cyclone and stack them into complete_df. Each table has
# one row per day around landfall with: the storm's status that day
# (Outside / Within / Landfall), the province it was over, the daily rainfall
# of each province and the EM-DAT "Total Affected" figure.
#
# NOTE(review): storms are identified by NAME, so IBTrACS tracks that share a
# name are pooled together and only the window around the earliest landfall is
# kept — confirm this is acceptable or switch to SID.

# Columns summarised per day; hoisted out of the loop because it never changes.
mode_columns = [
    "ADM1_PT",
    "Inhambane_rainfall",
    "Nampula_rainfall",
    "Sofala_rainfall",
    "Zambezia_rainfall",
    "Total Affected",
]

cyc_ls = []
for cyc in landfall_cyclones:
    # Copy so the column assignments below never write into a slice of
    # gdf_ibtracs_time.
    cyc_df = gdf_ibtracs_time[gdf_ibtracs_time["NAME"] == cyc].copy()
    cyc_df["date"] = pd.to_datetime(cyc_df["ISO_TIME"]).dt.date

    # Tag every track point with the province it lies in (NaN when outside).
    cyc_sjoin = gpd.sjoin(cyc_df, gdf_adm1_sel, how="left", predicate="within")
    # A point inside overlapping polygons would be returned more than once;
    # keep one match per point so the index-aligned assignments stay valid.
    cyc_sjoin = cyc_sjoin[~cyc_sjoin.index.duplicated(keep="first")]
    cyc_df["ADM1_PT"] = cyc_sjoin["ADM1_PT"]
    cyc_df["actual_within_land"] = cyc_sjoin["index_right"].notna()
    cyc_df["point_location"] = np.where(
        cyc_df["actual_within_land"], "Within", "Outside"
    )

    # The first point over land is the landfall. A storm selected with
    # "intersects" may have no point strictly "within" a province; skip it
    # instead of failing on an empty lookup.
    points_on_land = cyc_df[cyc_df["actual_within_land"]]
    if points_on_land.empty:
        continue
    first_landfall = points_on_land.index[0]
    cyc_df.loc[cyc_df.index == first_landfall, "point_location"] = "Landfall"
    landfall_time = pd.to_datetime(
        cyc_df[cyc_df["point_location"] == "Landfall"]["ISO_TIME"].values[0]
    )

    # The rainfall is daily, so anchor the window on the landfall *date*.
    # Comparing against the raw timestamp dropped day -2 for every storm that
    # did not make landfall at exactly 00:00, making the window inconsistent.
    landfall_day = landfall_time.normalize()
    df_imerg_cyc = df_imerg[
        (df_imerg["date"] >= landfall_day - timedelta(days=2))
        & (df_imerg["date"] <= landfall_day + timedelta(days=6))
    ]
    if len(df_imerg_cyc) == 0:
        continue
    # Non-inplace drop (no write to a slice of df_imerg); tolerate the CSV
    # index column being absent.
    df_imerg_cyc = df_imerg_cyc.drop(columns=["Unnamed: 0"], errors="ignore")

    # One row per date, one "<province>_rainfall" column per province.
    df_wide = (
        df_imerg_cyc.pivot(index="date", columns="ADM1", values="total_precipitation")
        .add_suffix("_rainfall")
        .reset_index()
    )
    df_wide["date"] = pd.to_datetime(df_wide["date"]).dt.date

    # Right join: keep every rainfall day, even those without a track point.
    merged_df = pd.merge(cyc_df, df_wide, on="date", how="right")
    # Left join: EM-DAT rows of other storms carry no date and were discarded
    # by the group-by below anyway, so there is no need to pull them in.
    merged_df = pd.merge(
        merged_df,
        df_emdat[["sid", "Total Affected"]],
        left_on="SID",
        right_on="sid",
        how="left",
    )

    # Daily status: "Landfall" if any point that day is the landfall point,
    # otherwise the location of the first point of the day.
    cyc_location = (
        cyc_df.groupby("date")["point_location"]
        .agg(lambda s: "Landfall" if (s == "Landfall").any() else s.iloc[0])
        .reset_index(name="status")
    )

    # Collapse the several track points of a day to a single row by taking,
    # per column, the first mode of that day's values.
    mode_df = (
        merged_df.groupby("date")[mode_columns]
        .apply(lambda x: x.mode().iloc[0])
        .reset_index()
    )
    cyclone_df = pd.merge(cyc_location, mode_df, on="date", how="right")
    cyclone_df["storm"] = cyc
    cyc_ls.append(cyclone_df)

# Fresh 0..n-1 index instead of the repeated per-storm indices.
complete_df = pd.concat(cyc_ls, ignore_index=True)

In [245]:
# Look-up table: storm name -> date of the row flagged as "Landfall".
landfall_rows = complete_df.loc[complete_df["status"] == "Landfall"]
landfall_dates = landfall_rows.set_index("storm")["date"].to_dict()


def _days_from_landfall(row):
    """Return the whole days between a row's date and its storm's landfall.

    Gives None when the storm has no entry in landfall_dates.
    """
    if row["storm"] not in landfall_dates:
        return None
    return (row["date"] - landfall_dates[row["storm"]]).days


# Position of every row relative to landfall (0 = landfall day).
complete_df["relative_day"] = complete_df.apply(_days_from_landfall, axis=1)

In [247]:
# 80th percentile of each province's rainfall for every relative day.
rainfall_columns = [
    "Inhambane_rainfall",
    "Nampula_rainfall",
    "Sofala_rainfall",
    "Zambezia_rainfall",
]
rainfall_by_relative_day = complete_df.groupby("relative_day")[rainfall_columns]
percentile_80 = rainfall_by_relative_day.quantile(q=0.80).reset_index()

In [249]:
# Attach each relative day's 80th-percentile rainfall to every row, as
# "<province>_rainfall_80th" columns next to the observed values.
# Threshold columns left over from an earlier run of this cell are dropped
# first, so re-running the cell does not collide on the "_80th" suffix.
# On a first run there is nothing to drop and the result is unchanged.
# The merge is an inner join (the pandas default): rows whose relative_day has
# no entry in percentile_80 are not kept.
stale_threshold_cols = [
    col for col in complete_df.columns if str(col).endswith("_80th")
]
complete_df = pd.merge(
    complete_df.drop(columns=stale_threshold_cols),
    percentile_80,
    on="relative_day",
    suffixes=("", "_80th"),
)

In [251]:
# Flag, per province, the rows whose rainfall is strictly above that relative
# day's 80th-percentile threshold.
for province in ("Inhambane", "Nampula", "Sofala", "Zambezia"):
    observed = complete_df[f"{province}_rainfall"]
    threshold = complete_df[f"{province}_rainfall_80th"]
    complete_df[f"{province}_trigger"] = observed > threshold

In [257]:
# Mean rainfall of each province for every day relative to landfall.
plot_provinces = ["Inhambane", "Nampula", "Sofala", "Zambezia"]
avg_rainfall_per_day = (
    complete_df.groupby("relative_day")[
        [f"{name}_rainfall" for name in plot_provinces]
    ]
    .mean()
    .reset_index()
)

# One line (with markers) per province on a shared figure.
fig = go.Figure()
for name in plot_provinces:
    province_line = go.Scatter(
        x=avg_rainfall_per_day["relative_day"],
        y=avg_rainfall_per_day[f"{name}_rainfall"],
        mode="lines+markers",
        name=name,
        marker=dict(size=8),
    )
    fig.add_trace(province_line)

# Titles, axis labels and theme.
fig.update_layout(
    title="Total Rainfall Per Day",
    xaxis_title="Days Relative to Cyclone Landfall",
    yaxis_title="Total Rainfall (mm)",
    legend_title="Regions",
    template="plotly_white",
)

fig.show()

In [278]:
# Only the rows for the day each storm made landfall.
landfall_data = complete_df.loc[complete_df["status"] == "Landfall"]

# Long format: one row per (landfall province, measured province) pair, with
# the measured province's name recovered from the rainfall column name.
rainfall_value_columns = [
    "Inhambane_rainfall",
    "Nampula_rainfall",
    "Sofala_rainfall",
    "Zambezia_rainfall",
]
rainfall_long = landfall_data.melt(
    id_vars=["ADM1_PT"],
    value_vars=rainfall_value_columns,
    var_name="province_rainfall",
    value_name="rainfall",
)
rainfall_long["province"] = rainfall_long["province_rainfall"].str.replace(
    "_rainfall", ""
)

# Mean rainfall of every measured province, grouped by landfall province.
grouped_rainfall = rainfall_long.groupby(["ADM1_PT", "province"])["rainfall"]
average_rainfall = grouped_rainfall.mean().reset_index()

# Wide format: landfall provinces as rows, measured provinces as columns;
# combinations with no value are shown as 0.
pivot_rainfall = average_rainfall.pivot(
    index="ADM1_PT", columns="province", values="rainfall"
).fillna(0)

# Grouped bar chart: one bar per measured province, side by side within each
# landfall province.
fig = go.Figure()
for province in pivot_rainfall.columns:
    province_bars = go.Bar(
        x=pivot_rainfall.index,
        y=pivot_rainfall[province],
        name=province,
        textposition="auto",
    )
    fig.add_trace(province_bars)

fig.update_layout(
    title="Average Rainfall at Landfall by Province",
    xaxis_title="Landfall Province",
    yaxis_title="Average Rainfall (mm)",
    barmode="group",
    template="plotly_white",
)

fig.show()