# Investigating Precipitation Trends During Cyclones in Mozambique

This notebook contains an exploratory analysis to determine the feasibility of using observational precipitation data from IMERG as a readiness trigger for cyclones in Mozambique. 

In [1]:
# Format code cells with Black via the jupyter_black extension.
%load_ext jupyter_black
# Reload imported modules before each cell runs, so edits to the local `src`
# package are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
import os
import pandas as pd
from dotenv import load_dotenv
from pathlib import Path
import geopandas as gpd
from azure.storage.blob import ContainerClient
import warnings
import plotly.express as px
import plotly.graph_objects as go
import datetime
from src.constants import *
from src.datasources import rsmc, helpers
from src.utils import *

# Raised as part of interpolating with Pandas
# NOTE: this filter is global -- it hides every FutureWarning for the rest of
# the session, not only the interpolation one.
warnings.simplefilter(action="ignore", category=FutureWarning)

# Load environment variables from a local .env file (if one exists).
load_dotenv()

# Blob container client built from DEV_BLOB_GLB_URL (not imported explicitly
# here -- presumably provided by one of the star imports above; TODO confirm).
dev_glb_container_client = ContainerClient.from_container_url(DEV_BLOB_GLB_URL)

# Token used by the px.scatter_mapbox track maps further down; os.getenv
# returns None when MAPBOX_ACCESS_TOKEN is not set.
px.set_mapbox_access_token(os.getenv("MAPBOX_ACCESS_TOKEN"))

## Loading in raw data

We're using: 
- [EM-DAT](https://www.emdat.be/) data that has been preprocessed to include cyclone `SID`s from IBTrACS. We use the `Total Affected` column as an indication of cyclone impact.
- [IBTrACS](https://www.ncei.noaa.gov/products/international-best-track-archive) data on historical cyclone paths
- Mozambique ADM1 boundaries
- Daily precipitation observations from [IMERG](https://gpm.nasa.gov/data/imerg). The gridded observations have been aggregated to ADM1 levels in Mozambique. See `03_process_imerg.ipynb` for details.

In [3]:
# Normalise both data roots to Path objects once, so every `/` join below works
# whether the project constants are plain strings or already Path instances.
data_dir = Path(AA_DATA_DIR)
data_dir_new = Path(AA_DATA_DIR_NEW)

# EM-DAT tropical cyclone impacts, preprocessed to carry IBTrACS storm IDs
emdat_path = (
    data_dir_new
    / "private"
    / "processed"
    / "glb"
    / "emdat"
    / "emdat-tropicalcyclone-2000-2022-processed-sids.csv"
)

# IBTrACS track points (file name indicates the "SI" subset, v04r01)
ibtracs_path = (
    data_dir
    / "public"
    / "raw"
    / "glb"
    / "ibtracs"
    / "IBTrACS.SI.list.v04r01.points/IBTrACS.SI.list.v04r01.points.shp"
)

# Mozambique ADM1 (province) boundaries
adm1_path = (
    data_dir
    / "public"
    / "raw"
    / "moz"
    / "cod_ab"
    / "moz_admbnda_adm1_ine_20190607.shp"
)

df_emdat = pd.read_csv(emdat_path)
gdf_adm1 = gpd.read_file(adm1_path)
gdf_ibtracs = gpd.read_file(ibtracs_path)

# Keep only the provinces listed in ADMS
gdf_adm1_sel = gdf_adm1[gdf_adm1.ADM1_PT.isin(ADMS)]
df_emdat = df_emdat[df_emdat.iso3 == "MOZ"]  # Just want the impacts to Mozambique

## Summarizing historical cyclones

In [None]:
# combining US Wind and Reunion for those with no wind speed
# (USA_WIND is scaled by MIN1_TO_MIN10 before it fills the gaps in REU_WIND).
# This column is added to gdf_ibtracs itself because the track maps later in
# the notebook read it from there.
gdf_ibtracs["REU_USA_WIND"] = gdf_ibtracs["REU_WIND"].fillna(
    gdf_ibtracs["USA_WIND"] * MIN1_TO_MIN10
)

# Build the working table as a fresh frame. Using .assign (rather than writing
# through .loc on a column subset) guarantees ISO_TIME ends up as a real
# datetime column and leaves gdf_ibtracs.ISO_TIME untouched.
df_ibtracs = (
    gdf_ibtracs[["SID", "NAME", "REU_USA_WIND", "ISO_TIME", "LAT", "LON"]]
    .assign(ISO_TIME=lambda d: pd.to_datetime(d["ISO_TIME"]))
    .dropna(subset="REU_USA_WIND")
)

# Interpolate the IBTrACS points to 30min frequency
# Only the numeric columns are interpolated; SID and NAME are constant within a
# storm and are re-attached afterwards, so no text column is ever passed to
# .interpolate().
interp_cols = ["REU_USA_WIND", "LAT", "LON"]
gdfs = []
for sid, group in df_ibtracs.groupby("SID"):
    df_interp = (
        group.set_index("ISO_TIME")[interp_cols]
        .resample("30min")
        .interpolate()
        .reset_index()
    )
    df_interp["SID"] = sid
    # Take the name from the original records: the first resampled row is not
    # guaranteed to coincide with an original observation.
    df_interp["NAME"] = group["NAME"].iloc[0]
    gdf_interp = gpd.GeoDataFrame(
        data=df_interp,
        geometry=gpd.points_from_xy(df_interp["LON"], df_interp["LAT"]),
        crs=4326,
    )
    gdfs.append(gdf_interp)

gdf_ibtracs_interp = gpd.GeoDataFrame(pd.concat(gdfs, ignore_index=True))

In [5]:
# Keep only the interpolated track points that fall inside one of the selected
# provinces. The inner spatial join drops every point that intersects none of
# them and attaches the province name and pcode to the points that remain.
landfall_columns = [
    "ISO_TIME",
    "SID",
    "NAME",
    "REU_USA_WIND",
    "LAT",
    "LON",
    "geometry",
    "ADM1_PT",
    "ADM1_PCODE",
]
gdf_points_in_adm1 = gpd.sjoin(
    gdf_ibtracs_interp,
    gdf_adm1_sel,
    how="inner",
    predicate="intersects",
)
gdf_ibtracs_interp_sel = gdf_points_in_adm1[landfall_columns]

In [6]:
# Now build one row per event, with relevant variables added in

# For every storm, pick the single over-land track point with the highest wind
# speed (earliest one on ties). Taking the time and province from that same
# point keeps them consistent with the wind value; aggregating max wind and
# min time separately produces pairs that match no real point, which leaves
# the province empty and reports the time of first entry instead of the peak.
event_keys = ["SID", "NAME"]
df_ibtracs_sum = (
    gdf_ibtracs_interp_sel[event_keys + ["REU_USA_WIND", "ISO_TIME", "ADM1_PT"]]
    .dropna(subset=event_keys)
    .sort_values(["REU_USA_WIND", "ISO_TIME"], ascending=[False, True])
    .drop_duplicates(subset=event_keys, keep="first")
    .sort_values(event_keys)
    .reset_index(drop=True)
)

# Attach the EM-DAT impact figure (left join: storms without an EM-DAT record
# keep a missing TOTAL_AFFECTED).
df_cyclone_summary = (
    df_ibtracs_sum.merge(
        df_emdat[["sid", "Total Affected"]], left_on="SID", right_on="sid", how="left"
    )
    .drop(columns=["sid"])
    .rename(
        columns={
            "REU_USA_WIND": "MAX_SPEED_ON_LAND",
            "ADM1_PT": "ADM1_OF_MAX",
            "ISO_TIME": "TIME_OF_MAX",
            "Total Affected": "TOTAL_AFFECTED",
        }
    )
)

# Note that these classifications are based on JUST speeds while the cyclone is over our AOI
# So may not necessarily reflect the global classification of the cyclone
df_cyclone_summary["SEVERE_TROPICAL_STORM"] = (
    df_cyclone_summary["MAX_SPEED_ON_LAND"] >= THRESHOLD_SPEED_OPT1
) & (df_cyclone_summary["MAX_SPEED_ON_LAND"] < THRESHOLD_SPEED_OPT2)
df_cyclone_summary["TYPHOON"] = (
    df_cyclone_summary["MAX_SPEED_ON_LAND"] >= THRESHOLD_SPEED_OPT2
)

# Also create a column with just the day, for matching against daily precipitation data
# (normalize() sets the time of day to midnight and keeps the datetime dtype)
df_cyclone_summary["DAY_OF_MAX"] = df_cyclone_summary["TIME_OF_MAX"].dt.normalize()

## Integrating precipitation

Now load in the precipitation data from the processed IMERG files

In [8]:
imerg_path = (
    Path(AA_DATA_DIR) / "public" / "processed" / "moz" / "daily_imerg_precip_adm1.csv"
)

# TODO: Take the 2-day rolling sum?
# Read, keep the provinces in ADMS and parse the dates in one chain, so that
# the `date` column is never assigned onto a filtered slice of another frame.
df_imerg = (
    pd.read_csv(imerg_path)
    .loc[lambda d: d.ADM1.isin(ADMS)]
    .assign(date=lambda d: pd.to_datetime(d["date"]))
)

Create a simple plot to sanity check

In [9]:
# Sanity-check figure: one line panel per province showing the daily totals
# over the full record.
overview_options = {
    "x": "date",
    "y": "total_precipitation",
    "facet_row": "ADM1",
    "title": "IMERG Observed Daily Precipitation Across Selected Provinces in Mozambique",
    "template": "simple_white",
}
fig = px.line(df_imerg, **overview_options)
# Angle the y tick labels on every facet
fig.update_yaxes(tickangle=45)
fig.show()

In [10]:
# Pair every cyclone with the precipitation observed on its DAY_OF_MAX. The
# join is on the date only, so each cyclone gets one row per province present
# in df_imerg for that day.
df_cyclone_precip = (
    df_cyclone_summary.merge(
        df_imerg, left_on="DAY_OF_MAX", right_on="date", how="left"
    )
    # "Unnamed: 0" only exists when the CSV was saved together with its index;
    # errors="ignore" keeps this cell working if that column is absent.
    .drop(columns=["Unnamed: 0", "date"], errors="ignore")
    .rename(columns={"total_precipitation": "TOTAL_PRECIPITATION"})
    .dropna(
        subset="TOTAL_PRECIPITATION"
    )  # Since we don't have precipitation before 2003
)

For cyclone events, what trends are there in the observed daily total precipitation per province and the maximum wind speed over land in Mozambique? We can break this down by province; however, the max wind speed is independent of province and is the maximum across the whole selected area. We can also color points by the estimated impact (total affected population in the country) for each cyclone. Note that many of the cyclones from IBTrACS don't have EM-DAT estimates.

Some observations:
- We do see that higher impact cyclones **do** often have higher max wind speeds (Idai and Dineo, for example)
- Observed daily precipitation is not necessarily a good indicator of the cyclone hitting. Cyclone ANA had a … *(TODO: this observation was left unfinished — complete it.)*

Next steps:
- Need to do some more investigation on approaches for aggregating precipitation from IMERG, for example calculating 2- or 3-day rolling sums.
- Might also be worth calculating return periods for precipitation values to identify what might be notable outlier observations

In [11]:
# One panel per province: precipitation on the cyclone's day of max wind (x)
# against the max wind speed over land (y), coloured by EM-DAT total affected.
impact_colorscale = px.colors.sequential.Reds[2:7]
hover_fields = ["NAME", "DAY_OF_MAX"]
scatter_title = (
    "Daily Precipitation vs Max Wind Speed for Cyclones Making Landfall in Mozambique"
)

fig = px.scatter(
    df_cyclone_precip,
    x="TOTAL_PRECIPITATION",
    y="MAX_SPEED_ON_LAND",
    color="TOTAL_AFFECTED",
    facet_col="ADM1",
    hover_data=hover_fields,
    color_continuous_scale=impact_colorscale,
    template="simple_white",
    title=scatter_title,
)
fig.update_traces(marker_size=10)
fig.show()

## Some case studies

In [48]:
def plot_precip(name, df_imerg, gdf_ibtracs, window_days=30):
    """Plot daily precipitation per province around one cyclone.

    The cyclone's lifetime is taken from the first and last records in
    ``gdf_ibtracs`` whose ``NAME`` equals ``name``, and is shaded in red on a
    line chart of ``df_imerg`` faceted by ``ADM1``. The chart covers the
    lifetime plus ``window_days`` days on either side (bounds exclusive).

    Storms are matched on ``NAME`` only, so if several storms share a name the
    shaded span runs from the first record of the first one to the last record
    of the last one.

    Parameters
    ----------
    name : str
        Value to match against the ``NAME`` column of ``gdf_ibtracs``.
    df_imerg : pandas.DataFrame
        Daily precipitation with ``date`` (datetime), ``total_precipitation``
        and ``ADM1`` columns.
    gdf_ibtracs : pandas.DataFrame or geopandas.GeoDataFrame
        Track records with ``NAME`` and ``ISO_TIME`` columns. ``ISO_TIME`` may
        hold timestamp strings or datetimes.
    window_days : int, default 30
        Number of days shown before the first and after the last track record.

    Raises
    ------
    IndexError
        If no record in ``gdf_ibtracs`` has the requested name.
    """
    # Filter once and parse once; to_datetime accepts both strings and values
    # that are already datetimes.
    track_times = pd.to_datetime(
        gdf_ibtracs.loc[gdf_ibtracs.NAME == name, "ISO_TIME"]
    )
    if track_times.empty:
        raise IndexError(f"No IBTrACS records found for cyclone {name!r}")

    cyclone_start = track_times.iloc[0]
    cyclone_end = track_times.iloc[-1]

    padding = pd.Timedelta(days=window_days)
    in_window = (df_imerg.date > cyclone_start - padding) & (
        df_imerg.date < cyclone_end + padding
    )
    df_imerg_ = df_imerg[in_window]

    fig = px.line(
        df_imerg_,
        x="date",
        y="total_precipitation",
        facet_row="ADM1",
        title=f"Daily Precipitation During Cyclone {name} Across Provinces",
        template="simple_white",
    )
    # Shade the cyclone's lifetime on every facet row
    fig.add_vrect(
        x0=cyclone_start,
        x1=cyclone_end,
        fillcolor="red",
        opacity=0.25,
        line_width=0,
        row="all",
    )
    fig.show()


def map_tracks(name, gdf_ibtracs):
    """Show the track points of one cyclone on a Mapbox scatter map.

    Rows of ``gdf_ibtracs`` whose ``NAME`` equals ``name`` are plotted at their
    geometry coordinates. Marker size follows ``REU_USA_WIND``, with missing
    wind values drawn at size 1. Wind speed and ``ISO_TIME`` are added to the
    hover text. The figure is displayed and nothing is returned.
    """
    track = gdf_ibtracs[gdf_ibtracs.NAME == name]
    points = track.geometry
    marker_sizes = track.REU_USA_WIND.fillna(1)

    fig = px.scatter_mapbox(
        track,
        lat=points.y,
        lon=points.x,
        size=marker_sizes,
        hover_data=["REU_USA_WIND", "ISO_TIME"],
        title=f"Track of Cyclone {name}",
    )
    fig.show()

### 1. Cyclone Idai: 2019-03-04

In [51]:
# Precipitation per province around the storm, then the storm's track on a map.
# Both helpers select records by the NAME column of gdf_ibtracs.
plot_precip("IDAI", df_imerg, gdf_ibtracs)
map_tracks("IDAI", gdf_ibtracs)

### 2. Cyclone Dineo: 2017-02-15

In [52]:
# Precipitation per province around the storm, then the storm's track on a map.
plot_precip("DINEO", df_imerg, gdf_ibtracs)
map_tracks("DINEO", gdf_ibtracs)

### 3. Cyclone Eloise

In [54]:
# Precipitation per province around the storm, then the storm's track on a map.
plot_precip("ELOISE", df_imerg, gdf_ibtracs)
map_tracks("ELOISE", gdf_ibtracs)

In [55]:
# Cyclone Ana: precipitation per province around the storm, then its track.
plot_precip("ANA", df_imerg, gdf_ibtracs)
map_tracks("ANA", gdf_ibtracs)

## Rainfall return periods

Q: How well does a 1 in 5 year max daily rainfall per province correlate with the days where a cyclone has reached its max speed over our selected provinces in Mozambique? 

A: **Not well!** Out of 20 observed cyclones since 2003 and across all 4 provinces, there are only 2 cases where this would have been a successful trigger: 1) Cyclone Gombe in Nampula and 2) Cyclone Ana in Zambezia.

Things to improve: 

- Account for irregularities and lag between rainfall and cyclones by taking 2-5 day rolling sums of rainfall, rather than just daily rainfall.
- Clean up the list of past cyclones that we care about. We might want to filter some of these out as not being cases where we would have wanted to activate. The list of past cyclones here is just everything from IBTrACS that passed over one of the selected provinces in Mozambique. We could also further filter this down per province.
- The calculated date for each cyclone is also not specific to a province. This date is just the date of max wind speed over the entire area across all four selected provinces.

Conclusion: Need to do more investigation to determine if observed rainfall could be part of a trigger for cyclones in Mozambique.

In [174]:
rps = []
rp = 5  # return period, in years, used as the rainfall threshold

# Cyclones whose DAY_OF_MAX is on or before this date are left out of the plots
# (presumably the first day covered by the precipitation record -- TODO confirm)
cyclone_cutoff = "2003-03-03"

for adm in ADMS:
    df_sel = df_imerg[df_imerg.ADM1 == adm]

    # Get annual max precip
    df_annual = (
        df_sel.assign(year=df_sel["date"].dt.year)
        .groupby("year")
        .agg({"total_precipitation": "max"})
        .reset_index()
        .rename(columns={"total_precipitation": "max_precipitation"})
    )

    # Empirical return period of each annual maximum: (n + 1) / rank, where
    # rank 1 is the wettest year
    df_annual["rank"] = df_annual["max_precipitation"].rank(
        ascending=False, method="first"
    )
    df_annual["return_period"] = (len(df_annual) + 1) / df_annual["rank"]

    # Threshold = annual maximum whose empirical return period is closest to
    # `rp`, truncated to a whole number
    nearest_year = (df_annual["return_period"] - rp).abs().idxmin()
    precip = int(df_annual.loc[nearest_year, "max_precipitation"])
    rps.append({"ADM1": adm, f"{rp}yr_return_period_precip": precip})

    # Precipitation in this province on each cyclone's day of max wind
    df_cyclone_ = df_cyclone_summary[
        df_cyclone_summary.DAY_OF_MAX > cyclone_cutoff
    ].merge(
        df_sel[["date", "total_precipitation"]],
        left_on="DAY_OF_MAX",
        right_on="date",
        how="left",
    )
    # Green when the day's rainfall reached the threshold, red otherwise.
    # Cyclones with no rainfall value have no y coordinate and are not drawn.
    df_cyclone_["color"] = (
        df_cyclone_["total_precipitation"]
        .ge(precip)
        .map({True: "green", False: "red"})
    )

    cyclone_trace = go.Scatter(
        x=df_cyclone_["DAY_OF_MAX"],
        y=df_cyclone_["total_precipitation"],
        mode="markers",
        # Marker size follows wind speed; the colour column computed above is
        # passed here so the markers actually show the threshold outcome
        marker=dict(
            size=df_cyclone_["MAX_SPEED_ON_LAND"], color=df_cyclone_["color"]
        ),
        customdata=df_cyclone_[["NAME", "MAX_SPEED_ON_LAND", "TOTAL_AFFECTED"]],
        hovertemplate="<b>Cyclone %{customdata[0]}</b><br>"
        + "Total affected: %{customdata[2]:,.0f} people<br>"
        + "Max wind speed on land: %{customdata[1]:.0f}",
    )
    precip_trace = go.Scatter(
        x=df_sel["date"], y=df_sel["total_precipitation"], mode="lines"
    )
    fig = go.Figure(data=[precip_trace, cyclone_trace])

    fig.add_hline(
        y=precip,
        annotation_text=f"1 in {rp}-year RP",
        annotation_position="top left",
        line_width=3,
    )
    fig.update_layout(
        showlegend=False,
        margin=dict(t=30),
        template="simple_white",
        title=f"Total Observed Precipitation in {adm}",
    )
    fig.show()

# One row per province with its threshold
df_rp = pd.DataFrame(rps)