# Notebook that downloads GPM rainfall data done per typhoon

In [1]:
%load_ext jupyter_black

In [16]:
import getpass
import os
from pathlib import Path


import pandas as pd
import datetime as dt
from bs4 import BeautifulSoup
import requests

from dotenv import load_dotenv

load_dotenv()

True

In [17]:
# Setting directories
input_dir = (
    Path(os.getenv("STORM_DATA_DIR"))
    / "analysis/02_new_model_input_fji/03_rainfall/input"
)
# Setting path to save the GPM data
gpm_file_name = "gpm_data/rainfall_data/output_hhr/"
gpm_folder_path = Path(input_dir, gpm_file_name)

In [18]:
# To create an account for downloading the data
# follow the instructions here: https://registration.pps.eosdis.nasa.gov/registration/
# Change the user name and provide the password in the code
USERNAME = getpass.getpass(prompt="Username: ", stream=None)
PASSWORD = getpass.getpass(prompt="Password: ", stream=None)

# Setting the number of days prior to the landfall data for which to collect data
DAYS_TO_LANDFALL = 2

Username:  ········
Password:  ········


In [19]:
# Load and clean the typhoon metadata
# We really only care about the landfall date
typhoon_metadata = pd.read_csv(input_dir / "metadata_typhoons.csv").set_index(
    "typhoon"
)
for colname in ["startdate", "enddate", "landfalldate"]:
    typhoon_metadata[colname] = pd.to_datetime(
        typhoon_metadata[colname], format="%d/%m/%Y"
    )
typhoon_metadata

Unnamed: 0_level_0,startdate,enddate,landfalldate,landfall_time
typhoon,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
bebe1972,1972-10-19,1972-10-26,1972-10-22,20:30:00
bebe1972,1972-10-27,1972-10-28,1972-10-27,00:00:00
juliette1973,1973-04-03,1973-04-04,1973-04-03,12:30:00
juliette1973,1973-04-02,1973-04-03,1973-04-03,00:00:00
juliette1973,1973-04-05,1973-04-06,1973-04-05,00:00:00
...,...,...,...,...
tino2020,2020-01-16,2020-01-19,2020-01-17,05:30:00
harold2020,2020-04-01,2020-04-10,2020-04-08,00:30:00
yasa2020,2020-12-13,2020-12-20,2020-12-17,06:00:00
bina2021,2021-01-31,2021-01-31,2021-01-31,13:30:00


In [31]:
# %% Functions used
def list_files(url):
    response = requests.get(url, auth=(USERNAME, PASSWORD))
    if response.status_code == 404:
        print(f"{url} not available")
        return []
    page = response.text
    soup = BeautifulSoup(page, "html.parser")
    return [
        url + "/" + node.get("href")
        for node in soup.find_all("a")
        if node.get("href").endswith("tif")
    ]


def download_gpm_http(start_date, end_date, download_path):
    base_url = "https://arthurhouhttps.pps.eosdis.nasa.gov/pub/gpmdata"

    date_list = pd.date_range(start_date, end_date)
    file_list = []

    for date in date_list:
        print(f"Downloading data for date {date}")
        day_path = download_path / date.strftime("%Y%m%d")
        day_path.mkdir(parents=True, exist_ok=True)

        url = f"{base_url}/{date.strftime('%Y/%m/%d')}/gis"
        tiff_files = list_files(url=url)

        for tiff_file in tiff_files:
            file_name = tiff_file.split("/")[-1]

            file_path = day_path / file_name
            # don't download if file already exists
            if file_path.exists():
                print(f"{file_path} already exists")
                continue
            file_list.append(file_path)
            r = requests.get(tiff_file, auth=(user_name, user_name))
            print(r)
            open(file_path, "wb").write(r.content)

    return file_list

## Download the data

This section is for downloading the data.
It takes a long time to complete.

In [32]:
for typhoon, metadata in typhoon_metadata.iterrows():
    start_date = metadata["landfalldate"] - dt.timedelta(days=DAYS_TO_LANDFALL)
    end_date = metadata["landfalldate"] + dt.timedelta(days=DAYS_TO_LANDFALL)
    if start_date.year < 1987:
        print("too early, skipping")
        continue
    print(f"Downloading data for {typhoon} between {start_date} and {end_date}") 
    download_gpm_http(start_date=start_date, 
                      end_date=end_date, 
                      download_path=gpm_folder_path / typhoon / "GPM")

too early, skipping
too early, skipping
too early, skipping
too early, skipping
too early, skipping
too early, skipping
too early, skipping
too early, skipping
too early, skipping
too early, skipping
too early, skipping
too early, skipping
too early, skipping
too early, skipping
too early, skipping
too early, skipping
too early, skipping
too early, skipping
too early, skipping
too early, skipping
too early, skipping
too early, skipping
too early, skipping
too early, skipping
too early, skipping
too early, skipping
too early, skipping
Downloading data for tusi1987 between 1987-01-16 00:00:00 and 1987-01-20 00:00:00
Downloading data for date 1987-01-16 00:00:00
https://arthurhouhttps.pps.eosdis.nasa.gov/pub/gpmdata/1987/01/16/gis not available
Downloading data for date 1987-01-17 00:00:00
https://arthurhouhttps.pps.eosdis.nasa.gov/pub/gpmdata/1987/01/17/gis not available
Downloading data for date 1987-01-18 00:00:00
https://arthurhouhttps.pps.eosdis.nasa.gov/pub/gpmdata/1987/01/18/gis no

NameError: name 'user_name' is not defined