In [1]:
import datetime
import shutil
import requests
import glob
import os

import zipfile
import glob
import warnings
import matplotlib.pyplot as plt
import pandas as pd

from multiprocessing import Pool, cpu_count
from functools import partial
from send_emails import send_txt_email

import subprocess

# Suppress every Python warning process-wide: no category, module or message
# filter is given, so the "ignore" action applies to all of them.
# NOTE(review): this also hides deprecation/runtime warnings raised by the
# libraries imported above — confirm that is intended.
warnings.filterwarnings("ignore")

In [2]:
import sys
sys.stdout = open(f'JP_LOG_{datetime.datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ")}.log','wt')

BASE_DIR = "/rds/general/user/zr523/home/researchProject/satellite/himawari"

In [3]:
# Read the cyclone spreadsheet and drop its 'Unnamed: 8' column in one step.
cyclones_path = "./list_of_cyclones.xlsx"
df = pd.read_excel(cyclones_path).drop(columns='Unnamed: 8')

# Keep only the rows whose "Satellite Data" value is "JMA - Himawari 8/9".
uses_himawari = df["Satellite Data"] == "JMA - Himawari 8/9"
himawari_df = df.loc[uses_himawari]
himawari_df

Unnamed: 0,Region,Sub-Region,Satellite Data,Name,SSHWS Category - Peak,Form Date,Dissipated Date,Wikipedia Link
15,Australia,Timor Sea - East Indian Ocean,JMA - Himawari 8/9,Ilsa,Category 5,06-04-2022,15-04-2022,https://en.wikipedia.org/wiki/Cyclone_Ilsa
16,Australia,Timor Sea - East Indian Ocean,JMA - Himawari 8/9,Seroja,Category 3,03-04-2021,12-04-2021,https://en.wikipedia.org/wiki/Cyclone_Seroja
17,Australia,Coral Sea - South Pacific Ocean,JMA - Himawari 8/9,Niran,Category 5,27-02-2021,08-03-2021,https://en.wikipedia.org/wiki/Cyclone_Niran
18,Australia,Timor Sea - East Indian Ocean,JMA - Himawari 8/9,Damien,Category 2,03-02-2020,09-02-2020,https://en.wikipedia.org/wiki/Cyclone_Damien
19,Australia,Timor Sea - East Indian Ocean,JMA - Himawari 8/9,Ferdinand,Category 3,22-02-2020,01-03-2020,https://en.wikipedia.org/wiki/2019%E2%80%9320_...
20,Australia,Timor Sea - East Indian Ocean,JMA - Himawari 8/9,Veronica,Category 4,18-03-2019,31-03-2019,https://en.wikipedia.org/wiki/Cyclone_Veronica
48,West Pacific Ocean,East Asia - Japan,JMA - Himawari 8/9,Noru,Category 5,21-09-2022,01-10-2022,https://en.wikipedia.org/wiki/Typhoon_Noru
49,West Pacific Ocean,East Asia - Japan,JMA - Himawari 8/9,Nesat,Category 2,14-10-2022,20-10-2022,https://en.wikipedia.org/wiki/2022_Pacific_typ...
50,West Pacific Ocean,East Asia - Japan,JMA - Himawari 8/9,Nanmadol,Category 4,09-09-2022,20-09-2022,https://en.wikipedia.org/wiki/Typhoon_Nanmadol...
51,West Pacific Ocean,East Asia - Japan,JMA - Himawari 8/9,Rai,Category 5,11-12-2021,21-12-2021,https://en.wikipedia.org/wiki/Typhoon_Rai


In [4]:
def is_stub_already_present(dest_folder, stub):
  """Tell whether a ``.bz2`` file named ``stub`` already exists in ``dest_folder``.

  Args:
    dest_folder: Directory to look in, with or without a trailing separator.
    stub: Bare file name (no directory part) to look for.

  Returns:
    True if one of the ``*.bz2`` files directly inside ``dest_folder`` is named
    exactly ``stub`` (a "Present: ..." line is printed in that case), else False.
  """
  # Build the pattern with os.path.join so a folder passed without a trailing
  # "/" still works, and escape the folder so characters such as "[" or "*" in
  # its name are matched literally rather than as glob syntax.
  pattern = os.path.join(glob.escape(dest_folder), "*.bz2")
  present = {os.path.basename(path) for path in glob.glob(pattern)}
  if stub in present:
    print(f"Present: {stub}")
    return True
  return False

def fetch_aws_file(year, month, day, hour, stub, dest_folder):
  """Copy one object from the ``noaa-himawari8`` bucket into ``dest_folder``.

  Runs ``aws s3 cp --no-sign-request`` on the key
  ``AHI-L1b-FLDK/<year>/<month>/<day>/<hour>00/<stub>``; month, day and hour
  are zero-padded to two digits.

  Args:
    year, month, day, hour: Integer date parts used to build the object key.
    stub: Object (file) name inside that hour's prefix.
    dest_folder: Local destination handed to ``aws s3 cp``.

  Returns:
    None. A non-zero exit status of the AWS CLI is reported with a printed
    "Failed: ..." line instead of being silently dropped.
  """
  source = f"s3://noaa-himawari8/AHI-L1b-FLDK/{year}/{month:02}/{day:02}/{hour:02}00/{stub}"
  # Pass the arguments as a list rather than splitting a formatted string, so
  # whitespace inside dest_folder or stub cannot break the command apart.
  command = ["aws", "s3", "cp", "--no-sign-request", source, dest_folder]
  process = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True)
  if process.returncode != 0:
    # Best effort: log the failure and carry on with the remaining files.
    print(f"Failed: {stub} (exit {process.returncode}) {process.stderr.strip()}")

def download_himawara_b13(date, name):
  """Download the ``B13_FLDK`` files of one hour into the cyclone's folder.

  Lists the objects in the ``noaa-himawari8`` bucket whose key starts with
  ``HS_H08_<YYYYMMDD>_<HH>00_B13_FLDK_`` under that hour's prefix, then copies
  each one that is not already present into
  ``BASE_DIR/data/bz2/<name lower-cased>/<YYYY-MM-DD>/<HH>00/``.

  Args:
    date: ``datetime`` whose year, month, day and hour select the prefix.
    name: Cyclone name; used for the destination folder and the log lines.

  Returns:
    None. If the listing command reports an error, the error is printed and
    nothing is downloaded (no destination folder is created either).
  """
  # NOTE(review): bucket and "HS_H08" prefix are hard-coded, so dates served by
  # a different satellite would presumably list nothing — confirm coverage.
  year, month, day, hour = date.year, date.month, date.day, date.hour
  stamp = date.strftime("%Y-%m-%d %H:%M")
  prefix = f"s3://noaa-himawari8/AHI-L1b-FLDK/{year}/{month:02}/{day:02}/{hour:02}00/HS_H08_{year}{month:02}{day:02}_{hour:02}00_B13_FLDK_"
  # Argument list instead of str.split(), so no value can be split on whitespace.
  command = ["aws", "s3", "ls", "--no-sign-request", prefix]
  process = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True)

  # The CLI may exit non-zero with nothing on stderr when the prefix simply
  # matches no objects, so only a non-empty stderr is treated as a failure.
  listing_error = process.stderr.strip()
  if process.returncode != 0 and listing_error:
    print(f'[{name}] - {stamp} - Listing failed: {listing_error}')
    return

  # The object name is the last whitespace-separated field of each listing line.
  filenames = [line.split()[-1] for line in process.stdout.splitlines() if line.strip()]

  dest_folder = f"{BASE_DIR}/data/bz2/{name.lower()}/{year}-{month:02}-{day:02}/{hour:02}00/"
  os.makedirs(dest_folder, exist_ok=True)

  print(f'[{name}] - {stamp} - Downloading files ... ')
  for stub in filenames:
    if not is_stub_already_present(dest_folder, stub):
      fetch_aws_file(year, month, day, hour, stub, dest_folder)
  print(f'[{name}] - {stamp} - Downloaded.')

In [5]:
# Process the selected cyclones one after another: download every hour of the
# storm in parallel, record the completion, then send a notification e-mail.
for _, row in himawari_df.iterrows():
  name = row["Name"]
  start_date = datetime.datetime.strptime(row["Form Date"], "%d-%m-%Y")
  # One day is added so the whole dissipation day is covered.
  end_date = datetime.datetime.strptime(row["Dissipated Date"], "%d-%m-%Y") + datetime.timedelta(days=1)

  # Hourly timestamps from start_date up to AND including end_date (i.e. 00:00
  # of the day after dissipation is part of the list).
  current_date = start_date
  dates = [start_date]
  while current_date < end_date:
    current_date += datetime.timedelta(hours=1)
    dates.append(current_date)

  download_func = partial(download_himawara_b13, name=name)
  # The with-block guarantees the workers are torn down if map() raises.
  # close() + join() are still called explicitly so that, on the normal path,
  # workers exit on their own (flushing their output) before the context
  # manager's terminate() runs.
  with Pool(cpu_count()) as pool:
    pool.map(download_func, dates)
    pool.close()
    pool.join()

  # Flush right away so this line is not still buffered when the next pool forks.
  print(f'[{name}] - All downloads are finished.', flush=True)

  # Append a "<name><TAB><local time>" line to the completion record.
  with open("JP_COMPLETE.txt", "a+") as file:
    file.write(f"{name}\t{datetime.datetime.now()}\n")

  subject = f"[COMPLETED] Download - Cyclone {name}"
  message_txt = "Download Completed"
  send_txt_email(message_txt, subject)