# Loading Data from NHC Hurricane Archive 

Data Availability: [https://www.nhc.noaa.gov/data/hurdat/]
- See format details in `format.pdf`

There are two types of rows for this dataset
- Type 1 Format: entries, date, time,
- Type 2 Format: record_identifier, status, lat, lon, vmax, pres, 34ne, 34se, 34sw, 34nw, 50ne, 50se, 50sw, 50nw, 64ne, 64se, 64sw, 64nw, rmax
   - Note: Date is YYYMMDD and Time is UTC

In [1]:
import pandas as pd
import numpy as np

In [2]:
filename = 'hurdat2-1851-2023-051124.txt'
missingVal = -999
basin='AL'
columnHeaders = ['id', 'name', 'entries', 'date', 'time', 'record_identifier', 'status', 'lat', 'lon', 'vmax', 'pres', '34ne', '34se', '34sw', '34nw', '50ne', '50se', '50sw', '50nw', '64ne', '64se', '64sw', '64nw', 'rmax']

In [3]:
# Reformatting input file to csv and creating a new file named 'data.csv'
with open(filename, 'r') as f_in, open('data.csv', 'w') as f_out:
    tmp = ""
    for line in f_in:
        line = line.replace(' ', '')  # Remove spaces
        if line.startswith(basin):
            tmp = line.strip()  # Store the basin line
        else:
            line = tmp + line  # Prepend the last stored basin
            f_out.write(line)  # Write filtered lines immediately

In [4]:
data = pd.read_csv('data.csv', header=None, names=columnHeaders)
data = data.drop('entries', axis=1)  
data

Unnamed: 0,id,name,date,time,record_identifier,status,lat,lon,vmax,pres,...,34nw,50ne,50se,50sw,50nw,64ne,64se,64sw,64nw,rmax
0,AL011851,UNNAMED,18510625,0,,HU,28.0N,94.8W,80,-999,...,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999
1,AL011851,UNNAMED,18510625,600,,HU,28.0N,95.4W,80,-999,...,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999
2,AL011851,UNNAMED,18510625,1200,,HU,28.0N,96.0W,80,-999,...,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999
3,AL011851,UNNAMED,18510625,1800,,HU,28.1N,96.5W,80,-999,...,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999
4,AL011851,UNNAMED,18510625,2100,L,HU,28.2N,96.8W,80,-999,...,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
54744,AL212023,TWENTY-ONE,20231023,1800,,TD,11.5N,83.2W,25,1007,...,0,0,0,0,0,0,0,0,0,60
54745,AL212023,TWENTY-ONE,20231024,0,,TD,12.2N,83.4W,25,1007,...,0,0,0,0,0,0,0,0,0,60
54746,AL212023,TWENTY-ONE,20231024,130,L,TD,12.4N,83.5W,25,1007,...,0,0,0,0,0,0,0,0,0,60
54747,AL212023,TWENTY-ONE,20231024,600,,TD,13.0N,83.8W,25,1007,...,0,0,0,0,0,0,0,0,0,60


# Filtering Dataset Based on NOAA HRRR
HRRR Data Availability: [https://registry.opendata.aws/noaa-hrrr-pds/]

Here, we will use the AWS CLI and subprocess library to execute commands
1. Download the AWS CLI: [https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html]
2. Find the location of the AWS CLI by running `where aws` in your terminal
3. Replace the `aws_path` variable below with your AWS CLI path


In [8]:
import subprocess

# Replace with the full path to the AWS CLI executable
aws_path = r"C:\Program Files\Amazon\AWSCLIV2\aws.exe"  

# AWS command to list files in the bucket
command = [aws_path, "s3", "ls", "s3://noaa-hrrr-bdp-pds/", "--no-sign-request"]

result = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)

if result.returncode == 0:
    print(result.stdout)
else:
    print(f"Error: {result.stderr}")

                           PRE hrrr.20140730/
                           PRE hrrr.20140731/
                           PRE hrrr.20140801/
                           PRE hrrr.20140802/
                           PRE hrrr.20140803/
                           PRE hrrr.20140804/
                           PRE hrrr.20140805/
                           PRE hrrr.20140806/
                           PRE hrrr.20140807/
                           PRE hrrr.20140808/
                           PRE hrrr.20140809/
                           PRE hrrr.20140810/
                           PRE hrrr.20140811/
                           PRE hrrr.20140812/
                           PRE hrrr.20140813/
                           PRE hrrr.20140814/
                           PRE hrrr.20140815/
                           PRE hrrr.20140816/
                           PRE hrrr.20140817/
                           PRE hrrr.20140818/
                           PRE hrrr.20140819/
                           PRE hrr

### This is an example command to download a sample from the HRRR AWS database
- This command may be used to sync the entire HRRR directory

In [None]:
# Define the AWS CLI command to sync a directory
command = [
    aws_path, "s3", "sync", 
    "s3://noaa-hrrr-bdp-pds/hrrr.20210101/", 
    "hrrr.20210101/",  # Destination directory on your local machine
    "--no-sign-request"
]

result = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)

In [None]:
# Define the AWS CLI command to sync a directory
command = [
    aws_path, "s3", "ls", 
    "s3://noaa-hrrr-bdp-pds/hrrr.20210101/", 
    "hrrr.20210101/",  # Destination directory on your local machine
    "--summarize", "--human-readable", "--recursive"
]

result = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
print(result.stderr)


Unknown options: hrrr.20210101/



### Now, We filter the NHC Hurricane Archive based on the parameters of HRRR

In [None]:
earliest_data = '20140730'