## Hurrican Research Division --> Atlantic Hurrican date from 1851 to 2024
- This notebook is intended to keep all data processing concise. To take the raw data and process it into usable data.
- The following section is transforming the data set, hurdat, a data set from the Hurricane Research Division. The data set came in a txt file with no column names. From there it is converted into a proccessed csv file and placed into the processed folder in the data folder.  
- Link : https://www.aoml.noaa.gov/hrd/hurdat/Data_Storm.html

In [1]:
import csv
import os

# Define file paths, take the raw unprocessed data and produce a cleaned CSV file placing it into processed folder
input_file = "../data/raw/hurdat2_atlantic.txt"
output_folder = "../data/processed"
output_file = os.path.join(output_folder, "hurdat2_atlantic.csv")

os.makedirs(output_folder, exist_ok=True)# Ensure the output directory exists

# Open the input file and output CSV file
with open(input_file, 'r') as f_in, open(output_file, 'w', newline='') as f_out:
    writer = csv.writer(f_out)
    
    writer.writerow(["Storm_id", "Storm_name", "Date", "Time", "Type_of_storm", "Latitude", "Longitude", "Wind_knots", "Pressure_mb"])
    
    current_id = ""
    current_name = ""
    
    for line in f_in: # Read each line in the input file
        line = line.strip() # Remove leading/trailing whitespace
        if not line: # Skip empty lines
            continue
        
        # Header line (storm start)
        if line[0].isalpha() and "," in line: # Identify header lines
            parts = line.split(",") # Split by comma
            current_id = parts[0].strip() # Storm ID
            current_name = parts[1].strip() # Storm name
            continue
        
        # Data line
        parts = [p.strip() for p in line.split(",")] # Split and strip each part
        if len(parts) >= 7: # Ensure there are enough parts
            date = parts[0] # Date
            time = parts[1] # Time
            type_ = parts[3] # Type of storm
            lat = parts[4] # Latitude
            lon = parts[5] # Longitude
            wind = parts[6] # Wind knots
            pressure = parts[7] if len(parts) > 7 else "" # Pressure in mb (if available)
            
            writer.writerow([current_id, current_name, date, time, type_, lat, lon, wind, pressure]) # Write to CSV


In [28]:
import pandas as pd      
import numpy as np
import datetime as dt
    
atlantic_storm = pd.read_csv("../data/processed/hurdat2_atlantic.csv")
atlantic_storm['Date'] = atlantic_storm['Date'].astype('string') # Ensure date is string
atlantic_storm['Time'] = atlantic_storm['Time'].apply(lambda x: f"{int(x):04d}") # Ensure time is 4 digits

atlantic_storm['Timestamp(UTC)'] = pd.to_datetime(atlantic_storm['Date'] + atlantic_storm['Time'], format='%Y%m%d%H%M') # Combine date and time into a single timestamp


# Convert Latitude (N/S)
atlantic_storm['Lat'] = atlantic_storm['Latitude'].str[:-1].astype(float)
atlantic_storm['Lat'] *= np.where(atlantic_storm['Latitude'].str[-1] == 'S', -1, 1)

# Convert Longitude (W/E)
atlantic_storm['Lon'] = atlantic_storm['Longitude'].str[:-1].astype(float)
atlantic_storm['Lon'] *= np.where(atlantic_storm['Longitude'].str[-1] == 'W', -1, 1)

atlantic_storm['Wind_mph'] = atlantic_storm['Wind_knots'] * 1.15078 # Convert knots to mph
atlantic_storm['Month'] = atlantic_storm["Timestamp(UTC)"].dt.month # Extract month from timestamp
atlantic_storm['Year'] = atlantic_storm["Timestamp(UTC)"].dt.year # Extract year from timestamp
atlantic_storm['Storm_duration_days'] = atlantic_storm.groupby('Storm_id')['Timestamp(UTC)'].transform(lambda x: (x.max() - x.min()).days) # Calculate storm duration in days
atlantic_storm['Storm_category'] = pd.cut(atlantic_storm['Wind_mph'], 
                                          bins=[0, 38, 73, 96, 111, 130, np.inf], 
                                          labels=['Tropical Depression', 'Tropical Storm', 'Category 1', 'Category 2', 'Category 3', 'Category 4/5'], 
                                          right=False) # Categorize storm intensity


atlantic_storm['Timestamp'] = pd.to_datetime(atlantic_storm['Date'] + atlantic_storm['Time'], format='%Y%m%d%H%M') # Combine date and time into a single timestamp
cols = ["Storm_id", "Storm_name", "Timestamp(UTC)", "Storm_duration_days", "Storm_category", "Type_of_storm", "Month", "Year", "Wind_mph", "Wind_knots", "Pressure_mb", "Lat", "Lon"]
atlantic_storm = atlantic_storm[cols] # Reorder columns

atlantic_storm.to_csv("../data/processed/atlantic_storms_cleaned.csv", index=False) # Save cleaned data to CSV

In [27]:
# Wilmington bounding box
lat_min, lat_max = 34.0, 34.5
lon_min, lon_max = -78.0, -77.5

df_wilmington = atlantic_storm[
    (atlantic_storm['Lat'] >= lat_min) & (atlantic_storm['Lat'] <= lat_max) &
    (atlantic_storm['Lon'] >= lon_min) & (atlantic_storm['Lon'] <= lon_max)]
df_wilmington

Unnamed: 0,Storm_id,Storm_name,Timestamp(UTC),Storm_duration_days,Storm_category,Type_of_storm,Month,Year,Wind_mph,Wind_knots,Pressure_mb,Lat,Lon
556,AL021857,UNNAMED,1857-09-13 06:00:00,11,Category 1,HU,9,1857,92.0624,80,-999,34.5,-77.5
2347,AL051872,UNNAMED,1872-10-25 00:00:00,6,Tropical Storm,TS,10,1872,57.539,50,-999,34.3,-77.7
2348,AL051872,UNNAMED,1872-10-25 01:00:00,6,Tropical Storm,TS,10,1872,57.539,50,-999,34.4,-77.7
2873,AL021876,UNNAMED,1876-09-17 14:00:00,7,Category 1,HU,9,1876,92.0624,80,980,34.4,-77.6
5947,AL071888,UNNAMED,1888-10-11 18:00:00,4,Tropical Storm,TS,10,1888,63.2929,55,-999,34.5,-77.5
7717,AL041894,UNNAMED,1894-09-28 06:00:00,13,Tropical Storm,TS,9,1894,69.0468,60,-999,34.4,-77.6
9629,AL031901,UNNAMED,1901-07-12 22:00:00,9,Tropical Storm,TS,7,1901,40.2773,35,-999,34.0,-77.9
11238,AL011907,UNNAMED,1907-06-29 18:00:00,6,Tropical Storm,EX,6,1907,57.539,50,-999,34.0,-78.0
20791,AL021946,UNNAMED,1946-07-06 12:00:00,5,Tropical Storm,TS,7,1946,51.7851,45,1005,34.4,-78.0
21673,AL071949,UNNAMED,1949-09-13 06:00:00,3,Tropical Storm,TS,9,1949,40.2773,35,-999,34.2,-77.6


In [4]:
from geopy.distance import geodesic

# Example: Wilmington shelters
shelters = pd.DataFrame({
    'name': ['Shelter A', 'Shelter B'],
    'Lat': [34.22, 34.28],
    'Lon': [-77.94, -77.85]
})

# Compute distance (km) from each storm point to each shelter
def min_distance_to_shelters(storm_point):
    distances = [geodesic((storm_point['Lat'], storm_point['Lon']), (s['Lat'], s['Lon'])).km for _, s in shelters.iterrows()]
    return min(distances)

df_wilmington['min_distance_km'] = df_wilmington.apply(min_distance_to_shelters, axis=1)
df_wilmington.to_csv("../data/processed/hurdat2_wilmington.csv", index=False)
