In [19]:
import os
import csv
from datetime import datetime, timezone
import re

# Two-letter basin identifiers; a bare basin code is never accepted as a storm name.
_BASIN_CODES = ("AL", "EP", "CP", "WP", "IO", "SH")

# Spelled-out numbers up to twenty; these are discarded as name candidates.
_NUMBER_WORDS = frozenset({
    'ONE', 'TWO', 'THREE', 'FOUR', 'FIVE', 'SIX', 'SEVEN', 'EIGHT', 'NINE', 'TEN',
    'ELEVEN', 'TWELVE', 'THIRTEEN', 'FOURTEEN', 'FIFTEEN', 'SIXTEEN', 'SEVENTEEN',
    'EIGHTEEN', 'NINETEEN', 'TWENTY'
})


def _strip_basin_suffix(name):
    """Return `name` without a trailing basin code, or `name` unchanged if the remainder is not purely alphabetic."""
    for code in _BASIN_CODES:
        if name.endswith(code):
            stem = name[:-len(code)].strip()
            # An empty or non-alphabetic stem means the suffix was not a decoration.
            if stem.isalpha():
                return stem
    return name


def _choose_storm_name(candidates):
    """Pick a single upper-case name from `candidates`, which are unique and in first-seen order."""
    if not candidates:
        return 'DISTURBANCE'
    if candidates == ['INVEST']:
        return 'INVEST'

    # 'INVEST' only wins when it is the sole candidate.
    names = [name for name in candidates if name != 'INVEST']
    if len(names) == 1:
        return names[0]

    # Pass 1: treat a trailing basin code as a decoration only when the
    # remaining stem was itself seen as a name. This keeps real names that
    # merely end in "AL"/"IO"/"EP"/... (e.g. CHANTAL) intact while still
    # merging variants such as BERYL / BERYLAL.
    observed = set(names)
    merged = set()
    for name in names:
        stem = _strip_basin_suffix(name)
        merged.add(stem if stem in observed else name)
    if len(merged) == 1:
        return merged.pop()

    # Pass 2: fall back to stripping unconditionally, so that variants that
    # differ only by their basin suffix still collapse to one name.
    stripped = {_strip_basin_suffix(name) for name in names}
    if len(stripped) == 1:
        return stripped.pop()

    # Names genuinely disagree: keep the one that appeared last in the file.
    return names[-1]


def extract_storm_info(dat_file_path):
    """
    Extract storm name, start date, end date, and year from a .dat file's content.

    Each line is split on whitespace. The token at index 27 (trailing comma
    removed, upper-cased) is collected as a storm-name candidate, and the token
    at index 2, reduced to its digits, is parsed as a YYYYMMDDHH timestamp when
    it is exactly 10 digits long.

    Candidates that are basin codes, contain a digit, or are spelled-out
    numbers up to twenty are discarded. With no candidates left the name is
    'DISTURBANCE'; 'INVEST' is used only when it is the sole candidate.

    Parameters
    ----------
    dat_file_path : str
        Path of the .dat file to read.

    Returns
    -------
    tuple
        (storm_name, storm_start_date, storm_end_date, storm_year) where
        storm_name is title-cased, the two dates are the earliest and latest
        parsed timestamps formatted as "%Y-%m-%d %H:%M:%S", and storm_year is
        the year of the latest timestamp. The last three are None when no
        timestamp could be parsed.
    """
    dates = []
    # A dict is used as an insertion-ordered set: keys are the candidate
    # names in the order they were first seen.
    seen_names = {}

    with open(dat_file_path, 'r') as file:
        for line in file:
            columns = line.strip().split()

            if len(columns) >= 28:
                potential_name = columns[27].strip().rstrip(',').upper()
                if potential_name:
                    seen_names.setdefault(potential_name, None)

            if len(columns) >= 3:
                # Keep digits only (drops the trailing comma and any stray characters).
                date_str = re.sub(r'[^0-9]', '', columns[2].strip())
                if len(date_str) == 10:
                    try:
                        dates.append(datetime.strptime(date_str, "%Y%m%d%H"))
                    except ValueError:
                        print(f"Error parsing date: {date_str} in file {dat_file_path}")

    candidates = [
        name for name in seen_names
        if name not in _BASIN_CODES
        and not any(char.isdigit() for char in name)
        and name not in _NUMBER_WORDS
    ]
    storm_name = _choose_storm_name(candidates)

    # Deduce the start and end dates and extract the year
    if dates:
        storm_start_datetime = min(dates)
        storm_end_datetime = max(dates)
        storm_start_date = storm_start_datetime.strftime("%Y-%m-%d %H:%M:%S")
        storm_end_date = storm_end_datetime.strftime("%Y-%m-%d %H:%M:%S")
        storm_year = storm_end_datetime.year
    else:
        storm_start_date = None
        storm_end_date = None
        storm_year = None

    return storm_name.title(), storm_start_date, storm_end_date, storm_year

def process_dat_files(input_path, output_csv):
    """
    Build a CSV index of the .dat files found directly inside `input_path`.

    For every entry whose name ends in '.dat', the basin is taken from
    characters 1-2 of the file name and the storm number from characters 3-4;
    the storm name, start date, end date and year come from
    `extract_storm_info`. One row per file is written to `output_csv`,
    preceded by a header row.

    Parameters
    ----------
    input_path : str
        Directory to scan (not recursive).
    output_csv : str
        Path of the CSV file to create or overwrite.

    Returns
    -------
    None
        The result is written to `output_csv`; a confirmation is printed.
    """
    data_rows = []

    # os.listdir() returns entries in arbitrary order; sort so the CSV row
    # order is the same on every run and every machine.
    for file_name in sorted(os.listdir(input_path)):
        if not file_name.endswith('.dat'):
            continue

        # File-name layout relied on here: one leading character, then a
        # two-character basin, then a two-character storm number.
        basin = file_name[1:3]
        storm_number = file_name[3:5]

        dat_file_path = os.path.join(input_path, file_name)

        storm_name, storm_start_date, storm_end_date, storm_year = extract_storm_info(dat_file_path)

        data_rows.append([basin, storm_number, storm_name, storm_start_date, storm_end_date, storm_year, dat_file_path])

    # newline='' lets the csv module control line endings itself.
    with open(output_csv, 'w', newline='') as csv_file:
        writer = csv.writer(csv_file)
        writer.writerow(['Basin', 'Storm_Number', 'Storm_Name', 'Storm_Start_Date', 'Storm_End_Date','Storm_Year','adeck_path'])
        writer.writerows(data_rows)

    print(f"CSV file {output_csv} has been created successfully.")

In [20]:
# Example usage: scan a directory of .dat files and write the CSV index.
# Both paths are relative to the notebook's working directory.
input_directory = "./forecast_data/"  # Replace with the actual path to the .dat files
output_csv_file = "storm_adeck_directory.csv"  # Created or overwritten by process_dat_files
process_dat_files(input_directory, output_csv_file)

CSV file storm_adeck_directory.csv has been created successfully.


In [21]:
# Both bounds are read from the current clock, so the range below spans
# only the current year.
# NOTE(review): start_year looks like a placeholder for an earlier fixed year — confirm.
start_year = datetime.now().year
current_year = datetime.now().year
# The upper bound is exclusive, hence the +1 to include current_year.
range(start_year, current_year+1)

range(2024, 2025)