# Hurricanes 
- Hurricanes are not bearish by default
- Refiners often benefit
- Integrated majors barely flinch
- Only extreme or prolonged disruptions move stocks
- The signal exists — but only in carefully defined events

In [23]:
import os
from pathlib import Path

import pandas as pd

# Paths on the local PC
BASE_DIR = Path(r"D:\MS_Data_Science_Thesis\Data_Extraction")
# Destination folder for files written by this notebook
OUTPUT_DIR = BASE_DIR / "Raw_Data_Folder"

# The raw HURDAT2 text file is read from the downloads folder (not OUTPUT_DIR)
HURDAT_FILE = BASE_DIR / "Downloaded_datasets" / "hurdat2_atlantic.txt"

print(f"Checking for file at: {HURDAT_FILE}")

if not HURDAT_FILE.exists():
    # Point the user at the folder that is actually checked, so the hint
    # always agrees with HURDAT_FILE.
    print(
        f"⚠️ File not found! Please ensure '{HURDAT_FILE.name}' is placed in "
        f"'{HURDAT_FILE.parent}'."
    )
else:
    print("✅ File found. Ready to parse.")

Checking for file at: D:\MS_Data_Science_Thesis\Data_Extraction\Downloaded_datasets\hurdat2_atlantic.txt
✅ File found. Ready to parse.


In [25]:
def parse_hurdat2(path: Path) -> "pd.DataFrame":
    """Parse a HURDAT2 best-track text file into a flat DataFrame.

    The file interleaves two kinds of comma-separated lines:

    * header lines (3 or 4 fields, first field starting with ``AL``, ``CP``
      or ``EP``) that carry the storm id and storm name for the records
      that follow;
    * data lines (8 or more fields) holding one track observation each.

    Data lines that appear before any header, or whose date, latitude or
    longitude cannot be parsed, are skipped.

    Args:
        path: Location of the HURDAT2 text file.

    Returns:
        A DataFrame with one row per observation and the columns
        ``storm_id``, ``storm_name``, ``datetime``, ``status``, ``lat``,
        ``lon``, ``wind_kt`` and ``pressure_mb``. The columns are present
        even when no row could be parsed. Latitude is negative for ``S``
        and longitude is negative for ``W``. Wind and pressure fields that
        are not integers become ``None``; negative integers (for example
        ``-999``) are kept unchanged.

    Raises:
        OSError: If ``path`` cannot be opened.
    """
    columns = [
        "storm_id",
        "storm_name",
        "datetime",
        "status",
        "lat",
        "lon",
        "wind_kt",
        "pressure_mb",
    ]
    basin_prefixes = ("AL", "CP", "EP")

    def _to_int(text):
        """Return *text* as an int, or None when it is not a signed integer."""
        return int(text) if text.lstrip("-").isdigit() else None

    rows = []
    storm_id = storm_name = None

    with open(path, "r", encoding="utf-8", errors="ignore") as f:
        for line in f:
            line = line.strip()
            if not line:
                continue

            parts = [p.strip() for p in line.split(",")]

            # Header lines have 3 or 4 parts (ID, Name, Count, [Empty])
            if len(parts) in (3, 4) and parts[0].startswith(basin_prefixes):
                storm_id, storm_name = parts[0], parts[1]
                continue

            # Data lines need at least 8 fields and a preceding header
            if len(parts) < 8 or not storm_id:
                continue

            try:
                # Date (0) and Time (1)
                dt = pd.to_datetime(parts[0] + parts[1], format="%Y%m%d%H%M")

                # Lat (4) and Lon (5): number followed by a hemisphere letter
                lat_s, lon_s = parts[4], parts[5]
                lat = float(lat_s[:-1]) * (-1 if "S" in lat_s.upper() else 1)
                lon = float(lon_s[:-1]) * (-1 if "W" in lon_s.upper() else 1)

                wind_kt = _to_int(parts[6])
                pressure_mb = _to_int(parts[7])
            except ValueError:
                # Malformed record: skip it, but let genuine programming
                # errors (NameError, KeyboardInterrupt, ...) propagate.
                continue

            rows.append({
                "storm_id": storm_id,
                "storm_name": storm_name,
                "datetime": dt,
                "status": parts[3],
                "lat": lat,
                "lon": lon,
                "wind_kt": wind_kt,
                "pressure_mb": pressure_mb,
            })

    # Passing columns keeps the schema stable even when rows is empty
    return pd.DataFrame(rows, columns=columns)

# Run the parser on the HURDAT2 file located above; the result is one row
# per track observation, kept in hurdat_df for the following cell.
hurdat_df = parse_hurdat2(HURDAT_FILE)

In [27]:
# 1. Parse the text file
hurdat_df = parse_hurdat2(HURDAT_FILE)

if not hurdat_df.empty:
    # 2. Sort chronologically within each storm and renumber the rows
    hurdat_df = hurdat_df.sort_values(["storm_id", "datetime"]).reset_index(drop=True)

    # 3. Save to the output directory. to_csv does not create missing
    #    folders, so make sure the destination exists first.
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    out_csv = OUTPUT_DIR / "hurdat2_atlantic_clean.csv"
    hurdat_df.to_csv(out_csv, index=False)

    print(f"✅ Parsed {len(hurdat_df)} rows from {hurdat_df['storm_id'].nunique()} unique storms.")
    print(f"📂 Saved to: {out_csv}")

    # Preview
    display(hurdat_df.head())
else:
    print("❌ No data was parsed. Check if the input .txt file is in the folder.")

✅ Parsed 55230 rows from 1991 unique storms.
📂 Saved to: D:\MS_Data_Science_Thesis\Data_Extraction\Raw_Data_Folder\hurdat2_atlantic_clean.csv


Unnamed: 0,storm_id,storm_name,datetime,status,lat,lon,wind_kt,pressure_mb
0,AL011851,UNNAMED,1851-06-25 00:00:00,HU,28.0,-94.8,80,-999
1,AL011851,UNNAMED,1851-06-25 06:00:00,HU,28.0,-95.4,80,-999
2,AL011851,UNNAMED,1851-06-25 12:00:00,HU,28.0,-96.0,80,-999
3,AL011851,UNNAMED,1851-06-25 18:00:00,HU,28.1,-96.5,80,-999
4,AL011851,UNNAMED,1851-06-25 21:00:00,HU,28.2,-96.8,80,-999
