# In [None]:
import pandas as pd
import requests
import io
import re
from us import states


def find_noaa_file_for_year(year, timeout=60):
    """Look up the Storm Events "details" file name listed for ``year``.

    Fetches the NOAA ``csvfiles`` directory listing and searches its text for
    names of the form ``StormEvents_details-ftp_v1.0_d<year>_c<digits>.csv.gz``.

    Args:
        year: Event year to look for; interpolated into the file-name pattern.
        timeout: Seconds to wait on the listing request before giving up.
            Without a timeout a stalled connection would block forever.

    Returns:
        The lexicographically greatest matching file name, or ``None`` when
        the listing cannot be fetched or contains no match for ``year``.
    """
    base = "https://www.ncei.noaa.gov/pub/data/swdi/stormevents/csvfiles/"

    try:
        response = requests.get(base, timeout=timeout)
        # Do not scan an HTTP error page as if it were the directory listing.
        response.raise_for_status()
    except requests.RequestException as exc:
        print(f"Could not read NOAA file listing for {year}: {exc}")
        return None

    pattern = rf"StormEvents_details-ftp_v1\.0_d{year}_c\d+\.csv\.gz"
    matches = re.findall(pattern, response.text)

    # Several files may match one year (differing only in the c<digits>
    # part); keep the one that sorts last.
    return max(matches) if matches else None


def extract_year(df):
    """Return the per-row event year, or None if no usable column exists.

    Columns are tried in this order: ``BEGIN_YEAR`` and ``YEAR`` are returned
    as stored; ``BEGIN_DATE_TIME`` and ``BEGIN_DATE`` are parsed as datetimes
    (unparseable values become missing) and reduced to their year.
    """
    present = df.columns

    # Ready-made year columns are handed back untouched.
    for year_col in ("BEGIN_YEAR", "YEAR"):
        if year_col in present:
            return df[year_col]

    # Otherwise derive the year from the first date-like column found.
    for date_col in ("BEGIN_DATE_TIME", "BEGIN_DATE"):
        if date_col in present:
            parsed = pd.to_datetime(df[date_col], errors="coerce")
            return parsed.dt.year

    return None


def extract_month(df):
    """Return the per-row event month, or None if no usable column exists.

    ``BEGIN_MONTH`` is returned as stored when present. Otherwise the first of
    ``BEGIN_DATE_TIME`` / ``BEGIN_DATE`` that exists is parsed as datetimes
    (unparseable values become missing) and reduced to its month number.
    """
    available = df.columns
    if "BEGIN_MONTH" in available:
        return df["BEGIN_MONTH"]

    # Pick the first date-like column, keeping the original priority order.
    date_col = next(
        (name for name in ("BEGIN_DATE_TIME", "BEGIN_DATE") if name in available),
        None,
    )
    if date_col is None:
        return None

    timestamps = pd.to_datetime(df[date_col], errors="coerce")
    return timestamps.dt.month


def flexible_col(df, col, default=0):
    """Fetch ``df[col]`` if that column exists; otherwise hand back ``default``.

    Note that the fallback is returned as given (a plain value, not a column),
    so callers must cope with either a Series or the bare default.
    """
    if col not in df.columns:
        return default
    return df[col]


# Magnitude suffixes accepted on money strings, mapped to their multiplier.
_MONEY_MULTIPLIERS = {"K": 1_000, "M": 1_000_000, "B": 1_000_000_000}

# A signed decimal number (optional exponent) followed by an optional suffix.
_MONEY_PATTERN = re.compile(
    r"([+-]?(?:\d+(?:\.\d*)?|\.\d+)(?:[eE][+-]?\d+)?)\s*([KMB]?)",
    re.IGNORECASE,
)


def parse_money(x):
    """Convert NOAA money strings like '15K', '3.1M', '1.5B' to numeric dollars.

    Args:
        x: A money string with an optional K/M/B suffix (case-insensitive),
            a plain number (string or numeric), or a missing value.

    Returns:
        The dollar amount as a float, or 0 when ``x`` is missing or cannot be
        interpreted as an amount (best-effort: bad cells never raise).
    """
    if pd.isna(x):
        return 0

    # Going through str() lets numeric cells (int/float) share the same path
    # as text cells instead of being discarded.
    match = _MONEY_PATTERN.fullmatch(str(x).strip())
    if match is None:
        return 0

    amount, suffix = match.groups()
    # No suffix means the number is already in dollars.
    return float(amount) * _MONEY_MULTIPLIERS.get(suffix.upper(), 1)


def download_noaa(start_year=2000, end_year=2024):
    """Download NOAA Storm Events detail files and combine them into one table.

    For each year in the requested range the matching file name is looked up,
    the gzip-compressed CSV is downloaded, and a fixed set of columns is
    derived from it. A year that cannot be found, downloaded, or parsed is
    reported on stdout and skipped, so one bad year never aborts the run.

    Args:
        start_year: First event year to download (inclusive).
        end_year: Last event year to download (inclusive).

    Returns:
        A DataFrame with the columns ``year``, ``month``, ``STATE``,
        ``disaster_name``, ``fatalities``, ``loss`` and ``source``, or an
        empty DataFrame (no columns) when no year could be loaded.
    """
    base_url = "https://www.ncei.noaa.gov/pub/data/swdi/stormevents/csvfiles/"
    output_columns = ["year", "month", "STATE", "disaster_name",
                      "fatalities", "loss", "source"]
    frames = []

    for year in range(start_year, end_year + 1):
        try:
            # The lookup performs network I/O too, so it lives inside the
            # try: a failure here skips this year instead of ending the loop.
            file_name = find_noaa_file_for_year(year)
            if not file_name:
                print(f"No NOAA files found for {year}")
                continue

            url = f"{base_url}{file_name}"
            print("Downloading NOAA:", url)

            response = requests.get(url, timeout=60)
            # Fail with a clear HTTP error rather than feeding an error page
            # to the gzip/CSV parser.
            response.raise_for_status()

            df = pd.read_csv(io.BytesIO(response.content),
                             compression="gzip", low_memory=False)

            df["year"] = extract_year(df)
            df["month"] = extract_month(df)

            df["fatalities"] = (
                flexible_col(df, "DEATHS_DIRECT") +
                flexible_col(df, "DEATHS_INDIRECT")
            )

            # flexible_col returns a bare scalar when the column is absent;
            # only a real column can be parsed element by element.
            damage = flexible_col(df, "DAMAGE_PROPERTY")
            if isinstance(damage, pd.Series):
                df["loss"] = damage.apply(parse_money)
            else:
                df["loss"] = damage

            df["STATE"] = flexible_col(df, "STATE")
            df["disaster_name"] = flexible_col(df, "EVENT_TYPE", default="Unknown")
            df["source"] = "NOAA"

            frames.append(df[output_columns])

        except Exception as e:
            # Deliberate best-effort boundary: report and move to next year.
            print(f"Skipping {year} (error: {e})")

    if frames:
        return pd.concat(frames, ignore_index=True)
    return pd.DataFrame()



# Download every year handled by download_noaa(); this performs network I/O.
print("=== NOAA ===")
df_noaa = download_noaa()



print("\nMerging datasets...")
final_df = df_noaa.copy()

# download_noaa() returns a column-less frame when nothing could be loaded,
# so only normalize STATE when the column is actually there.
if "STATE" in final_df.columns:
    state = final_df["STATE"]
    # Upper-case the values, but keep missing entries missing instead of
    # turning them into the literal text "NAN".
    final_df["STATE"] = state.astype(str).str.upper().where(state.notna())

output_file = "US_Disasters_2000_2024.csv"
final_df.to_csv(output_file, index=False)

# Plain ASCII arrow: the previous character was stored mis-encoded.
print(f"\nSaved -> {output_file}")
print(final_df.head())