In [10]:
import os
import requests
from datetime import datetime, timedelta
import xarray as xr
import numpy as np
import pandas as pd

# === USER SETTINGS ===
start_date = datetime(2025, 8, 1)
end_date = datetime(2025, 8, 5)
output_csv = "nino34_sst_timeseries.csv"
download_dir = "./data/raw/oisst_data"
base_url = "https://www.ncei.noaa.gov/data/sea-surface-temperature-optimum-interpolation/v2.1/access/avhrr"

# === Ensure download directory exists ===
os.makedirs(download_dir, exist_ok=True)

# === Helper function to download a single file
def download_nc_file(date):
    filename = f"oisst-avhrr-v02r01.{date.strftime('%Y%m%d')}_preliminary.nc"
    file_url = f"{base_url}/{date.year}/{filename}"
    print(file_url)
    local_path = os.path.join(download_dir, filename)

    if os.path.exists(local_path):
        print(f"Already downloaded: {filename}")
        return local_path

    print(f"Downloading: {filename}")
    response = requests.get(file_url)
    if response.status_code == 200:
        with open(local_path, 'wb') as f:
            f.write(response.content)
        return local_path
    else:
        print(f"Failed to download {filename} (status {response.status_code})")
        return None

# === Helper function to extract Niño 3.4 SST from a file ===
def extract_nino34_sst(filepath):
    try:
        ds = xr.open_dataset(filepath)

        # Convert lon to 0–360 if needed
        if ds.lon.max() <= 180:
            ds = ds.assign_coords(lon=((ds.lon + 360) % 360))

        # Define Niño 3.4 box (5N–5S, 170W–120W => lat -5 to 5, lon 190 to 240)
        region = ds.sel(lat=slice(-5, 5), lon=slice(190, 240))
        sst = region['sst'].where(region['sst'] > -100)  # Mask land

        weights = np.cos(np.deg2rad(sst.lat))
        weighted_sst = sst.weighted(weights)

        sst_mean = weighted_sst.mean(dim=['lat', 'lon']).values.item()
        date = ds['time'].values.item()

        return {'date': pd.to_datetime(date), 'sst': round(sst_mean, 3)}
    except Exception as e:
        print(f"Error processing {filepath}: {e}")
        return None

# === Main Loop ===
current = start_date
records = []

while current <= end_date:
    file_path = download_nc_file(current)
    if file_path:
        result = extract_nino34_sst(file_path)
        if result:
            records.append(result)
    current += timedelta(days=1)

# === Save results to CSV ===
if records:
    df = pd.DataFrame(records)
    df.sort_values('date', inplace=True)
    df.to_csv(output_csv, index=False)
    print(f"\n✅ Saved daily Niño 3.4 SST time series to: {output_csv}")
    print(df)
else:
    print("No data extracted.")


https://www.ncei.noaa.gov/data/sea-surface-temperature-optimum-interpolation/v2.1/access/avhrr/2025/oisst-avhrr-v02r01.20250801_preliminary.nc
Downloading: oisst-avhrr-v02r01.20250801_preliminary.nc
Failed to download oisst-avhrr-v02r01.20250801_preliminary.nc (status 404)
https://www.ncei.noaa.gov/data/sea-surface-temperature-optimum-interpolation/v2.1/access/avhrr/2025/oisst-avhrr-v02r01.20250802_preliminary.nc
Downloading: oisst-avhrr-v02r01.20250802_preliminary.nc
Failed to download oisst-avhrr-v02r01.20250802_preliminary.nc (status 404)
https://www.ncei.noaa.gov/data/sea-surface-temperature-optimum-interpolation/v2.1/access/avhrr/2025/oisst-avhrr-v02r01.20250803_preliminary.nc
Downloading: oisst-avhrr-v02r01.20250803_preliminary.nc
Failed to download oisst-avhrr-v02r01.20250803_preliminary.nc (status 404)
https://www.ncei.noaa.gov/data/sea-surface-temperature-optimum-interpolation/v2.1/access/avhrr/2025/oisst-avhrr-v02r01.20250804_preliminary.nc
Downloading: oisst-avhrr-v02r01.202