In [111]:
import requests
from datetime import datetime, timedelta
import os

def fetch_sst_anomaly():
    # Use 2 days ago to ensure data availability
    date = datetime.utcnow().date() - timedelta(days=100)
    timestamp = date.strftime("%Y-%m-%dT12:00:00Z")

    dataset_id = "noaacrwsstanomalyDaily"
    variable = "sea_surface_temperature_anomaly"

    # Build ERDDAP URL
    url = (
        f"https://coastwatch.noaa.gov/erddap/griddap/{dataset_id}.csv?"
        f"{variable}%5B({timestamp}):1:({timestamp})%5D"
        f"%5B(-10.475):1:(-24.475)%5D"
        f"%5B(142.475):1:(154.025)%5D"
    )

    # Determine project root safely
    try:
        project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
    except NameError:
        # Running inside a notebook in code/ folder, go one level up to project root
        project_root = os.path.abspath(os.path.join(os.getcwd(), ".."))


    # Build folder path relative to project root
    folder_path = os.path.join(
        project_root, "data", "raw", "daily",
        str(date.year),
        f"{date.month:02d}",
        f"{date.day:02d}"
    )
    os.makedirs(folder_path, exist_ok=True)

    # Build filename
    filename = f"sst_anomaly_{date.strftime('%Y_%m_%d')}.csv"
    output_path = os.path.join(folder_path, filename)

    print(f"Fetching SST anomaly for {date} from NOAA ERDDAP...")
    response = requests.get(url)
    response.raise_for_status()

    with open(output_path, "wb") as f:
        f.write(response.content)

    print(f"Saved to: {output_path}")

if __name__ == "__main__":
    fetch_sst_anomaly()

  date = datetime.utcnow().date() - timedelta(days=100)


Fetching SST anomaly for 2025-03-11 from NOAA ERDDAP...
Saved to: c:\Users\AdrienSourdille\Coral_Bleaching_Live_Map\data\raw\daily\2025\03\11\sst_anomaly_2025_03_11.csv


In [19]:
import pandas as pd

def describe_sst_data(filepath=r"C:\Users\AdrienSourdille\Coral_Bleaching_Live_Map\data\raw\latest_sst_anomaly.csv"):
    # Load CSV (skip first rows if they are metadata, adjust skiprows if needed)
    df = pd.read_csv(filepath, skiprows=1)  # ERDDAP usually has 1 header row before data

    print("Columns:", df.columns.tolist())
    print("\nSample rows:")
    print(df.head())

    print("\nSummary statistics:")
    print(df.describe())

    print("\nData types:")
    print(df.dtypes)

if __name__ == "__main__":
    describe_sst_data()


Columns: ['UTC', 'degrees_north', 'degrees_east', 'degree_C']

Sample rows:
                    UTC  degrees_north  degrees_east  degree_C
0  2025-06-17T12:00:00Z        -10.475       142.475      1.38
1  2025-06-17T12:00:00Z        -10.475       142.525      1.41
2  2025-06-17T12:00:00Z        -10.475       142.575      1.42
3  2025-06-17T12:00:00Z        -10.475       142.625      1.46
4  2025-06-17T12:00:00Z        -10.475       142.675      1.48

Summary statistics:
       degrees_north  degrees_east      degree_C
count   65192.000000  65192.000000  42699.000000
mean      -17.475000    148.250000      1.171771
std         4.055891      3.348626      0.406953
min       -24.475000    142.475000     -4.260000
25%       -20.975000    145.362500      0.920000
50%       -17.475000    148.250000      1.230000
75%       -13.975000    151.137500      1.460000
max       -10.475000    154.025000      1.970000

Data types:
UTC               object
degrees_north    float64
degrees_east     floa