In [22]:
import xarray as xr
import numpy as np
import pandas as pd
from pathlib import Path

In [None]:
# File Path
BASE_DIR = Path().resolve().parents[1]

# Load Arctic gridded temperature NetCDF file
arctic_path = BASE_DIR / "data" / "raw" / "Global_TAVG_Gridded_0p25deg.nc"
ds = xr.open_dataset(arctic_path, decode_times= True)
print(ds)

<xarray.Dataset> Size: 9GB
Dimensions:       (longitude: 1440, latitude: 720, time: 2106, month_number: 12)
Coordinates:
  * longitude     (longitude) float32 6kB -179.9 -179.6 -179.4 ... 179.6 179.9
  * latitude      (latitude) float32 3kB -89.88 -89.62 -89.38 ... 89.62 89.88
  * time          (time) float64 17kB 1.85e+03 1.85e+03 ... 2.025e+03 2.025e+03
Dimensions without coordinates: month_number
Data variables:
    land_mask     (latitude, longitude) float64 8MB ...
    areal_weight  (latitude, longitude) float64 8MB ...
    temperature   (time, latitude, longitude) float32 9GB ...
    climatology   (month_number, latitude, longitude) float32 50MB ...
Attributes:
    Conventions:                 Berkeley Earth Internal Convention (based on...
    title:                       Berkeley Earth Surface Temperature Anomaly F...
    file_creation:               18-Jul-2025 01:40:07
    institution:                 Berkeley Earth
    land_source_analysis_date:   04-Jul-2025 03:34:08
    oc

In [26]:
# Extract temperature, latitude, time
temperature = ds["temperature"]
latitudes = ds["latitude"]
times = ds["time"].values

# Step 1: Filter for Arctic region (latitude >= 66.5°N)
arctic_temp = temperature.sel(latitude=latitudes[latitudes >= 66.5])

years = times.astype(int)
arctic_temp.coords["year"] = ("time", years)

# Step 3: Compute annual mean Arctic anomaly
arctic_df = (
    arctic_temp
    .groupby("year")
    .mean(dim=["time","latitude", "longitude"], skipna=True)
    .to_dataframe()
    .reset_index()[["year", "temperature"]]
    .rename(columns={"temperature": "arctic_anomaly_c"})
)


In [29]:
OUTPUT_PATH = BASE_DIR / "data" / "processed" / "Arctic_Dataset.csv"

arctic_df.to_csv(OUTPUT_PATH, index=False)

print("✅ Arctic dataset saved:", OUTPUT_PATH)
print("📈 Shape:", arctic_df.shape)
print(arctic_df.head())



✅ Arctic dataset saved: D:\Desktopped\UCD\Summer\Summer project\Project_ACM\data\processed\Arctic_Dataset.csv
📈 Shape: (176, 2)
   year  arctic_anomaly_c
0  1850         -0.556655
1  1851          0.362077
2  1852          0.279491
3  1853         -0.221799
4  1854         -0.203260
