In [1]:
import xarray as xr
import pandas as pd
from pathlib import Path
import numpy as np

# Input: NetCDF file holding 2 m temperature (variable "t2m_0001").
# Output: one UK-area-mean temperature value per hour, saved as parquet.
NC_PATH = r"C:\Users\Krist\Documents\Work\Data Science\projects\Carbon\data\raw\t2m.nc"
OUT_PATH = Path(r"C:\Users\Krist\OneDrive\Documents\Data Analysis\Practice\Carbon\data\processed\uk_temp_hourly.parquet")
OUT_PATH.parent.mkdir(parents=True, exist_ok=True)

# UK bounding box in degrees: (south, north) and (west, east).
UK_LAT_BOUNDS = (49, 61)
UK_LON_BOUNDS = (-8, 2)

# Load ONLY what you need, with safe chunking.
# The context manager releases the file handle even if a later step raises,
# which a trailing ds.close() would not.
with xr.open_dataset(
    NC_PATH,
    engine="netcdf4",
    chunks={"time": 72},        # small time chunks keeps memory stable
    mask_and_scale=True,        # interpret missing values properly
) as ds:
    # Use the valid variable
    t2m = ds["t2m_0001"].sel(time=slice("2020-01-01", None))

    # Label-based slicing must follow the coordinate's own ordering, otherwise
    # the selection is silently empty; detect whether latitude runs
    # north->south (descending) or south->north (ascending).
    lat_values = t2m["latitude"].values
    lat_descending = lat_values.size > 1 and lat_values[0] > lat_values[-1]
    south, north = UK_LAT_BOUNDS
    lat_slice = slice(north, south) if lat_descending else slice(south, north)

    # UK bounding box (global file -> reduce area massively)
    t2m_uk_box = t2m.sel(latitude=lat_slice, longitude=slice(*UK_LON_BOUNDS))

    # Fail loudly instead of averaging nothing and overwriting the output
    # with an empty table.
    if t2m_uk_box.size == 0:
        raise ValueError(
            "UK bounding-box selection is empty; check the time range and "
            "whether longitude is stored as -180..180 or 0..360"
        )

    # Belt-and-braces: remove any fill values if present.
    # NOTE(review): with mask_and_scale=True xarray normally moves these keys
    # from .attrs to .encoding and has already masked them, so this loop is
    # probably a no-op -- confirm before relying on it.
    for attr_name in ("_FillValue", "missing_value"):
        fv = t2m_uk_box.attrs.get(attr_name)
        if fv is not None:
            t2m_uk_box = t2m_uk_box.where(t2m_uk_box != fv)

    # Spatial mean (lazy)
    t2m_uk = t2m_uk_box.mean(dim=["latitude", "longitude"], skipna=True)

    # Kelvin -> Celsius
    t2m_uk_c = t2m_uk - 273.15

    # Compute 1D series while the file is still open; everything after this
    # point works on in-memory pandas data.
    s = t2m_uk_c.compute().to_series()

s.index = pd.to_datetime(s.index, utc=True, errors="coerce")
s = s.dropna()

# Align to exact hour for merging. Lowercase "h" is the current hour alias;
# uppercase "H" is deprecated and emits a FutureWarning on recent pandas.
s.index = s.index.floor("h")
# Several timestamps may floor to the same hour; collapse them to one row.
s = s.groupby(s.index).mean()

df_temp = s.to_frame("temp_2m").sort_index()

print("Rows:", len(df_temp))
print("Temp range (C):", df_temp["temp_2m"].min(), "to", df_temp["temp_2m"].max())
print(df_temp.head())
print(df_temp.tail())

df_temp.to_parquet(OUT_PATH)
print("Saved:", OUT_PATH)

  s.index = s.index.floor("H")


Rows: 50400
Temp range (C): 0.06486111421668284 to 22.971969781264647
                            temp_2m
time                               
2020-01-01 00:00:00+00:00  6.526056
2020-01-01 01:00:00+00:00  6.426991
2020-01-01 02:00:00+00:00  6.343664
2020-01-01 03:00:00+00:00  6.290184
2020-01-01 04:00:00+00:00  6.245473
                             temp_2m
time                                
2025-09-30 19:00:00+00:00  13.741258
2025-09-30 20:00:00+00:00  13.531664
2025-09-30 21:00:00+00:00  13.326550
2025-09-30 22:00:00+00:00  13.279802
2025-09-30 23:00:00+00:00  13.170632
Saved: C:\Users\Krist\OneDrive\Documents\Data Analysis\Practice\Carbon\data\processed\uk_temp_hourly.parquet
