## NC Climate Data Preprocessing 
| [DWD Url for: Index of /climate_environment/CDC/grids_germany/daily/hyras_de/](https://opendata.dwd.de/climate_environment/CDC/grids_germany/daily/hyras_de/) to gridded Dataset 

First install the required packages in a *terminal* with `pip install <package-name>`.

### 1. loading data from DWD
#### 1.1 setting environment

In [1]:
import requests
import gzip
from pathlib import Path

# Month number -> name of the per-month subdirectory that is inserted into the
# temperature and precipitation download URLs (e.g. 8 -> "08_Aug").
month_dirs = {1: "01_Jan", 2: "02_Feb", 3: "03_Mar", 4: "04_Apr", 5: "05_May", 6: "06_Jun", 
              7: "07_Jul", 8: "08_Aug", 9: "09_Sep", 10: "10_Oct", 11: "11_Nov", 12: "12_Dec"}

#### 1.2 temperature

In [None]:
# Download the monthly mean air temperature grids (.asc.gz) for 2000-2024.
# Archives are stored in <root>/.zip; files that already exist are not fetched again.
url_t = "https://opendata.dwd.de/climate_environment/CDC/grids_germany/monthly/air_temperature_mean"
root = Path(".../GRD_DEU_P1M_T2M-M")  # your path! (same folder as root_t in the processing part)
ascdir = root
ascdir.mkdir(parents=True, exist_ok=True)
zipdir = root / ".zip"
zipdir.mkdir(parents=True, exist_ok=True)

for year in range(2000, 2025):  # incl. 2024
    for month in range(1, 13):
        subdir = month_dirs[month]       # e.g. "08_Aug"
        yyyymm = f"{year}{month:02d}"    # e.g. "200008"
        fname = f"grids_germany_monthly_air_temp_mean_{yyyymm}.asc.gz"

        url_t_n = f"{url_t}/{subdir}/{fname}"
        out_path = zipdir / fname        # saved in zip-folder

        # skip archives that were downloaded in an earlier run
        if out_path.exists():
            print(f"⏭ data exists, ignore: {out_path.name}")
            continue

        print(f"⬇ try to download: {url_t_n}")
        try:
            # timeout: a stalled connection must not block the loop forever
            r = requests.get(url_t_n, timeout=60)
        except requests.RequestException as err:
            # one failed request should not abort the remaining downloads
            print(f"  ✗ request failed: {err}")
            continue

        if r.status_code == 200:
            out_path.write_bytes(r.content)
            print(f"  ✓ downloaded & saved: {out_path}")
        else:
            print(f"  ✗ data not found (state: {r.status_code})")

In [None]:
# Decompress every downloaded .asc.gz archive into the ASC folder.

import gzip
from pathlib import Path

for archive in zipdir.glob("*.asc.gz"):
    target = ascdir / archive.name.replace(".gz", "")
    print(f"opening: {target}")

    if not target.exists():
        print(f"  extracting to: {archive.name} → {target.name}")
        with gzip.open(archive, "rb") as packed:
            target.write_bytes(packed.read())
    else:
        # a previous run already produced this grid
        print(f"⏭ ASC already exists, skipping: {target.name}")

print("✓ all .asc.gz files processed!")

#### 1.3 precipitation

In [None]:
# Download the monthly precipitation grids (.asc.gz) for 2000-2024.
# Archives are stored in <root>/.zip; files that already exist are not fetched again.
url_p = "https://opendata.dwd.de/climate_environment/CDC/grids_germany/monthly/precipitation"
root = Path(".../GRD_DEU_P1M_RR")  # your path!
ascdir = root
ascdir.mkdir(parents=True, exist_ok=True)
zipdir = root / ".zip"
zipdir.mkdir(parents=True, exist_ok=True)

for year in range(2000, 2025):  # incl. 2024
    for month in range(1, 13):
        subdir = month_dirs[month]       # e.g. "08_Aug"
        yyyymm = f"{year}{month:02d}"    # e.g. "200008"
        fname = f"grids_germany_monthly_precipitation_{yyyymm}.asc.gz"

        url_p_n = f"{url_p}/{subdir}/{fname}"
        out_path = zipdir / fname        # saved in zip-folder

        # skip archives that were downloaded in an earlier run
        if out_path.exists():
            print(f"⏭ data exists, ignore: {out_path.name}")
            continue

        print(f"⬇ try to download: {url_p_n}")
        try:
            # timeout: a stalled connection must not block the loop forever
            r = requests.get(url_p_n, timeout=60)
        except requests.RequestException as err:
            # one failed request should not abort the remaining downloads
            print(f"  ✗ request failed: {err}")
            continue

        if r.status_code == 200:
            out_path.write_bytes(r.content)
            print(f"  ✓ downloaded & saved: {out_path}")
        else:
            print(f"  ✗ data not found (state: {r.status_code})")

In [None]:
# Decompress every downloaded .asc.gz archive into the ASC folder.
import gzip
from pathlib import Path

for archive in zipdir.glob("*.asc.gz"):
    target = ascdir / archive.name.replace(".gz", "")
    print(f"opening: {target}")

    if not target.exists():
        print(f"  extracting to: {archive.name} → {target.name}")
        with gzip.open(archive, "rb") as packed:
            target.write_bytes(packed.read())
    else:
        # a previous run already produced this grid
        print(f"⏭ ASC already exists, skipping: {target.name}")

print("✓ all .asc.gz files processed!")

#### 1.4 radiation

In [None]:
# Download the monthly global radiation grids (.zip) for 2000-2024.
# Unlike temperature and precipitation, the URL has no per-month subdirectory.
url_r = "https://opendata.dwd.de/climate_environment/CDC/grids_germany/monthly/radiation_global"
root = Path(".../GRD_DEU_P1M_RAD-G")  # your path!

ascdir = root
ascdir.mkdir(parents=True, exist_ok=True)
zipdir = root / ".zip"
zipdir.mkdir(parents=True, exist_ok=True)

for year in range(2000, 2025):  # incl. 2024
    for month in range(1, 13):
        yyyymm = f"{year}{month:02d}"    # e.g. "200008"
        fname = f"grids_germany_monthly_radiation_global_{yyyymm}.zip"

        url_r_n = f"{url_r}/{fname}"
        out_path = zipdir / fname        # saved in zip-folder

        # skip archives that were downloaded in an earlier run
        if out_path.exists():
            print(f"⏭ data exists, ignore: {out_path.name}")
            continue

        print(f"⬇ try to download: {url_r_n}")
        try:
            # timeout: a stalled connection must not block the loop forever
            r = requests.get(url_r_n, timeout=60)
        except requests.RequestException as err:
            # one failed request should not abort the remaining downloads
            print(f"  ✗ request failed: {err}")
            continue

        if r.status_code == 200:
            out_path.write_bytes(r.content)
            print(f"  ✓ downloaded & saved: {out_path}")
        else:
            print(f"  ✗ data not found (state: {r.status_code})")

In [None]:
# Unpack every downloaded .zip archive into the ASC folder.
import zipfile
from pathlib import Path

for archive in zipdir.glob("*.zip"):
    print(f"opening: {archive.name}")

    with zipfile.ZipFile(archive, "r") as bundle:
        for entry in bundle.namelist():
            target = ascdir / entry

            # only extract members that have not been unpacked before
            if not target.exists():
                print(f"  extracting: {entry} → {target.name}")
                bundle.extract(entry, ascdir)
            else:
                print(f"⏭ already exists, skipping: {target.name}")

print("✓ all .zip files extracted!")


### 2. Processing: Merging + NetCDF

In [10]:
import re
import pandas as pd
import rioxarray as  rxr
import xarray as xr
from pathlib import Path
import tempfile

In [None]:
"""
clears all rows above of 'NCOLS'
"""
def open_dwd_ascii_strip_header(path):
    """Open a DWD ASCII grid, ignoring any text in front of the ESRI header.

    Every line before the first one that starts with ``NCOLS``
    (case-insensitive, leading whitespace ignored) is dropped. The remainder
    is written to a temporary ``.asc`` file, which is opened with rioxarray.
    The raster is loaded into memory so the temporary file can be deleted
    again instead of accumulating one leftover file per grid.

    Parameters
    ----------
    path : str or pathlib.Path
        Location of the ``.asc`` file.

    Returns
    -------
    The object returned by ``rxr.open_rasterio`` with all length-1 dimensions
    squeezed out and its data loaded into memory.

    Raises
    ------
    ValueError
        If the file contains no line starting with ``NCOLS``.
    """
    path = Path(path)

    # non-ASCII bytes (e.g. in a free-text preamble) are silently dropped
    with path.open("r", encoding="ascii", errors="ignore") as src:
        lines = src.readlines()

    start_idx = next(
        (i for i, line in enumerate(lines) if line.strip().upper().startswith("NCOLS")),
        None,
    )
    if start_idx is None:
        raise ValueError(f"no NCOLS-line in {path.name} found")

    # delete=False: the file has to be closed before it is reopened by name
    with tempfile.NamedTemporaryFile(
        suffix=".asc", delete=False, mode="w", encoding="ascii"
    ) as tmp:
        tmp.writelines(lines[start_idx:])
        tmp_path = tmp.name

    try:
        raster = rxr.open_rasterio(tmp_path)
        try:
            # read the values now; the backing file is removed below
            da = raster.squeeze(drop=True).load()
        finally:
            raster.close()
    finally:
        try:
            Path(tmp_path).unlink()
        except OSError:
            # best-effort cleanup: a locked temp file must not fail the read
            pass

    return da

In [None]:
""" 
reading all .asc in folder and builds a time series:
- folder: path with all .asc data (expects data name with 6 numbers YYYYMM)
- var_name: name of variable in result ('temp', 'prec', 'rad') 
"""
def load_dwd_asc_series(folder, var_name):
    """Stack all ``.asc`` grids of a folder into one array along ``time``.

    - folder: directory that is searched (non-recursively) for ``*.asc`` files;
      the first run of six digits in a file name is parsed as YYYYMM
    - var_name: name given to every grid and therefore to the result

    Files without six consecutive digits in their name are reported and
    skipped. Raises ValueError when no usable file is left.
    """
    folder = Path(folder)
    stamp_pattern = re.compile(r"(\d{6})")

    layers = []
    for asc_path in sorted(folder.glob("*.asc")):
        found = stamp_pattern.search(asc_path.name)
        if found is None:
            print(f"⚠️ not YYYYMM in dataname found, skipped: {asc_path.name}")
            continue

        stamp = pd.to_datetime(found.group(1), format="%Y%m")

        layer = open_dwd_ascii_strip_header(asc_path)
        # attach the month as a scalar coordinate, then promote it to a dimension
        layer = layer.assign_coords(time=stamp).expand_dims("time")
        layer.name = var_name
        layers.append(layer)

    if len(layers) == 0:
        raise ValueError(f"no fitting .asc files in {folder}")

    stacked = xr.concat(layers, dim="time")
    return stacked.sortby("time")


#### loading DWD-Variables

In [None]:
from pathlib import Path

# Folders holding the extracted .asc grids, one per variable
# (placeholders - replace ".../" with your own base path).
root_t = Path(r".../GRD_DEU_P1M_T2M-M")
root_p = Path(r".../GRD_DEU_P1M_RR")
root_r = Path(r".../GRD_DEU_P1M_RAD-G")

In [None]:
# Build one time series per variable and attach the CRS right away.
# write_crs(..., inplace=False) returns a new object instead of modifying the loaded one.
temp = load_dwd_asc_series(root_t, "temp").rio.write_crs("EPSG:31467", inplace=False)
prec = load_dwd_asc_series(root_p, "prec").rio.write_crs("EPSG:31467", inplace=False)
rad = load_dwd_asc_series(root_r, "rad").rio.write_crs("EPSG:31467", inplace=False)

#### saving

In [None]:
def asc_folder_to_netcdf(folder, var_name, out_nc_path, crs="EPSG:31467"):
    """Convert a folder of ``.asc`` grids into a single NetCDF file.

    The grids are stacked with ``load_dwd_asc_series``, tagged with *crs*,
    wrapped in a dataset under the key *var_name* and written to
    *out_nc_path*. Missing parent directories of the target are created.

    Returns the target path as a ``pathlib.Path``.
    """
    source_dir = Path(folder)
    target = Path(out_nc_path)
    target.parent.mkdir(parents=True, exist_ok=True)

    print(f"loading ASC-timeseries for {var_name} from {source_dir} ...")
    series = load_dwd_asc_series(source_dir, var_name)

    print(f"writing CRS {crs} ...")
    series = series.rio.write_crs(crs, inplace=False)

    print(f"saving NetCDF to {target} ...")
    xr.Dataset({var_name: series}).to_netcdf(target)

    print(f"✔ finished: {target}")
    return target


In [None]:
from pathlib import Path

# Input folders with the extracted .asc grids (placeholders - adjust to your setup)
root_t = Path(r".../GRD_DEU_P1M_T2M-M")
root_p = Path(r".../GRD_DEU_P1M_RR")
root_r = Path(r".../GRD_DEU_P1M_RAD-G")

# All NetCDF files are written into one common folder
out_base = Path(r".../NC")

nc_temp = asc_folder_to_netcdf(
    folder=root_t, var_name="temp", out_nc_path=out_base / "dwd_temp_monthly_31467.nc"
)
nc_prec = asc_folder_to_netcdf(
    folder=root_p, var_name="prec", out_nc_path=out_base / "dwd_prec_monthly_31467.nc"
)
nc_rad = asc_folder_to_netcdf(
    folder=root_r, var_name="rad", out_nc_path=out_base / "dwd_rad_monthly_31467.nc"
)

#### plotting

In [None]:
from pathlib import Path
import gzip
import re
import tempfile

import pandas as pd
import requests
import xarray as xr  # bound as `xr`, which is the name used below

# Open the raw NetCDF files written by asc_folder_to_netcdf.
# Temperature is read from the unconverted file: the "_degree_" file is only
# created from this ds_t in the °C conversion cell further down, so reading it
# here would apply the 0.1 scaling a second time on a re-run.
ds_t = xr.open_dataset(r".../NC/dwd_temp_monthly_31467.nc")
ds_p = xr.open_dataset(r".../NC/dwd_prec_monthly_31467.nc")
ds_r = xr.open_dataset(r".../NC/dwd_rad_monthly_31467.nc")

In [None]:
# Area-mean time series, then maps of the first twelve time steps
temp_da = ds_t["temp"]
temp_da.mean(dim=("x", "y")).plot()
temp_da.isel(time=slice(0, 12)).plot(col="time", col_wrap=4)

In [None]:
# Area-mean time series, then maps of the first twelve time steps
prec_da = ds_p["prec"]
prec_da.mean(dim=("x", "y")).plot()
prec_da.isel(time=slice(0, 12)).plot(col="time", col_wrap=4)

In [None]:
# Area-mean time series, then maps of the first twelve time steps
rad_da = ds_r["rad"]
rad_da.mean(dim=("x", "y")).plot()
rad_da.isel(time=slice(0, 12)).plot(col="time", col_wrap=4)

#### converting to °C

In [5]:
# Scale the stored values by 0.1 and label the result as °C.
# NOTE(review): presumably the raw grids are in tenths of a degree - confirm against the DWD data description.
temp_scaled = ds_t["temp"] * 0.1
ds_t["temp"] = temp_scaled.assign_attrs(
    units="°C",
    long_name="Monthly mean 2m air temperature (°C)",
)

In [None]:
# Persist the converted temperature dataset under its own file name
degree_nc = r".../NC/dwd_temp_monthly_degree_31467.nc"
ds_t.to_netcdf(degree_nc)

print("✔ file saved!")

✔ Datei gespeichert!


### 3. clipping to the RLP and biosphere reserve layers

In [None]:
import geopandas as gpd
import re
import pandas
import rioxarray
import xarray as xr
from pathlib import Path

# Vector layers used for clipping, reprojected to the CRS of the grids
rlp = gpd.read_file(r"E:/Hiwi/data/Conservation_types(.csv)/RLP_Grenze.gpkg").to_crs("EPSG:31467")
bsr = gpd.read_file(r"E:/Hiwi/data/Conservation_types(.csv)/Biosphärenreservat.gpkg").to_crs("EPSG:31467")

# Gridded datasets; the CRS is written again after loading from disk
ds_t = xr.open_dataset(r".../NC/dwd_temp_monthly_degree_31467.nc").rio.write_crs("EPSG:31467")
ds_p = xr.open_dataset(r".../NC/dwd_prec_monthly_31467.nc").rio.write_crs("EPSG:31467")
ds_r = xr.open_dataset(r".../NC/dwd_rad_monthly_31467.nc").rio.write_crs("EPSG:31467")



In [2]:
# Clip every variable to the RLP layer; drop=True trims the grid to the clipped extent
rlp_shapes, rlp_crs = rlp.geometry, rlp.crs
temp_rlp = ds_t["temp"].rio.clip(rlp_shapes, rlp_crs, drop=True)
prec_rlp = ds_p["prec"].rio.clip(rlp_shapes, rlp_crs, drop=True)
rad_rlp = ds_r["rad"].rio.clip(rlp_shapes, rlp_crs, drop=True)

# Same for the biosphere reserve layer
bsr_shapes, bsr_crs = bsr.geometry, bsr.crs
temp_bsr = ds_t["temp"].rio.clip(bsr_shapes, bsr_crs, drop=True)
prec_bsr = ds_p["prec"].rio.clip(bsr_shapes, bsr_crs, drop=True)
rad_bsr = ds_r["rad"].rio.clip(bsr_shapes, bsr_crs, drop=True)

In [None]:
# Write the clipped arrays to NetCDF, one file per variable and region.
# RLP extent
temp_rlp.to_netcdf(r".../NC/dwd_temp_monthly_degree_31467_rlp.nc")
prec_rlp.to_netcdf(r".../NC/dwd_prec_monthly_31467_rlp.nc")
rad_rlp.to_netcdf(r".../NC/dwd_rad_monthly_31467_rlp.nc")

# Biosphere reserve extent (the stray double slash in the precipitation path is removed)
temp_bsr.to_netcdf(r".../NC/dwd_temp_monthly_degree_31467_bsr.nc")
prec_bsr.to_netcdf(r".../NC/dwd_prec_monthly_31467_bsr.nc")
rad_bsr.to_netcdf(r".../NC/dwd_rad_monthly_31467_bsr.nc")

### 4. plotting everything

In [None]:
import geopandas as gpd
import xarray as xr
from pathlib import Path

# Reopen the saved files, grouped by variable: full extent, RLP clip, BSR clip.

# temperature (°C file)
ds_t = xr.open_dataset(r".../NC/dwd_temp_monthly_degree_31467.nc")
ds_t_rlp = xr.open_dataset(r".../NC/dwd_temp_monthly_degree_31467_rlp.nc")
ds_t_bsr = xr.open_dataset(r".../NC/dwd_temp_monthly_degree_31467_bsr.nc")

# precipitation
ds_p = xr.open_dataset(r".../NC/dwd_prec_monthly_31467.nc")
ds_p_rlp = xr.open_dataset(r".../NC/dwd_prec_monthly_31467_rlp.nc")
ds_p_bsr = xr.open_dataset(r".../NC/dwd_prec_monthly_31467_bsr.nc")

# radiation
ds_r = xr.open_dataset(r".../NC/dwd_rad_monthly_31467.nc")
ds_r_rlp = xr.open_dataset(r".../NC/dwd_rad_monthly_31467_rlp.nc")
ds_r_bsr = xr.open_dataset(r".../NC/dwd_rad_monthly_31467_bsr.nc")

### raster image

In [None]:
# Facet maps of the first twelve time steps, four panels per row
first_twelve = slice(0, 12)
facet = {"col": "time", "col_wrap": 4}

# RLP extent
ds_t_rlp["temp"].isel(time=first_twelve).plot(**facet)
ds_p_rlp["prec"].isel(time=first_twelve).plot(**facet)
ds_r_rlp["rad"].isel(time=first_twelve).plot(**facet)

# biosphere reserve extent
ds_t_bsr["temp"].isel(time=first_twelve).plot(**facet)
ds_p_bsr["prec"].isel(time=first_twelve).plot(**facet)
ds_r_bsr["rad"].isel(time=first_twelve).plot(**facet)

### time series for the total mean of each variable

In [None]:
import matplotlib.pyplot as plt

def plot_with_trend(da, title=None, ylabel=None, color="tab:blue", trend_color="red"):
    """Plot the area mean of *da* over time together with a linear trend line.

    Parameters
    ----------
    da
        Array with dimensions ``x``, ``y`` and ``time``.
    title, ylabel : str, optional
        Axes title and y-axis label; left at the plotting defaults when None.
    color, trend_color : str
        Line colours of the area-mean series and of the trend line.

    The figure is shown directly; nothing is returned.
    """
    ts = da.mean(dim=("x", "y"))

    # first-order polynomial fit along time, evaluated at the original time steps
    coeffs = ts.polyfit(dim="time", deg=1)
    trend = xr.polyval(ts["time"], coeffs.polyfit_coefficients)

    fig, ax = plt.subplots(figsize=(8, 4))
    ts.plot(ax=ax, label="mean", color=color)  # legend label had a stray ")"
    trend.plot(ax=ax, label="linear trend", linestyle="--", color=trend_color)

    if title is not None:
        ax.set_title(title)
    if ylabel is not None:
        ax.set_ylabel(ylabel)

    ax.legend()
    ax.grid(True)
    plt.tight_layout()
    plt.show()

# Examples for the biosphere reserve (BSR)
plot_with_trend(
    ds_t_bsr["temp"],
    title="BSR: monthly temperature (area mean)",
    ylabel="°C",
    color="#de74a2",
    trend_color="#b81b60",
)
plot_with_trend(
    ds_p_bsr["prec"],
    title="BSR: monthly precipitation (area mean)",
    ylabel="mm",
    color="#90c3dd",
    trend_color="#1a5771",
)
# NOTE(review): the monthly radiation grids may be kWh/m² sums rather than W/m² -
# confirm the unit against the DWD dataset description before publishing.
plot_with_trend(
    ds_r_bsr["rad"],
    title="BSR: monthly radiation (area mean)",
    ylabel="W/m²",
    color="#f7b101",
    trend_color="#bc7e1b",
)


### mean and std raster of each variable for each month

In [4]:
from pathlib import Path

# Target folder for the monthly-mean rasters; created if it does not exist yet.
out_dir = Path(r"E:/Hiwi/DWD/NC/output/bsr_monthly_means")
out_dir.mkdir(parents=True, exist_ok=True)

In [None]:
def _monthly_stack(da, reducer):
    """Reduce *da* per calendar month with the groupby method named *reducer*.

    *reducer* is "mean" or "std". The result gets the CRS EPSG:31467 and is
    ordered (month, y, x) so that each month is written as one raster band.
    """
    grouped = da.groupby("time.month")
    stack = getattr(grouped, reducer)(dim="time")
    stack = stack.rio.write_crs("EPSG:31467")
    return stack.transpose("month", "y", "x")


# monthly means: one 12-band GeoTIFF per variable

temp = ds_t_bsr["temp"]
temp_monthly_mean = _monthly_stack(temp, "mean")
temp_monthly_mean.rio.to_raster(out_dir / "bsr_temp_monthly_mean.tif")

prec = ds_p_bsr["prec"]
prec_monthly_mean = _monthly_stack(prec, "mean")
prec_monthly_mean.rio.to_raster(out_dir / "bsr_prec_monthly_mean.tif")

rad = ds_r_bsr["rad"]
rad_monthly_mean = _monthly_stack(rad, "mean")
rad_monthly_mean.rio.to_raster(out_dir / "bsr_rad_monthly_mean.tif")

# standard deviation: written to *_std.tif so the mean rasters above are
# no longer overwritten

temp_monthly_std = _monthly_stack(temp, "std")
temp_monthly_std.rio.to_raster(out_dir / "bsr_temp_monthly_std.tif")

prec_monthly_std = _monthly_stack(prec, "std")
prec_monthly_std.rio.to_raster(out_dir / "bsr_prec_monthly_std.tif")

rad_monthly_std = _monthly_stack(rad, "std")
rad_monthly_std.rio.to_raster(out_dir / "bsr_rad_monthly_std.tif")



#### saving as .tif

In [15]:
# One single-band GeoTIFF per calendar month and variable: monthly means
mean_stacks = (
    ("temp", temp_monthly_mean),   # temperature
    ("prec", prec_monthly_mean),   # precipitation
    ("rad", rad_monthly_mean),     # radiation
)
for m in range(1, 13):
    for label, stack in mean_stacks:
        stack.sel(month=m).rio.to_raster(out_dir / f"bsr_{label}_mean_month_{m:02d}.tif")


# The standard deviations go into their own folder
out_dir = Path(r"E:/Hiwi/DWD/NC/output/bsr_monthly_std")
out_dir.mkdir(parents=True, exist_ok=True)

std_stacks = (
    ("temp", temp_monthly_std),    # temperature
    ("prec", prec_monthly_std),    # precipitation
    ("rad", rad_monthly_std),      # radiation
)
for m in range(1, 13):
    for label, stack in std_stacks:
        stack.sel(month=m).rio.to_raster(out_dir / f"bsr_{label}_std_month_{m:02d}.tif")

In [22]:
import pandas as pd

def monthly_stats(da):
    """Return mean and standard deviation of the area mean per calendar month.

    *da* is first averaged over ``x`` and ``y``; the resulting time series is
    grouped by ``time.month``.

    Returns a ``pandas.DataFrame`` with the columns ``mean`` and ``std`` and
    an index named ``month``. The index holds the months that actually occur
    in the data, so a series that does not cover all twelve calendar months
    no longer fails with a length mismatch.
    """
    ts = da.mean(dim=("x", "y"))  # area mean

    by_month = ts.groupby("time.month")  # grouped once, reduced twice
    monthly_mean = by_month.mean()
    monthly_std = by_month.std()

    return pd.DataFrame(
        {"mean": monthly_mean.values, "std": monthly_std.values},
        index=pd.Index(monthly_mean["month"].values, name="month"),
    )

# Monthly statistics of the area mean for the biosphere reserve (BSR)
df_t = monthly_stats(ds_t_bsr["temp"])
df_p = monthly_stats(ds_p_bsr["prec"])
df_r = monthly_stats(ds_r_bsr["rad"])

for table in (df_t, df_p, df_r):
    print(table)

out_dir = Path(r"E:/Hiwi/DWD/NC/output")
out_dir.mkdir(parents=True, exist_ok=True)

# one CSV per variable
csv_targets = (
    (df_t, "bsr_temp_monthly_stats.csv"),
    (df_p, "bsr_prec_monthly_stats.csv"),
    (df_r, "bsr_rad_monthly_stats.csv"),
)
for table, csv_name in csv_targets:
    table.to_csv(out_dir / csv_name)

print("Saved to:", out_dir.absolute())
print("Exists:", out_dir.exists())

            mean       std
month                     
1       1.654713  2.067297
2       2.844765  2.495236
3       5.901751  1.651230
4       9.994435  1.807431
5      13.952507  1.527919
6      17.848197  1.389551
7      19.247105  1.647942
8      18.840152  1.731219
9      14.803395  1.657592
10     10.557222  1.624670
11      5.764917  1.205090
12      2.718628  1.822824
            mean        std
month                      
1      77.471457  32.226636
2      66.204058  39.927300
3      63.847937  36.357385
4      47.048655  25.807940
5      76.423924  38.250555
6      64.735605  30.560202
7      71.431368  37.151597
8      71.297713  37.213006
9      60.444484  28.480695
10     70.671368  34.477274
11     73.531547  35.125947
12     88.139731  43.417644
             mean        std
month                       
1       25.006451   4.259821
2       44.058095   8.256830
3       86.158563  13.608779
4      129.068333  19.353434
5      158.033454  18.979029
6      176.106763  17.15895