In [None]:
import xarray as xr
import os
import numpy as np

This notebook combines all downloaded NetCDF (`.nc`) files (1979 - 2023) into `.zarr` stores.

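One of the onshore files is damaged, and re-downloading it did not fix the problem. April 2021 is therefore excluded: the file was renamed from `.nc` to `.nc_damaged`, so it no longer matches the `.nc` filter used when listing the input files.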

In [None]:
# Folder that holds the downloaded onshore .nc files
directory = "/lsdf/kit/imk-tro/projects/Gruppe_Quinting/om1434/onshore"

# Keep only the entries whose name ends in '.nc', then turn them into full paths
nc_names = [name for name in os.listdir(directory) if name.endswith('.nc')]
nc_files = [os.path.join(directory, name) for name in nc_names]

# Open all files as one dataset; 'by_coords' combines the pieces according to
# their coordinate values.
# Alternative kept for reference: combine='nested', concat_dim='time'
dataset_by_coords = xr.open_mfdataset(nc_files, combine='by_coords')

# Show the combined dataset as the cell output
dataset_by_coords

In [None]:
import xarray as xr
import os

# Destination store for the combined onshore dataset.
# (The directory listing is not repeated here: nothing in this cell uses it.)
zarr_path = '/lsdf/kit/imk-tro/projects/Gruppe_Quinting/om1434/onshore/onshore.zarr'

# Chunk layout for the arrays written to the store.
# TODO(EliasKng): era5.zarr is saved with time-chunk size of 1. Consider changing to the same.
dataset_by_coords = dataset_by_coords.chunk({'time': 168, 'latitude': 185, 'longitude': 271})

# By default to_zarr refuses to write into an existing store, so re-running
# this cell after a successful write would raise. Skip the write when the
# store is already present; remove the store by hand to force a rebuild
# (e.g. after an interrupted write left it incomplete).
if os.path.exists(zarr_path):
    print(f"Zarr store already exists, skipping write: {zarr_path}")
else:
    dataset_by_coords.to_zarr(zarr_path)
    print(f"Dataset saved to {zarr_path}")

There is one irregularity in the offshore data, between 2019-12-31 and 2020-01-01:
the 10pm and 11pm time steps of 2019-12-31 are missing. However, since the ERA5 data is 6-hourly, this should not affect training (we won't use those samples anyway).

In [None]:
import xarray as xr
import numpy as np
# Open the offshore store. The engine is named explicitly (as the other zarr
# reads in this notebook do) instead of relying on xarray's engine guessing.
dataset = xr.open_dataset(
    '/lsdf/kit/imk-tro/projects/Gruppe_Quinting/om1434/offshore/offshore.zarr',
    engine='zarr',
)

# Step between consecutive time stamps; any step other than one hour is an
# irregularity. diff() labels each step with the later of the two time stamps
# by default, so the times reported below are the first stamp after each gap.
time_diff = dataset['time'].diff(dim='time')
irregularities = time_diff[time_diff != np.timedelta64(1, 'h')]

if irregularities.size > 0:
    print("Irregularities found in the time dimension:")
    print(irregularities)
    print("Exact date/time of irregularities:")
    print(irregularities['time'].values)
else:
    print("No irregularities found in the time dimension. The frequency is hourly.")


# Show the time steps available on the day mentioned in the note above this cell
dataset.sel(time='2019-12-31')

In [None]:
# Open the offshore zarr store as `dataset`
offshore_zarr_path = '/lsdf/kit/imk-tro/projects/Gruppe_Quinting/om1434/offshore/offshore.zarr'
dataset = xr.open_dataset(offshore_zarr_path, engine='zarr')

In [None]:
# Open the ERA5 zarr store as `era5`; its longitude/latitude coordinates are
# used further down as the target grid
era5_zarr_path = "/lsdf/kit/imk-tro/projects/Gruppe_Quinting/ec.era5/1959-2023_01_10-wb13-6h-1440x721.zarr"
era5 = xr.open_dataset(era5_zarr_path, engine='zarr')

Transform power data and plot on a world map

In [None]:
# NOTE(review): `random_date` is not defined in any cell visible here; it has
# to exist in the kernel before this cell runs - confirm where it comes from.

# Wrap longitudes into [0, 360) and restore ascending order
wrapped_longitude = dataset["longitude"] % 360
dataset["longitude"] = wrapped_longitude
dataset = dataset.sortby("longitude")

# Pick the time step 24 hours after `random_date`
target_time = random_date + np.timedelta64(24, 'h')
target_dataset = dataset.sel(time=target_time)

# Put the slice onto the ERA5 longitude/latitude coordinates. With method=None
# only exactly matching coordinate values are taken over; no nearest-neighbour
# or fill-forward matching is done.
era5_grid = {
    "longitude": era5["longitude"].values,
    "latitude": era5["latitude"].values,
}
target_dataset = target_dataset.reindex(era5_grid, method=None)

In [None]:
import cartopy.crs as ccrs
import cartopy.feature as cfeature
import matplotlib.pyplot as plt

# Draw the `wofcfr` field of the selected slice on a plate carree map
plate_carree = ccrs.PlateCarree()
fig, ax = plt.subplots(figsize=(15, 10), subplot_kw=dict(projection=plate_carree))
target_dataset["wofcfr"].plot(ax=ax, transform=plate_carree)

# Overlay country borders (dotted) and coastlines, in that order
map_overlays = (
    (cfeature.BORDERS, {'linestyle': ':'}),
    (cfeature.COASTLINE, {}),
)
for overlay, overlay_style in map_overlays:
    ax.add_feature(overlay, **overlay_style)

plt.show()

In [None]:
# NOTE(review): `era5_random_date` is not defined in any cell visible here; it
# has to exist in the kernel before this cell runs - confirm where it comes from.

# Plot the '2m_temperature' variable on a plate carree map
map_projection = ccrs.PlateCarree()
fig, ax = plt.subplots(
    figsize=(15, 10),
    subplot_kw={'projection': map_projection},
)
temperature_2m = era5_random_date['2m_temperature']
temperature_2m.plot(ax=ax, transform=ccrs.PlateCarree())

# Country borders (dotted) first, then coastlines
ax.add_feature(cfeature.BORDERS, linestyle=':')
ax.add_feature(cfeature.COASTLINE)

plt.show()