## Creating xarray datasets "from scratch" when the geospatial data are on an irregular grid

### Import libraries

In [None]:
import xarray as xr
import numpy as np

### Create some fake input data for the data variables and the coordinate variables

In [None]:
#-- grid size and the two 1D axes the fake irregular grid is derived from
nlat, nlon = 10, 5
lats = np.linspace(-90.0, 90.0, num=nlat)
lons = np.linspace(-45.0, 45.0, num=nlon)

#-- expand both axes to (nlat, nlon) arrays via broadcasting; each latitude row gets its
#   own longitude offset (0.5 * latitude), which fakes an irregular grid (the offset is optional)
lats2D = np.tile(lats[:, np.newaxis], (1, nlon))
lons2D = lons[np.newaxis, :] + 0.5 * lats2D

#-- running integer counter 0 .. nlat*nlon-1 laid out on the same 2D grid, used as fake data
mydata1 = np.arange(nlat * nlon).reshape((nlat, nlon))

Look at `lons2D`:

In [None]:
#-- shape first, then the full array, each on its own line
print(lons2D.shape, lons2D, sep="\n")

In [None]:
#-- optional quick look (disabled): uncomment to plot mydata1 wrapped in a bare
#   DataArray, i.e. without any dims, coords or attrs attached
#xr.DataArray(data=mydata1).plot();

### Create an xarray DataArray representing one data variable

In [None]:
#-- create an xarray DataArray with metadata for the fake data on an irregular grid.
#   Two coordinate variables are attached to describe each dimension:
#     latitude(rlat, rlon), longitude(rlat, rlon)  2D, in degrees_north / degrees_east
#     latitude2(rlat),      longitude2(rlon)       1D, in m (metric coordinate system)
da1 = xr.DataArray(
    data=mydata1,
    dims=("rlat", "rlon"),
    coords={
        "latitude":   (("rlat", "rlon"), lats2D, {'long_name': 'latitude', 'units': 'degrees_north'}),
        "longitude":  (("rlat", "rlon"), lons2D, {'long_name': 'longitude', 'units': 'degrees_east'}),
        #-- 1D coordinates: dims written as real 1-tuples (a bare ("rlat") is just the string "rlat")
        "latitude2":  (("rlat",), lats, {'long_name': 'distance to origin in y-direction', 'units': 'm'}),
        "longitude2": (("rlon",), lons, {'long_name': 'distance to origin in x-direction', 'units': 'm'}),
    },
    #-- a flux is mass per area per time, hence s-1 (the former s-2 was dimensionally wrong)
    #   NOTE(review): 'carbon_flux' looks like a placeholder rather than an official
    #   CF standard name -- confirm before using this as a template for real data
    attrs={'standard_name': 'carbon_flux', 'units': 'kg m-2 s-1'},
)

#-- create a second DataArray that inherits dims, coords and attrs from da1 but holds new data
da2 = da1.copy(data=mydata1**2)
#-- overwrite the standard_name inherited from da1 (units are kept as inherited)
da2.attrs["standard_name"] = 'nitrogen_flux'

### Convert the DataArrays into a Dataset

In [None]:
#-- bundle both DataArrays into a single Dataset, one data variable per flux
ds = xr.Dataset(data_vars=dict(c_flux=da1, n_flux=da2))

In [None]:
#-- display the assembled Dataset as the cell output
ds

.... Much more descriptive information is needed so that the data can be properly georeferenced and, e.g., projected!  
.... This is painful even for advanced Python users....

### It is much less painful to open a netCDF file with xarray when the georeferencing is already CF-compliant  

In [None]:
!wget https://github.com/TUBklima/UC2Data/blob/master/tests/test_files/DS_example_multidim.nc
ds_already = xr.open_dataset("DS_example_multidim.nc") 

In [None]:
#-- display the Dataset read from the netCDF file as the cell output
ds_already

In [None]:
#-- keep only the first time step (as a length-1 dimension) and index 0 along z,
#   then plot the variable "ta"
ds_already.isel({"time": slice(0, 1), "z": 0})["ta"].plot();

In [None]:
#-- plot the "lon" variable of the file's Dataset
ds_already["lon"].plot();

In [None]:
#-- optional (disabled): uncomment to plot the hand-made c_flux variable for comparison
#ds['c_flux'].plot()

### Now you could, e.g., use rioxarray, cartopy or xESMF to reproject the data... but even this is non-trivial