In [23]:
import os              # module for interacting with the operating system
import xarray as xr    # THIS IS THE LIBRARY WE WILL BE EXPLORING

import numpy as np

In [25]:
# Path to the sample NetCDF file, which is expected in the current working directory.
fp = os.path.join(os.getcwd(), 'SALT.0001.nc')

# Open the file as an xarray.Dataset and show its summary as the cell output.
data = xr.open_dataset(filename_or_obj=fp)
data

In [5]:
# data.i4

In [6]:
# data.lat.data.shape

In [7]:
# data.i1

In [8]:
# data.lon

In [9]:
# fp = os.path.join(os.getcwd(), 'FW_data_CESM_LW_2006_2100.nc')
# xr.open_dataset(fp)

In [None]:
##https://databasin.org/datasets/901304723480477baf71cc669bf0714f/
# fp = os.path.join(os.getcwd(), 'historic_ppt.nc')
# xr.open_dataset(fp)

# TODO
Make the example arrays bigger, so that there is enough data to demonstrate slicing.

## Creating a 2-dimensional xarray.DataArray

These variables have an associated meaning: they describe constant/fixed/independent quantities, unlike the varying/measured/dependent quantities that belong in data variables.

In [11]:
# Measured values, one per (lat, lon) point, stored as a 2x2 numpy.ndarray.
sd_prec_data = np.array([
    [1, 2],
    [0, 1],
])

# Dimension names, in the same order as the axes of the array (tuple of strings).
dims = ('lat', 'lon')

# Tick labels used to index along each dimension (dictionary: name -> labels).
coords = dict(lat=[10, 20], lon=[30, 40])

# Metadata describing the array as a whole (dictionary).
attrs = dict(title='single day precipitation', units='ml / pixel')

In [12]:
# Combine values, dimension names, coordinate labels and metadata into one labelled array.
sd_prec = xr.DataArray(sd_prec_data, coords=coords, dims=dims, attrs=attrs)
sd_prec

In [13]:
# Metadata can still be edited after the array has been created.
sd_prec.attrs.update(description='a small example of an xarray.DataArray')

# Each coordinate is itself an xarray.DataArray, so it has its own attrs
# dictionary that can be filled in the same way.
sd_prec.lat.attrs.update(standard_name='latitude', units='degrees_north')
sd_prec.lon.attrs.update(standard_name='longitude', units='degrees_east')
sd_prec

## Example 2: adding one more dimension

In [14]:
# suppose we take measurements for precipitation on 3 days
prec_data = np.array([[[1,2],[0,1]],
                      [[2,2],[2,2]],
                      [[0,0],[0,0]]
                     ])
prec_data[0]

array([[1, 2],
       [0, 1]])

In [15]:
# Dimension names, now with a leading 'day' axis.
dims = ('day', 'lat', 'lon')

# Tick labels for each dimension: three days on the same lat/lon labels as before.
coords = dict(day=[1, 2, 3], lat=[10, 20], lon=[30, 40])

# Build the three-day array, starting from the single-day array's metadata.
prec = xr.DataArray(prec_data, coords=coords, dims=dims, attrs=sd_prec.attrs)

# Coordinate metadata: lat/lon take their attrs from sd_prec, 'day' gets its own.
prec.coords['lat'].attrs = sd_prec.coords['lat'].attrs
prec.coords['lon'].attrs = sd_prec.coords['lon'].attrs
prec.coords['day'].attrs = {'standard_name': 'day of collection'}
prec

In [16]:
# The title carried over from sd_prec no longer fits, so replace it.
prec.attrs.update(title='precipitation data on days 1, 2, and 3')
prec

### 

## Indexing
Read more in the xarray indexing guide: https://docs.xarray.dev/en/stable/user-guide/indexing.html

In [17]:
# multi_day_prec[multi_day_prec.doc == 2]

In [18]:
# multi_day_prec[multi_day_prec.doc == 2]

## Computation
https://rabernat.github.io/research_computing_2018/xarray.html

## Reductions
We can reduce xarray DataArrays along any number of axes:

In [19]:
# Collapse the 'day' dimension by averaging, leaving one value per (lat, lon) point.
# Pass keep_attrs=True to mean() to carry prec's attributes over to the result.
avg_prec = prec.mean('day')

# Give the averaged array its own metadata.
avg_prec.attrs = dict(
    title='average precipitation over three days',
    units='ml/pixel per day',
)
avg_prec

# Creating an xarray.Dataset
https://docs.xarray.dev/en/stable/generated/xarray.Dataset.html

A multi-dimensional, in memory, array database.

A dataset resembles an in-memory representation of a NetCDF file, and consists of variables, coordinates and attributes which together form a self describing dataset.

In [20]:
# Bundle the two DataArrays into a single Dataset with its own global attributes.
# NOTE(review): this rebinds `prec_data`, which earlier held the raw numpy array
# passed as `data` when building `prec`; re-running that earlier cell after this
# one would receive the Dataset instead. Consider a distinct name for the Dataset.
prec_data = xr.Dataset(
    data_vars=dict(avg_prec=avg_prec, prec=prec),
    attrs=dict(
        creator_name='Carmen GG',
        title='precipitation data',
        description='a simple example of an xarray.Dataset',
    ),
)

In [21]:
# Display the Dataset's summary as the cell output.
prec_data

In [27]:
# Write the Dataset to a NetCDF file in the current working directory.
fp = os.path.join(os.getcwd(),'prec_data.nc')
prec_data.to_netcdf(fp)

# Read the file back to check the round trip.
# load_dataset reads the contents into memory and closes the file. open_dataset
# would keep prec_data.nc open, which can make the to_netcdf call above fail
# (e.g. with a permission error) when this cell is run a second time.
check_prec = xr.load_dataset(fp)
check_prec