# NetCDF and xarray

In [4]:
import os
import urllib
import pandas as pd
import numpy as np
import xarray as xr

## Creating an xarray.DataArray

In [None]:
# Stack three constant 5x5 layers (all 0s, all 1s, all 2s) into a single
# integer array of shape (3, 5, 5).
layers = [np.full((5, 5), value) for value in range(3)]
temp_data = np.stack(layers).astype(int)

print(temp_data)

In [9]:
# Names of the three axes, in order.
dims = ('time', 'lat', 'lon')

# Coordinate labels for each dimension.
time_index = pd.date_range(start="2022-09-01", end="2022-09-03")  # three daily timestamps
lat_values = np.arange(70, 20, -10)  # 70, 60, 50, 40, 30 (the stop value 20 is excluded)
lon_values = np.arange(60, 110, 10)  # 60, 70, 80, 90, 100
coords = {'time': time_index, 'lat': lat_values, 'lon': lon_values}

In [6]:
# Metadata attached to the temperature array.
# The key is spelled 'standard_name' (underscore, no space) so it matches the
# 'standard_name' attributes set on the lat/lon coordinates in this notebook,
# and the unit uses the case-sensitive spelling 'degree_C'.
attrs = {
    'title': 'example temperature data',
    'standard_name': 'air_temperature',
    'units': 'degree_C',
}

In [12]:
# Bundle the raw values with their dimension names, coordinate labels and
# metadata into one labelled array, then display it.
temp = xr.DataArray(temp_data, coords=coords, dims=dims, attrs=attrs)
temp

## Updating attributes

In [15]:
# Add one more attribute to the array itself.
temp.attrs['description'] = 'simple example of an xarray.DataArray'

# Coordinates carry their own attribute dictionaries.
# time: replace the whole attribute dictionary.
temp['time'].attrs = {'description': 'date of measurement'}

# lat / lon: add a standard name and units to the existing attributes.
temp['lat'].attrs.update({'standard_name': 'grid_latitude',
                          'units': 'degree_N'})
temp['lon'].attrs.update({'standard_name': 'grid_longitude',
                          'units': 'degree_E'})
temp

## Accessing data using indexing

In [17]:
# select the temperature value at the given (time, lat, lon) coordinates
temp.sel({'time': "2022-09-01", 'lat': 40, 'lon': 80}, method='nearest')

In [18]:
# lat = 44 is not a grid point; method='nearest' picks the closest
# available label instead (40 on this grid)
point = {'time': "2022-09-01", 'lat': 44, 'lon': 80}
temp.sel(point, method='nearest')

In [19]:
# select several latitudes at once: np.arange(40, 60, 10) yields 40 and 50
lat_subset = np.arange(40, 60, 10)
temp.sel(time="2022-09-01", lat=lat_subset, lon=80)

In [37]:
# select several dates at once: every day from 2022-09-01 through 2022-09-02
two_days = pd.date_range(start="2022-09-01", end="2022-09-02")
temp.sel(time=two_days, lat=40, lon=80)

## Reduction method

In [21]:
# Average over the 'time' dimension; the result no longer has that dimension.
avg_temp = temp.mean('time')
# Give the averaged array its own title.
avg_temp.attrs['title'] = 'average temperature over 3 days'
avg_temp

## Create an xarray.Dataset from DataArrays

In [22]:
# Metadata describing the dataset as a whole.
attrs = dict(title='averaged and daily temperature data',
             description='example xarray.DataSet')

# Arrays to bundle, keyed by the variable name each gets in the dataset.
data_vars = dict(avg_temp=avg_temp, temp=temp)

temp_dataset = xr.Dataset(data_vars, attrs=attrs)

temp_dataset

## Read and write xarray.Dataset to netCDF

In [23]:
# Write the dataset to a netCDF file in the current working directory.
fp = os.path.join(os.getcwd(), 'temp_dataset.nc')
temp_dataset.to_netcdf(fp)

# Read the file back to verify the round trip. load_dataset reads everything
# into memory and closes the file, so re-running this cell can overwrite
# temp_dataset.nc without tripping over a file still held open by `check`.
check = xr.load_dataset(fp)
check

## Read in a real ADC dataset

In [24]:
# `import urllib` alone does not guarantee that the `request` submodule is
# loaded; import it explicitly so urllib.request.urlretrieve always resolves.
import urllib.request

url = 'https://arcticdata.io/metacat/d1/mn/v2/object/urn%3Auuid%3A792bfc37-416e-409e-80b1-fdef8ab60033'

# Download the file under a relative name, i.e. into the current working
# directory. urlretrieve returns a (local filename, response headers) tuple.
msg = urllib.request.urlretrieve(url, "FW_data_CESM_LW_2006_2100.nc")

In [25]:
# Open the netCDF file from the current working directory and display it.
nc_name = "FW_data_CESM_LW_2006_2100.nc"
fp = os.path.join(os.getcwd(), nc_name)
fw_data = xr.open_dataset(filename_or_obj=fp)
fw_data

## Select a specific data array from an xarray.Dataset

In [27]:
# take the netPrec_annual variable, then keep only the ensemble member
# labelled 2
net_prec = fw_data['netPrec_annual']
member2 = net_prec.sel(member=2)
member2

In [39]:
# Restrict member 2 to the time labels 2022 through 2100 once and reuse it.
future = member2.sel(time=np.arange(2022, 2101))

# Maximum over every dimension. .item() unwraps the bare Python scalar and
# drops all coordinates and attributes; print() is needed because only the
# last expression of a cell is displayed.
xmax = future.max()
print(xmax.item())

# To keep the labels of where the maximum occurs, mask everything except the
# maximum and drop the masked entries. max(keepdims=True) would keep size-1
# dimensions but remove their coordinates, so it cannot report the time.
xmax = future.where(future == xmax, drop=True)
xmax