# Xarray
## Paul Eldridge

Xarray is a Python package for working with labeled multi-dimensional arrays. It is particularly well suited to reading and writing netCDF files.

In [2]:
import datetime
import numpy as np
import pandas as pd
import xarray as xr


In [5]:
# Random "temperatures": standard-normal noise (mean 0, std 1) added to 273,
# in a 5 x 3 x 4 array.
# The Generator is seeded so the same values are produced after every
# kernel restart, which keeps the notebook reproducible.
rng = np.random.default_rng(seed=42)
data = 273 + rng.standard_normal((5, 3, 4))
data

array([[[273.28239724, 272.39681535, 272.83836254, 272.01551886],
        [272.50156357, 273.21128587, 274.59331446, 272.68284241],
        [272.76197732, 273.71141569, 272.89262794, 272.63358187]],

       [[271.01922214, 273.25043174, 273.61887705, 272.83974387],
        [273.04405168, 271.65286474, 273.56231077, 273.33372038],
        [273.46419864, 271.71473318, 273.64877137, 273.49002289]],

       [[273.22094163, 272.07816224, 271.74932835, 273.57780827],
        [273.02207703, 272.69236729, 272.56966262, 271.64117828],
        [271.1664246 , 273.70446373, 272.99368795, 274.48808177]],

       [[274.61938129, 272.97288639, 272.97484997, 273.07252471],
        [274.44866978, 273.66325816, 273.02326078, 272.98514814],
        [271.68265849, 272.97898421, 274.08794344, 273.83134043]],

       [[272.5630241 , 270.6628183 , 272.15086606, 273.01293687],
        [273.09927927, 274.60739091, 272.1160282 , 274.75026659],
        [274.59332496, 273.23106199, 272.78791382, 272.29561093]]])

In [6]:
# Wrap the raw NumPy array in an xarray DataArray.
# No dimension names or coordinates are supplied at this stage.
temps = xr.DataArray(data=data)
temps

In [29]:
# Rebuild the DataArray, this time giving each of the three axes a name.
temps = xr.DataArray(
    data,
    dims=('time', 'lat', 'lon'),
)
temps

In [30]:
# Five consecutive daily timestamps starting on 2018-01-01.
times = pd.date_range(start='2018-01-01', periods=5, freq='D')
times

DatetimeIndex(['2018-01-01', '2018-01-02', '2018-01-03', '2018-01-04',
               '2018-01-05'],
              dtype='datetime64[ns]', freq='D')

In [32]:
# Sample coordinate values: 3 evenly spaced latitudes and 4 evenly spaced longitudes.
lats = np.linspace(25, 55, 3)
lons = np.linspace(-120, -60, 4)

# Build the DataArray with named dimensions and attach the matching
# coordinate values to each one, so every temperature value is labelled
# with a time, a latitude and a longitude.
temps = xr.DataArray(
    data,
    coords={'time': times, 'lat': lats, 'lon': lons},
    dims=['time', 'lat', 'lon'],
)

temps

In [33]:
# Attach attributes (metadata) to the DataArray.
temps.attrs.update({
    'units': 'kelvin',
    'standard_name': 'air_temperature',
})

temps

In [34]:
# Convert from kelvin to Celsius.
tempC = temps - 273.15

# Set the metadata explicitly on the converted array. Depending on the xarray
# version and the `keep_attrs` option, arithmetic either drops attributes or
# carries over the source's 'kelvin' units; neither is right for this result.
tempC.attrs['units'] = 'degC'
tempC.attrs['standard_name'] = 'air_temperature'

tempC

In [35]:
# Build a DataArray of random "pressures" on the same time/lat/lon grid as
# `temps`: normal noise with standard deviation 2 centred on 1013.25.
# The Generator is seeded so the values are reproducible across kernel restarts.
pressure_rng = np.random.default_rng(seed=43)
pressureRand = 1013.25 + 2 * pressure_rng.standard_normal((5, 3, 4))
pressures = xr.DataArray(pressureRand, coords=[times, lats, lons],
                         dims=['time', 'lat', 'lon'])

# Attach metadata describing the variable.
pressures.attrs['units'] = 'hPa'
pressures.attrs['standard_name'] = 'air_pressure'

pressures

In [36]:
# Combine the two DataArrays into a single Dataset.
# Both arrays were built on the same time/lat/lon coordinates, so they share them here.
ds = xr.Dataset({
    'Temperatures': temps,
    'Pressures': pressures,
})
ds

In [37]:
# A variable stored in a Dataset can be retrieved in two equivalent ways:
# attribute-style access or dictionary-style (key) access.
ds.Pressures  # attribute access; not displayed, since only a cell's last expression is shown

ds['Pressures']  # key access; this is the value the cell displays

In [38]:
# .sel() selects data by coordinate value. Give it one or more coordinate
# names together with the values to match, and it returns the data at those
# coordinates. Here a single timestamp is requested.
named_selection = temps.sel({'time': '2018-01-02'})
named_selection

In [39]:
# Slice along the coordinates to keep only a specific range of times,
# latitudes and longitudes.
temps.sel(
    time=slice('2018-01-01', '2018-01-03'),
    lat=slice(25, 45),
    lon=slice(-110, -70),
)

In [22]:
# Label-based lookup with .loc[]: select the entry for one date along 'time'.
temps.loc[{'time': '2018-01-01'}]

In [40]:
# Another selection method: .loc[], similar to pandas.
# Each dimension is given a label-based slice.
temps.loc[{
    'time': slice('2018-01-01', '2018-01-01'),
    'lat': slice(23, 45),
    'lon': slice(-110, -70),
}]

In [42]:
# Import a dataset from a file into xarray.
# Note: this rebinds `ds`, replacing the Dataset that was assembled earlier
# from `temps` and `pressures`.
filepath = 'NOAAGlobalTemp_v5.0.0_gridded_s188001_e202212_c20230108T133308.nc'
ds = xr.open_dataset(filename_or_obj=filepath)

ds