## Tutorial for multivariate climate data

Import required packages

In [3]:
import os
import sys
import time
import warnings
import xarray as xr
import numpy  as np
from pathlib import Path

# Import library specific modules
sys.path.append("../../")
from library.spod_low_storage import SPOD_low_storage
from library.spod_low_ram     import SPOD_low_ram
from library.spod_streaming   import SPOD_streaming
import library.utils as spod_utils
import library.utils as utils

# Current working directory (used further down to build the results directory)
CWD = os.getcwd()

/Users/gian/GIT-GM/pyspod/tutorials/climate


In [6]:
# Inspect and load data
# The location of the netCDF file can be overridden through the
# PYSPOD_MEI_DATA environment variable, so the notebook can run on
# machines other than the author's; when the variable is not set,
# the original local path is used.
file = os.environ.get(
	'PYSPOD_MEI_DATA',
	'/Users/gian/Desktop/SEOF_reanalysis-master/data/E20C/E20C_MONTHLYMEAN00_1900_2010_MEI.nc')

# Data is in netCDF format:
# we use xarray to load the dataset and extract its content
ds = xr.open_dataset(file)

# Let's take a look at its content
print(ds)

<xarray.Dataset>
Dimensions:    (latitude: 121, longitude: 240, time: 1332)
Coordinates:
  * longitude  (longitude) float32 0.0 1.5 3.0 4.5 ... 354.0 355.5 357.0 358.5
  * latitude   (latitude) float32 90.0 88.5 87.0 85.5 ... -87.0 -88.5 -90.0
  * time       (time) datetime64[ns] 1900-01-01 1900-02-01 ... 2010-12-01
Data variables:
    sst        (time, latitude, longitude) float32 ...
    msl        (time, latitude, longitude) float32 ...
    tcc        (time, latitude, longitude) float32 ...
    u10        (time, latitude, longitude) float32 ...
    v10        (time, latitude, longitude) float32 ...
    t2m        (time, latitude, longitude) float32 ...
Attributes:
    Conventions:  CF-1.6
    history:      2020-10-25 20:57:11 GMT by grib_to_netcdf-2.19.0: grib_to_n...


The netCDF file contains 3 coordinates:
- longitude,
- latitude, and
- time,

along with 6 variables:
- sst (sea surface temperature)
- msl (mean sea level pressure)
- tcc (total cloud cover)
- u10 (horizontal velocity; u-component)
- v10 (horizontal velocity; v-component)
- t2m (2-meter temperature)

In [8]:
# we set the variables we want to use for the analysis
# (we select all the variables present)
variables = ['sst', 'msl', 'tcc', 'u10', 'v10', 't2m']

# number of variables: defined in this cell because it sizes the data
# array allocated below (otherwise a fresh kernel raises a NameError)
n_vars = len(variables)

# we extract time, longitude and latitude
t = np.array(ds['time'])
x1 = np.array(ds['longitude'])
x2 = np.array(ds['latitude'])

# and load data in RAM, laid out as (time, longitude, latitude, variable)
X = np.empty([t.shape[0], x1.shape[0], x2.shape[0], n_vars])
for i,var in enumerate(variables):
	# each variable is stored as (time, latitude, longitude):
	# swap the two spatial axes to match the layout of X
	X[...,i] = np.einsum('ijk->ikj', np.array(ds[var]))
	# replace NaNs with zeros (infinities become large finite numbers)
	X[...,i] = np.nan_to_num(X[...,i])

# let's take a look at the shape of the data loaded in memory
print(X.shape)

(1332, 240, 121, 6)


Note that: 
- the first dimension is 1332, 
- the second is 240,
- the third is 121, and 
- the fourth is 6. 

The first dimension is the number of time snapshots, the last dimension is the number of variables, and the middle dimensions are the spatial dimensions; in our case there are two of them, corresponding to longitude and latitude.

In [None]:
# define the number of variables
# and the time snapshots to
# use for the analysis
n_vars = len(variables)
nt = 300

# Define required parameters
dt = 720       # in hours
T_approx = 876 # approximate period (in days)
overlap_in_percent = 0
block_dimension = dt * 12 * 5

# Block length, number of frequencies and overlap are counts, so they are
# stored as Python ints: np.ceil on its own returns a float, which would
# also turn the saved-frequency indices into a float array.
n_FFT = int(np.ceil(block_dimension / dt))
n_freq = n_FFT // 2 + 1
n_overlap = int(np.ceil(n_FFT * overlap_in_percent / 100))

params = dict()
params['dt'          ] = dt
params['nt'          ] = nt
# NOTE(review): the data loaded above has two spatial dimensions
# (longitude and latitude); confirm that 3 is what the library expects here.
params['xdim'        ] = 3
params['nv'          ] = n_vars
params['n_FFT'       ] = n_FFT
params['n_freq'      ] = n_freq
params['n_overlap'   ] = n_overlap
params['savefreqs'   ] = np.arange(0, n_freq)
params['n_modes_save'] = 3
params['normvar'     ] = False
params['savedir'     ] = os.path.join(CWD, 'results', Path(file).stem)
params['conf_level'  ] = 0.95
params['normalize'   ] = True
params['mean'        ] = 'blockwise'
params['savefft'     ] = False
# Get weights from spod_utils built-in function for geospatial data
params['weights'] = spod_utils.geo_weights_trapz_2D(
	lat=x2, lon=x1, R=1, n_vars=n_vars)
# Optionally rescale the weights using the variance-based normalization
if params['normalize']:
	params['weights'] = spod_utils.apply_normalization(
		X=X, weights=params['weights'], method='variance')