This notebook demonstrates how to calculate and plot eFold distance of inter-site temperature correlation using the eFold module. The source data for this example is ECMWF's ERA20C Monthly temperature dataset (https://ecmwf.int/en/forecasts/datasets/reanalysis-datasets/era-20c).

The code in this notebook converts the NetCDF dataset into a set of times and temperatures, then uses the eFold module to filter the dataset, calculate the correlations between sites, and calculate and plot the eFold distance of those correlations for each site.

Working with larger datasets on memory- or CPU-limited computers can be slow. There are several checkpoints in the notebook where processed data can be saved to, or restored from, a pickle file to shorten future runs.

In [None]:
#import system packages
import matplotlib.pyplot as plt
import netCDF4
from netCDF4 import Dataset
import pandas as pd
import numpy as np
from scipy.signal import butter, filtfilt
import cartopy.crs as ccrs
import cartopy.feature as cf
import cartopy
import sys
import io
import warnings
import pickle

In [None]:
#import local modules
from eFold import binTime
from eFold import bandPassFilter
from eFold import binAndFilter
from eFold import eFoldingDistance
from eFold import calcEFold
from eFold import plotMap


In [None]:
# Open the NetCDF dataset read-only and pull out handles for the four variables
# used below: latitude, longitude, time and t2m.
# The dataset handle (rootGrp) is left open because later cells index these
# variables directly.
DATA_FILE = 'era20c_t2m_1981_2010_anom.nc'
rootGrp = netCDF4.Dataset(DATA_FILE, 'r')

ncVars = rootGrp.variables
latitude, longitude, time, t2m = (
    ncVars[varName] for varName in ('latitude', 'longitude', 'time', 't2m')
)

In [None]:
# Build a decimal-year timestamp (numpy array) for every monthly record.
# Per the original note the time variable counts months from January of
# firstYear, so record k is stamped at the middle of its month:
# firstYear + (k + 0.5) / 12   (e.g. record 0 -> 1900 + 1/24).
firstYear = 1900
monthsPerYear = 12
numMonths = len(time)
numYears = numMonths // monthsPerYear

# An integer arange that is scaled afterwards avoids np.arange with a
# fractional step, whose element count is sensitive to floating-point rounding.
# Sizing the array from numMonths (rather than numYears * 12) keeps times the
# same length as each site's temperature series even if the file ends part-way
# through a year.
times = firstYear + (np.arange(numMonths) + 0.5) / monthsPerYear

In [None]:
# Grid sub-sampling controls for the site list.
#
# WARNING: the source file holds readings on a global grid of
# 128 latitudes x 256 longitudes = 32768 locations. Correlating every pair of
# those sites needs a very large correlation matrix (the author's estimate is
# 64 GB). On a memory- or CPU-limited machine, thin the grid by raising the
# increments below: only every Nth latitude / longitude is kept.
# (e.g. setting both to 2 drops 3/4 of the sites, both to 3 drops 8/9, etc.)
latIncrement = 1  # 1 = keep every latitude
lonIncrement = 1  # 1 = keep every longitude

# Container that the next cell fills with one dict per selected site.
siteList = []

In [None]:
# Build one record (dict) per sampled grid point.
# Each record holds: 'latitude', 'longitude', 'sitename' ("lat,lon"),
# 'times' (the shared decimal-year array) and 'temps' (mean-centred readings).

# (Re)initialise here so that re-running this cell does not append a second
# copy of every site to an already-populated list.
siteList = []

for i in range(0, len(latitude), latIncrement):
    lat = latitude[i]

    # Read the whole latitude row from the file once, instead of issuing a
    # separate read for every grid point in the inner loop.
    # Masked entries become NaN. np.nan is used because the np.NaN alias was
    # removed in NumPy 2.0.
    rowTemps = t2m[:, i, :].filled(np.nan)

    for j in range(0, len(longitude), lonIncrement):
        lon = longitude[j]

        # All temperature readings for this location (copied so the in-place
        # edit below cannot touch the row buffer).
        temps = np.array(rowTemps[:, j])

        # Treat sentinel values (<= -32767.0) as missing readings.
        temps[temps <= -32767.0] = np.nan

        # Centre the series on its mean. nanmean ignores missing readings, so
        # one NaN no longer turns the whole series into NaN (which is what
        # temps.mean() did).
        centredTemps = temps - np.nanmean(temps)

        siteList.append({
            'latitude': lat,
            'longitude': lon,
            'sitename': str(lat)+','+str(lon),
            'times': times,
            'temps': centredTemps,
        })


print('Num sites',len(siteList))

In [None]:
# Optional checkpoint for siteList. Everything in this cell is commented out;
# uncomment ONE of the two snippets below as needed.

# To save siteList so that a later run can skip the NetCDF extraction:

#outPickleFid='ipynb.sitelist.era20.pickle.dat'
#pickle.dump(siteList, open(outPickleFid,'wb'))

# To restore a siteList saved by a previous run. The file name must match the
# one used when saving. Only load pickle files you created yourself:
# pickle.load can execute arbitrary code from the file.

#pickleFidIn = 'ipynb.sitelist.era20.pickle.dat'
#siteList = pickle.load(open(pickleFidIn,'rb'))

In [None]:
# Bin every site's temperatures into yearly steps and filter out the
# low-frequency repetitions (binAndFilter comes from the local eFold module).
# NOTE(review): outFid is presumably the pickle file binAndFilter writes its
# result to, and replaceNaN / replaceNaNDivisor control how missing values are
# filled - confirm against the eFold module.
pickleFidOut = 'ipynb.filtered.era20.pickle.dat'
filteredArray = binAndFilter(
    siteList,
    timeStart=1900,
    timeEnd=2011,
    timeStep=1,
    highBandPass=True,
    replaceNaN=True,
    replaceNaNDivisor=12,
    outFid=pickleFidOut,
)

In [None]:
# Correlate each pair of sites and derive the eFold distance for each site.
# calcEFold (local eFold module) takes the site records and the filtered array;
# the plotting cell reads 'lon', 'lat', 'r2' and 'eFoldDistance' from each
# entry of the result.
eFold = calcEFold(siteList, filteredArray)

In [None]:

# Plot the results on a map.
# First unpack the per-site results into parallel lists. Entries are fetched by
# integer index, so any container supporting len() and eFold[k] works.
siteResults = [eFold[k] for k in range(len(eFold))]
lons = [entry['lon'] for entry in siteResults]
lats = [entry['lat'] for entry in siteResults]
r2 = [entry['r2'] for entry in siteResults]
eFoldDistance = [entry['eFoldDistance'] for entry in siteResults]

# Default figure size for the maps drawn below (and any later figures).
plt.rcParams['figure.figsize'] = [10, 5]

# Map 1: e-folding distance per site.
plotMap(
    lats,
    lons,
    eFoldDistance,
    plotTitle='ERA20',
    dataLabel='e-Folding Distance (km)',
)

# Map 2: r^2 value associated with each site's eFold distance.
plotMap(
    lats,
    lons,
    r2,
    plotTitle='ERA20',
    dataLabel='$r^2$ for eFoldDistance',
    dotSize=10,
)
