# Composite maps

In this notebook we analyze the composite maps computed on the ERA5 dataset.

If you haven't downloaded the data yet, follow the instructions in `Data_ERA5/README.md` and `Data_ERA5/preprocess.ipynb`

If you have already computed the composites, you can skip to the section "Compute metrics".

If you have already computed the metrics, you can skip to the section "Analysis".

In [None]:
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
%matplotlib widget
matplotlib.rc('font', size=18)
import pandas as pd
default_colors = plt.rcParams['axes.prop_cycle'].by_key()['color']

import os
import xarray as xr
from tqdm.notebook import tqdm

from importlib import reload

import sys
sys.path.append('../../../Climate-Learning/')
import general_purpose.utilities as ut

import general_purpose.tables as tbl

HOME = '../../'

In [None]:
def l2(x, **kwargs):
    """Euclidean (L2) norm of `x`.

    Extra keyword arguments (e.g. `axis`) are forwarded to `np.sum`, so the
    norm can be taken over a subset of the axes.
    """
    sum_of_squares = np.sum(x**2, **kwargs)
    return np.sqrt(sum_of_squares)

def split_acf(A, lag):
    """Autocorrelation of `A` at the given `lag` along its last axis.

    The series is shifted against itself along the last axis, both copies are
    flattened (so all leading axes are pooled together) and their Pearson
    correlation coefficient is returned. A falsy `lag` (0) returns 1.
    """
    if not lag:
        return 1
    shifted = A[..., lag:].flatten()
    reference = A[..., :-lag].flatten()
    return np.corrcoef(shifted, reference)[0, 1]

## Compute composites

Run the script `compute_composites_ERA5.py`

## Compute metrics

In [None]:
# Folder that contains lat.npy / lon.npy and one `T<days>` subfolder per heatwave duration
root_folder = 'ERA5/y83'

# Label of the dataset being analysed (not referenced by the cells visible below)
Model = 'ERA5'

# Northern Hemisphere
# `sector` is used as a file-name prefix for every metric file that is loaded or saved;
# `mask` multiplies X_std and the composites before the norms are computed (1 = keep everything)
sector = ''
mask = 1

In [None]:
# Walk the folder tree `root_folder/T<T>/tau<tau>/percent<percent>` and collect, for every
# combination, the metrics comparing the composite map (X_comp) with its gaussian
# approximation (X_comp_GA). Metrics are cached as .npy files next to the composites and
# recomputed only when a file is missing or has an unexpected shape.
# The names `reshaper`, `area_weights` and `sorted_rel_error` are deliberately left in the
# notebook namespace: later cells use them.
df = []
item = {}
reshaper = None

lat = np.load(f'{root_folder}/lat.npy')
coslat = np.cos(lat*np.pi/180)
for s in tqdm(os.listdir(root_folder)):
    if not s.startswith('T'):
        continue
    item['T'] = int(s[1:])
    f = f'{root_folder}/{s}'
    A = np.load(f'{f}/A.npy')
    item['r'] = r = split_acf(A,1)
    # (1+r)/(1-r) rounded to the nearest integer
    item['T_decorr'] = int((1+r)/(1-r) + 0.5)
    for ss in os.listdir(f):
        if not ss.startswith('tau'):
            continue
        item['tau'] = int(ss[3:])
        ff = f'{f}/{ss}'
        X_std = np.load(f'{ff}/X_std.npy') * mask

        # every metric array holds one value per field plus the total in the last slot
        metric_shape = (X_std.shape[-1] + 1,)

        field_dimensions = tuple(range(len(X_std.shape) - 1)) # number of dimensions for each field (should be 2, lon and lat)
        pixels_per_field = np.sum(X_std > 0, axis=field_dimensions)
        X_std_fm = np.sqrt(np.sum(X_std**2, axis=field_dimensions)/pixels_per_field)
        assert X_std_fm.shape == X_std.shape[-1:], 'Shape mismatch!'

        # cos(lat) weights, zeroed where X_std is not positive
        # (the broadcast assumes latitude is the first axis of X_std -- TODO confirm)
        area_weights = (np.ones_like(X_std).T * coslat).T
        area_weights *= (X_std > 0)
        area_weights /= np.sum(area_weights)
        fieldwise_area_weights = area_weights / np.sum(area_weights, axis=field_dimensions)

        assert np.allclose(np.sum(area_weights), 1), 'Not normalized area weights'
        assert np.allclose(np.sum(fieldwise_area_weights, axis=field_dimensions), 1), 'Not normalized fieldwise area weights'

        if reshaper is None:
            # built once, from the first X_std encountered, and reused for all folders
            reshaper = ut.Reshaper(X_std != 0)
            print(reshaper.surviving_coords)
        for sss in os.listdir(ff):
            if not sss.startswith('percent'):
                continue
            fff = f'{ff}/{sss}'
            item['percent'] = float(sss[7:])
            item['a'] = np.load(f'{fff}/threshold.npy')[0]
            try:
                # raise FileNotFoundError()  # uncomment to force recomputation
                nr = np.load(f'{fff}/{sector}norm_ratio.npy')
                nrgn = np.load(f'{fff}/{sector}norm_ratio_global_normalization.npy')
                n = np.load(f'{fff}/{sector}norm.npy')
                en = np.load(f'{fff}/{sector}error_norm.npy')
                for cached in (nr, nrgn, n, en):
                    # `.shape` is a tuple, so it must be compared with a tuple: comparing it
                    # with a bare int is always unequal and would discard every cached file
                    if cached.shape != metric_shape:
                        raise FileNotFoundError()

            except FileNotFoundError:
                # compute norm ratio
                nr = np.zeros(metric_shape)
                n = np.zeros(metric_shape)
                en = np.zeros(metric_shape)
                comp = np.load(f'{fff}/X_comp.npy')*mask
                comp_ga = np.load(f'{fff}/X_comp_GA.npy')*mask

                n[:-1] = l2(comp, axis=field_dimensions)#/np.sqrt(pixels_per_field)
                n[-1] = l2(comp)#/np.sqrt(np.sum(pixels_per_field))
                np.save(f'{fff}/{sector}norm.npy', n)
                en[:-1] = l2(comp - comp_ga, axis=field_dimensions)#/np.sqrt(pixels_per_field)
                en[-1] = l2(comp - comp_ga)#/np.sqrt(np.sum(pixels_per_field))
                np.save(f'{fff}/{sector}error_norm.npy', en)
                nr[:-1] = l2(comp - comp_ga, axis=field_dimensions)/l2(comp, axis=field_dimensions)
                nr[-1] = l2(comp - comp_ga)/l2(comp)
                np.save(f'{fff}/{sector}norm_ratio.npy', nr)

                # same ratios after rescaling each pixel by X_std/X_std_fm
                nrgn = np.zeros(metric_shape)
                comp *= X_std/X_std_fm
                comp_ga *= X_std/X_std_fm
                nrgn[:-1] = l2(comp - comp_ga, axis=field_dimensions)/l2(comp, axis=field_dimensions)
                nrgn[-1] = l2(comp - comp_ga)/l2(comp)
                np.save(f'{fff}/{sector}norm_ratio_global_normalization.npy', nrgn)

            item['total_norm_ratio'] = nr[-1]
            item['fieldwise_norm_ratio'] = nr[:-1]
            item['total_norm_ratio_gn'] = nrgn[-1]
            item['fieldwise_norm_ratio_gn'] = nrgn[:-1]
            item['total_norm'] = n[-1]
            item['fieldwise_norm'] = n[:-1]
            item['total_error_norm'] = en[-1]
            item['fieldwise_error_norm'] = en[:-1]

            # area weighted metrics
            try:
                # raise FileNotFoundError()  # uncomment to force recomputation
                aw_nr = np.load(f'{fff}/{sector}aw_norm_ratio.npy')
                aw_nrgn = np.load(f'{fff}/{sector}aw_norm_ratio_global_normalization.npy')
                aw_n = np.load(f'{fff}/{sector}aw_norm.npy')
                aw_en = np.load(f'{fff}/{sector}aw_error_norm.npy')
                for cached in (aw_nr, aw_nrgn, aw_n, aw_en):
                    if cached.shape != metric_shape:
                        raise FileNotFoundError()

            except FileNotFoundError:
                # compute norm ratio
                aw_nr = np.zeros(metric_shape)
                aw_n = np.zeros(metric_shape)
                aw_en = np.zeros(metric_shape)
                comp = np.load(f'{fff}/X_comp.npy')*mask
                comp_ga = np.load(f'{fff}/X_comp_GA.npy')*mask

                # multiplying by sqrt(w) before taking the L2 norm gives sqrt(sum(w * x**2))
                aw_n[:-1] = l2(comp*np.sqrt(fieldwise_area_weights), axis=field_dimensions)#/np.sqrt(pixels_per_field)
                aw_n[-1] = l2(comp*np.sqrt(area_weights))#/np.sqrt(np.sum(pixels_per_field))
                np.save(f'{fff}/{sector}aw_norm.npy', aw_n)
                aw_en[:-1] = l2((comp - comp_ga)*np.sqrt(fieldwise_area_weights), axis=field_dimensions)#/np.sqrt(pixels_per_field)
                aw_en[-1] = l2((comp - comp_ga)*np.sqrt(area_weights))#/np.sqrt(np.sum(pixels_per_field))
                np.save(f'{fff}/{sector}aw_error_norm.npy', aw_en)
                aw_nr = aw_en/aw_n
                np.save(f'{fff}/{sector}aw_norm_ratio.npy', aw_nr)

                aw_nrgn = np.zeros(metric_shape)
                comp *= X_std/X_std_fm
                comp_ga *= X_std/X_std_fm
                aw_nrgn[:-1] = l2((comp - comp_ga)*np.sqrt(fieldwise_area_weights), axis=field_dimensions)/l2((comp)*np.sqrt(fieldwise_area_weights), axis=field_dimensions)
                aw_nrgn[-1] = l2((comp - comp_ga)*np.sqrt(area_weights))/l2((comp)*np.sqrt(area_weights))
                np.save(f'{fff}/{sector}aw_norm_ratio_global_normalization.npy', aw_nrgn)

            item['total_aw_norm_ratio'] = aw_nr[-1]
            item['fieldwise_aw_norm_ratio'] = aw_nr[:-1]
            item['total_aw_norm_ratio_gn'] = aw_nrgn[-1]
            item['fieldwise_aw_norm_ratio_gn'] = aw_nrgn[:-1]
            item['total_aw_norm'] = aw_n[-1]
            item['fieldwise_aw_norm'] = aw_n[:-1]
            item['total_aw_error_norm'] = aw_en[-1]
            item['fieldwise_aw_error_norm'] = aw_en[:-1]

            # compute relative error quantile:
            try:
                sorted_rel_error = np.load(f'{fff}/sorted_{sector}rel_error.npy')
            except FileNotFoundError:
                comp = reshaper.reshape(np.load(f'{fff}/X_comp.npy'))
                comp_ga = reshaper.reshape(np.load(f'{fff}/X_comp_GA.npy'))
                rel_error = np.abs((comp_ga/comp - 1))
                sorted_rel_error = np.sort(rel_error)
                np.save(f'{fff}/sorted_{sector}rel_error.npy', sorted_rel_error)

            item['sorted_rel_error'] = sorted_rel_error

            # compute significance of the error
            try:
                significance = reshaper.reshape(np.load(f'{fff}/{sector}significance.npy'))
            except FileNotFoundError:
                comp = reshaper.reshape(np.load(f'{fff}/X_comp.npy'))
                comp_ga = reshaper.reshape(np.load(f'{fff}/X_comp_GA.npy'))
                comp_std = reshaper.reshape(np.load(f'{fff}/X_comp_std.npy'))
                significance = (comp_ga - comp)/comp_std # this is the raw significance without accounting for the number of independent events
                np.save(f'{fff}/{sector}significance.npy', reshaper.inv_reshape(significance))

            item['significance'] = significance
            item['sorted_significance'] = np.sort(np.abs(significance))

            # here we compute the number of independent events
            # (Y is iterated row by row below, each row being treated as one year)
            Y = np.load(f'{fff}/Y.npy')
            item['N_heatwaves'] = np.sum(Y)
            item['N_heatwave_years'] = np.sum(np.max(Y, axis=1))

            N_ind_heatwaves = 0 # we consider two events independent if they are more than T days apart, i.e. they don't share data to compute A
            for yr in Y:
                i = 0
                while i < len(yr):
                    if yr[i]:
                        i += item['T']
                        N_ind_heatwaves += 1
                    else:
                        i += 1

            item['N_ind_heatwaves'] = N_ind_heatwaves

            N_decorr_heatwaves = 0 # we consider two events independent if they are more than T_decorr days apart, i.e. A has had time to decorrelate
            # here we need to account for the possibility of having to skip more than one year
            # NOTE(review): the carry-over between rows subtracts a hard-coded 360 days -- confirm
            # this is the intended year length for this dataset
            i = 0
            for yr in Y:
                if i >= len(yr): # we skip the year directly
                    i = max(0,i-360)
                    continue
                while i < len(yr):
                    if yr[i]:
                        i += item['T_decorr']
                        N_decorr_heatwaves += 1
                    else:
                        i += 1
                i = max(0,i-360) # we finished this year, now we prepare for next year

            item['N_decorr_heatwaves'] = N_decorr_heatwaves

            # `item` is reused across iterations, so store a snapshot
            df.append(item.copy())

df = pd.DataFrame(df)

# sort the dataset
df.sort_values(['T', 'tau', 'percent'], inplace=True)
df

In [None]:
# reshape into xarray:
T_ind = np.sort(list(set(df['T']))) ; print(T_ind)
tau_ind = np.sort(list(set(df['tau']))) ; print(tau_ind)
percent_ind = np.sort(list(set(df['percent']))) ; print(percent_ind)
nT, ntau, npercent = len(T_ind), len(tau_ind), len(percent_ind)
npixels = len(sorted_rel_error)
quantile = np.arange(npixels)/npixels
pixel = np.arange(npixels)

# field_names = ['t2m','zg500', 'mrso_filtered'] if root_folder.startswith('PLASIM') else ['zg500']
field_names = ['zg500']

nfields = len(field_names)
fields_ind = np.arange(nfields)

In [None]:
# The reshape into a dense (T, tau, percent) grid needs exactly one row per combination.
# Missing combinations are filled with a NaN row; duplicated ones are only reported
# (adding yet another row for them would make the row count even more wrong).
template_row = df.iloc[0] * np.nan  # same columns (and array shapes) as a real row, all NaN
for T in T_ind:
    for tau in tau_ind:
        for percent in percent_ind:
            comp_rows = np.sum((df['T'] == T ) * (df['tau'] == tau) * (df['percent'] == percent))
            if comp_rows == 1:
                continue
            if comp_rows > 1:
                print(f'DUPLICATE ROW at {(T,tau,percent) = }')
                continue

            print(f'Inserting NaNs at {(T,tau,percent) = }')
            template_row['T'] = T
            template_row['tau'] = tau
            template_row['percent'] = percent
            # len(df) is used as the label of the appended row
            df.loc[len(df)] = template_row.copy()

df.sort_values(['T', 'tau', 'percent'], inplace=True)
df

In [None]:
def _grid_data_array(column, description, extra_coord=None, **extra_attrs):
    """Reshape one dataframe column onto the (T, tau, percent) grid.

    Parameters
    ----------
    column : str
        Name of the column of `df` to convert.
    description : str
        Stored in the `description` attribute of the returned array.
    extra_coord : tuple (name, index), optional
        For columns whose entries are arrays: name and coordinate values of the
        additional trailing dimension. The per-row arrays are stacked.
    **extra_attrs
        Further attributes stored after `description`.

    Returns
    -------
    xr.DataArray
        Dimensions (T, tau, percent) or (T, tau, percent, <extra>). The reshape
        relies on `df` being sorted by T, tau, percent with one row per combination.
    """
    coords = {'T': np.array(T_ind),
              'tau': np.array(tau_ind),
              'percent': np.array(percent_ind)
             }
    if extra_coord is None:
        values = np.array(df[column]).reshape(nT,ntau,npercent)
    else:
        extra_name, extra_index = extra_coord
        values = np.stack(df[column]).reshape(nT,ntau,npercent,len(extra_index))
        coords[extra_name] = extra_index
    return xr.DataArray(values, coords=coords, attrs={'description': description, **extra_attrs})


_field = ('field', fields_ind)
_quantile = ('quantile', quantile)
_pixel = ('pixel', pixel)

ds = xr.Dataset({
    # weights left over from the last folder visited by the metrics loop
    'area_weights':        xr.DataArray(reshaper.reshape(area_weights),
                                        coords={'pixel': pixel},
                                        attrs={'description': 'normalized grid cell area weights'},
                                       ),
    'r':                   _grid_data_array('r', 'Lag-1 autocorrelation coefficient of A',
                                           ).sel(tau=0,percent=5), # r depends only on T
    # NOTE(review): the formula attribute uses a ceiling while the metrics loop rounds
    # to the nearest integer -- confirm which one is intended
    'T_decorr':            _grid_data_array('T_decorr', 'Decorrelation time of A',
                                            formula=r'$T_{decorr} = \left\lceil\frac{1+r}{1-r}\right\rceil$',
                                           ).sel(tau=0,percent=5), # T_decorr depends only on T
    'a':                   _grid_data_array('a', 'threshold on A for defining a heatwave',
                                           ).sel(tau=0), # threshold does not depend on tau

    # plain L2 norms
    'total_norm':              _grid_data_array('total_norm', '||C||'),
    'fieldwise_norm':          _grid_data_array('fieldwise_norm', '||C|| computed independently for each field', _field),
    'total_error_norm':        _grid_data_array('total_error_norm', '||C_ga - C||'),
    'fieldwise_error_norm':    _grid_data_array('fieldwise_error_norm', '||C_ga - C|| computed independently for each field', _field),
    'total_norm_ratio':        _grid_data_array('total_norm_ratio', '||C_ga - C||/||C||'),
    'fieldwise_norm_ratio':    _grid_data_array('fieldwise_norm_ratio', '||C_ga - C||/||C|| computed independently for each field', _field),
    'total_norm_ratio_gn':     _grid_data_array('total_norm_ratio_gn', '||C_ga - C||/||C|| computed with global normalization instead of pixel-wise'),
    'fieldwise_norm_ratio_gn': _grid_data_array('fieldwise_norm_ratio_gn', '||C_ga - C||/||C|| computed independently for each field with global normalization instead of pixel-wise', _field),

    # area weighted norms
    'total_aw_norm':              _grid_data_array('total_aw_norm', 'area weighted norm ||C||_w'),
    'fieldwise_aw_norm':          _grid_data_array('fieldwise_aw_norm', 'area weighted norm ||C||_w computed independently for each field', _field),
    'total_aw_error_norm':        _grid_data_array('total_aw_error_norm', 'area weighted norm ||C_ga - C||_w'),
    'fieldwise_aw_error_norm':    _grid_data_array('fieldwise_aw_error_norm', 'area weighted norm ||C_ga - C||_w computed independently for each field', _field),
    'total_aw_norm_ratio':        _grid_data_array('total_aw_norm_ratio', 'area weighted norm ratio ||C_ga - C||_w/||C||_w'),
    'fieldwise_aw_norm_ratio':    _grid_data_array('fieldwise_aw_norm_ratio', 'area weighted norm ratio ||C_ga - C||_w/||C||_w computed independently for each field', _field),
    'total_aw_norm_ratio_gn':     _grid_data_array('total_aw_norm_ratio_gn', 'area weighted norm ratio ||C_ga - C||_w/||C||_w computed with global normalization instead of pixel-wise'),
    'fieldwise_aw_norm_ratio_gn': _grid_data_array('fieldwise_aw_norm_ratio_gn', 'area weighted norm ratio ||C_ga - C||_w/||C||_w computed independently for each field with global normalization instead of pixel-wise', _field),

    # pixel-wise quantities
    'relative_error':      _grid_data_array('sorted_rel_error', 'sorted values of |(C_ga[i]/C[i] - 1)|', _quantile),
    'significance':        _grid_data_array('sorted_significance', 'sorted values of |(C_ga[i] - C[i])/C_std[i]|', _quantile),
    'raw_significance':    _grid_data_array('significance', 'values of (C_ga[i] - C[i])/C_std[i]', _pixel),

    # event counts
    'N_heatwaves':         _grid_data_array('N_heatwaves', 'number of heatwave days',
                                           ).sel(tau=0), # number of heatwaves does not depend on tau,
    'N_heatwave_years':    _grid_data_array('N_heatwave_years', 'number of years with at least one heatwave day',
                                           ).sel(tau=0), # number of heatwaves does not depend on tau,
    'N_ind_heatwaves':     _grid_data_array('N_ind_heatwaves', 'number of independent heatwave days, i.e. that are at least T days apart',
                                           ).sel(tau=0), # number of heatwaves does not depend on tau,
    'N_decorr_heatwaves':  _grid_data_array('N_decorr_heatwaves', 'number of decorrelated heatwave days, i.e. that are at least T_decorr days apart',
                                           ).sel(tau=0), # number of heatwaves does not depend on tau,
    },
    attrs={
        'description': 'metrics for comparisons between composite maps computed on the data and with the gaussian approximation',
        # 'dataset': 'PLASIM - 80 years',
        'dataset': 'ERA5',  # the comma here was missing, which made the whole cell a SyntaxError
        'sector': f"{sector.strip('_')}"
    }
)

# set attributes for coordinates of the dataset
ds['T'].attrs = {'description': 'heatwave duration (A is computed as the forward T-day running mean)'}
ds['tau'].attrs = {'description': 'time delay from the first day of the heatwave'}
ds['percent'].attrs = {'description': 'the threshold `a` on A that defines heatwaves is such that `percent` of the data will be above `a`'}
ds['quantile'].attrs = {'description': 'np.arange(`npixels`)/`npixels`'}
ds['field'].attrs = {'description': 'index for the climate variables studied', 'field_names': ', '.join(field_names)}

ds

In [None]:
# Write to a temporary name first, then move the file onto its final name.
# os.replace overwrites an existing destination on every platform, whereas os.rename
# raises on Windows when the metrics file already exists (e.g. when this cell is re-run).
tmp_metrics_path = f'{root_folder}/metrics_.nc'
metrics_path = f'{root_folder}/{sector}metrics.nc'
ds.to_netcdf(tmp_metrics_path)
os.replace(tmp_metrics_path, metrics_path)

## Analysis

In [None]:
# Reload the metrics saved by the "Compute metrics" section, so the analysis can start here
metrics_file = f'{root_folder}/{sector}metrics.nc'
ds = xr.open_dataset(metrics_file)
ds

### Table 5

In [None]:
# Area weighted norm ratio at percent=5, every third tau from 0 to 30.
# `sel`, `xlabel` and `ylabel` are reused by the LaTeX export in the next cell.
tau_selection = np.arange(0,31,3)  # alternative: [0,5,10,15,20,25,30]
sel = ds['total_aw_norm_ratio'].sel(percent=5, tau=tau_selection)

xlabel = r'$\tau$ [days]'
ylabel = r'$T$ [days]'

_ = tbl.table(
    sel.data, sel['tau'].data, sel['T'].data,
    color_range=(0.2,0.9),
    xlabel=xlabel,
    ylabel=ylabel,
    title='Norm ratio',
    num=5,
    figsize=(7,3),
)

In [None]:
# Same table as above, rendered with tbl.tex_table and printed
_ = tbl.tex_table(
    sel.data, sel['tau'].data, sel['T'].data,
    color_range=(0.2,0.9),
    xlabel=xlabel,
    ylabel=ylabel,
    title='Norm ratio',
    close_left=False,
)
print(_)

### Equivalent of table 2 for ERA5 data

In [None]:
def quantile_below_threshold(data, quantiles, threshold):
    """Locate where `data` first reaches `threshold` and report the matching quantile.

    Parameters
    ----------
    data : array-like
        Values to scan. `np.argmax` picks the first index at which
        `data >= threshold`, so this is meaningful for data sorted in
        ascending order. Plain numpy arrays and array-likes convertible with
        `np.asarray` are accepted.
    quantiles : array
        Quantile attached to each entry of `data`; must have the same shape.
    threshold : float

    Returns
    -------
    One-element array:
        * `[1]` if no value reaches the threshold,
        * `[0]` if the very first value already reaches it,
        * otherwise `quantiles[i-1:i]`, where `i` is the first index with
          `data[i] >= threshold`.
    """
    # np.asarray instead of `data.data`: on a plain ndarray `.data` is a memoryview,
    # which cannot be compared with a Python float
    values = np.asarray(data)
    assert values.shape == quantiles.shape
    iot = values >= threshold
    i = np.argmax(iot)
    if i == 0:
        # argmax is 0 both when the first element is True and when nothing is True
        if np.sum(iot) == 0:
            return np.array([1])
        return np.array([0])
    return quantiles[i-1:i] # quantile of the last value that is still below the threshold

def xr_qbt(da:xr.DataArray, threshold:float):
    """Apply `quantile_below_threshold` along the 'quantile' dimension of `da`.

    The function is vectorized over all the other dimensions through
    `xr.apply_ufunc`; the result is renamed to describe the quantity computed.
    """
    core_dims = [['quantile'], ['quantile'], []]
    result = xr.apply_ufunc(
        quantile_below_threshold,
        da,
        da['quantile'],
        threshold,
        input_core_dims=core_dims,
        exclude_dims={'quantile'},
        vectorize=True,
    )
    return result.rename(f'Quantile of {da.name} below {threshold}')

def xr_qat(da:xr.DataArray, threshold:float):
    """Complement of `xr_qbt`: one minus the quantile below `threshold`, renamed accordingly."""
    below = xr_qbt(da, threshold)
    above = 1 - below
    return above.rename(f'Quantile of {da.name} above {threshold}')

In [None]:
# Number of sigmas above which an error is counted
sign = 2
# sign = 1

# Area weighted fraction of pixels whose |raw significance|, scaled by the square root
# of the number of heatwave years, exceeds `sign`
scaled_significance = np.abs(ds['raw_significance'])*np.sqrt(ds['N_heatwave_years'])
is_significant = scaled_significance > sign
sA = (is_significant*ds['area_weights']).sum('pixel').rename('yr_area_significance')
# Alternative scaling with the number of decorrelated heatwaves:
# sA = ((np.abs(ds['raw_significance'])*np.sqrt(ds['N_decorr_heatwaves']) > sign)*ds['area_weights']).sum('pixel').rename('decorr_area_significance')
sA

In [None]:
# Significant-area fraction at percent=5, every third tau from 0 to 30.
# `sel`, `xlabel` and `ylabel` are reused by the LaTeX export in the next cell.
tau_selection = np.arange(0,31,3)
sel = sA.sel(percent=5, tau=tau_selection)

xlabel = r'$\tau$ [days]'
ylabel = r'$T$ [days]'

_ = tbl.table(
    sel.data, sel['tau'].data, sel['T'].data,
    xlabel=xlabel,
    ylabel=ylabel,
    title=fr'Fraction of area with error above ${sign}\sigma$',
    color_range=(0.2,0.9),
    text_digits=3,
)

In [None]:
# Same table as above, rendered with tbl.tex_table and printed
_ = tbl.tex_table(
    sel.data, sel['tau'].data, sel['T'].data,
    xlabel=xlabel,
    ylabel=ylabel,
    title=fr'Fraction of area with error above ${sign}\sigma$',
    color_range=(0.2,0.9),
    close_left=False,
    text_digits=3,
)
print(_)

### Figure 8

In [None]:
import general_purpose.cartopy_plots as cplt

# Coordinate grids for the map plots
lat = np.load(f'{root_folder}/lat.npy')
lon = cplt.monotonize_longitude(np.load(f'{root_folder}/lon.npy'))
LON, LAT = np.meshgrid(lon, lat)

def retrieve_maps(T, tau, percent):
    """Load the maps stored for one (T, tau, percent) combination.

    Reads X_comp.npy, X_comp_std.npy and X_comp_GA.npy from
    `{root_folder}/T{T}/tau{tau}/percent{percent}`.

    Returns
    -------
    tuple
        (comp, comp_ga, comp_std) -- note that the std map comes last.
    """
    folder = f'{root_folder}/T{T}/tau{tau}/percent{percent}'
    maps = {name: np.load(f'{folder}/{name}.npy')
            for name in ('X_comp', 'X_comp_std', 'X_comp_GA')}
    return maps['X_comp'], maps['X_comp_GA'], maps['X_comp_std']

# Rebuild the reshaper from the non-zero entries of one composite std map
Xcstd = retrieve_maps(14, 0, 5)[2]
reshaper = ut.Reshaper(Xcstd != 0)
print(reshaper.surviving_coords)

In [None]:
# Display the metrics dataset
ds

In [None]:
# Composite, gaussian-approximation composite and their difference for one configuration;
# each map is drawn in its own figure and saved as a pdf under HOME
T, tau, percent = 14, 0, 5
comp, comp_ga, comp_std = retrieve_maps(T,tau,percent)

axes = cplt.mfp(LON, LAT, comp, figsize=(6,5), fig_num=8, colorbar='shared',
                titles=[r'$C_\mathcal{D}$'], mx=1.05)
fig = axes[0].get_figure()
fig.savefig(f'{HOME}ERA_comp.pdf')

axes = cplt.mfp(LON, LAT, comp_ga, figsize=(6,5), fig_num=9,
                titles=[r'$C_\mathcal{G}$'], mx=1.05)
fig = axes[0].get_figure()
fig.savefig(f'{HOME}ERA_comp_ga.pdf')

axes = cplt.mfp(LON, LAT, comp - comp_ga, figsize=(6,5), fig_num=10,
                titles=[r'$C_\mathcal{D} - C_\mathcal{G}$'], mx=0.25)
fig = axes[0].get_figure()
fig.savefig(f'{HOME}ERA_comp_error.pdf')

In [None]:
import general_purpose.cartopy_plots as cplt

# Coordinate grids for the map plots
# NOTE(review): this cell repeats an earlier one verbatim -- presumably so that this
# part of the notebook can be run on its own; confirm before deduplicating
lat = np.load(f'{root_folder}/lat.npy')
lon = cplt.monotonize_longitude(np.load(f'{root_folder}/lon.npy'))
LON, LAT = np.meshgrid(lon, lat)

def retrieve_maps(T, tau, percent):
    """Load the maps stored for one (T, tau, percent) combination.

    Reads X_comp.npy, X_comp_std.npy and X_comp_GA.npy from
    `{root_folder}/T{T}/tau{tau}/percent{percent}`.

    Returns
    -------
    tuple
        (comp, comp_ga, comp_std) -- note that the std map comes last.
    """
    folder = f'{root_folder}/T{T}/tau{tau}/percent{percent}'
    maps = {name: np.load(f'{folder}/{name}.npy')
            for name in ('X_comp', 'X_comp_std', 'X_comp_GA')}
    return maps['X_comp'], maps['X_comp_GA'], maps['X_comp_std']

# Rebuild the reshaper from the non-zero entries of one composite std map
Xcstd = retrieve_maps(14, 0, 5)[2]
reshaper = ut.Reshaper(Xcstd != 0)
print(reshaper.surviving_coords)

In [None]:
# Configuration plotted in the cells below: one map per tau at fixed T and percent
T, percent = 1, 5
taus = [0, 2, 4, 6]

In [None]:
# Area weighted norm of the composite for each selected tau, rounded to two decimals
selected_norms = ds['total_aw_norm'].sel(T=T, percent=percent, tau=taus)
norms = np.round(selected_norms.data, 2)

In [None]:
# Display the rounded norms used to rescale the maps below
norms

In [None]:
# One figure per tau: the gaussian-approximation composite divided by the norm computed above
for i, (tau, norm) in enumerate(zip(taus, norms)):
    comp, comp_ga, comp_std = retrieve_maps(T, tau, percent)
    axs = cplt.mfp(
        LON, LAT, comp_ga/norm,
        one_fig_layout=110,
        figsize=(5,5),
        fig_num=8+i,
        colorbar='individual',
        titles=f'|C|={norm}',
        mx=4,
        put_colorbar=False,
    )
    fig = axs[0].get_figure()
    fig.savefig(f'{HOME}ERA5_C_T{T}_tau{tau}.pdf')