# Spatial correlation statistics (per grid) for low and high resolution models

This notebook calculates the slope, p-value, standard error of the slope, and correlation coefficient of the linear and exponential fits for each grid cell, for use in the spatial analysis.

Load libraries:

In [7]:
import netCDF4 as nc
import numpy as np
import matplotlib.pyplot as plt
from sklearn import linear_model
from scipy import stats
import warnings
import os
import sys

Ignore warnings:

In [8]:
# Keep the notebook output readable: messages of these two warning categories
# are dropped for the rest of the session (RuntimeWarning is, for example,
# what NumPy emits for "Mean of empty slice" in nan-aware reductions).
warnings.filterwarnings(action="ignore", category=DeprecationWarning)
warnings.simplefilter(action="ignore", category=RuntimeWarning)

Define functions:

In [9]:
def read_txt(fname, ncolumns, dtype):
    """Read one whitespace-separated column from a text file.

    Parameters
    ----------
    fname : str
        Path of the text file to read.
    ncolumns : int
        1-based index of the column to extract from every line.
    dtype : str
        ``'string'`` returns an object array holding the raw tokens; any
        other value returns a float array (tokens are converted by NumPy).

    Returns
    -------
    numpy.ndarray
        1-D array with one entry per non-blank line of the file.

    Raises
    ------
    IndexError
        If a non-blank line has fewer than ``ncolumns`` fields.
    ValueError
        If a token cannot be converted to float (non-string mode).
    """
    # The context manager guarantees the handle is released; the original
    # called close() after `return`, where it could never run.
    with open(fname) as fdata:
        # Whitespace-only lines (e.g. a trailing newline at the end of the
        # file) carry no data and used to raise IndexError, so skip them.
        rows = [line.split() for line in fdata if line.strip()]

    out_dtype = object if dtype == 'string' else float
    data_in = np.empty([len(rows)], dtype=out_dtype)
    for i, fields in enumerate(rows):
        data_in[i] = fields[ncolumns - 1]
    return data_in
    
# Define functions
def read_data(fname, ncolumns):
    """Read a whitespace-separated text table, or the raw lines of a file.

    Parameters
    ----------
    fname : str
        Path of the text file to read.
    ncolumns : int
        Number of columns expected on every line. When greater than 1 the
        file is parsed as a numeric table; otherwise the lines are returned
        unparsed.

    Returns
    -------
    numpy.ndarray
        * ``ncolumns > 1``: float array of shape ``(n_rows, ncolumns)``,
          one row per non-blank line.
        * otherwise: 1-D array of the file's lines exactly as read
          (line terminators included).

    Raises
    ------
    ValueError
        If a non-blank line does not hold exactly ``ncolumns`` numeric
        fields (numeric mode only).
    """
    # A single `with` covers both modes, so the handle is always released;
    # the original close() call was placed after `return` and never ran.
    with open(fname) as fdata:
        flines = fdata.readlines()

    if ncolumns > 1:
        # Blank lines (typically a trailing newline) hold no data and used
        # to break the row assignment below, so they are skipped.
        rows = [line.split() for line in flines if line.strip()]
        data_in = np.empty([len(rows), ncolumns])
        for i, fields in enumerate(rows):
            data_in[i, :] = fields
    else:
        data_in = np.array(flines)
    return data_in

def read_nc(fname, varname, ifmerged):
    """Open a netCDF dataset and return a variable with its coordinates.

    Parameters
    ----------
    fname : str or list of str
        Path of the file to open. When ``ifmerged`` is true this is handed
        to ``nc.MFDataset``, which also accepts a list of files.
    varname : str
        Name of the data variable to return.
    ifmerged : bool
        Open with ``nc.MFDataset`` when true, ``nc.Dataset`` otherwise.

    Returns
    -------
    tuple
        ``(var, lon, lat, time, dates)`` where the first four are the
        netCDF variable objects named ``varname``, ``'lon'``, ``'lat'`` and
        ``'time'``, and ``dates`` is the time axis decoded by
        ``nc.num2date``.

    Notes
    -----
    The dataset is intentionally left open: the returned variable objects
    are read lazily and need the file handle to stay valid.
    """
    if ifmerged:
        f  = nc.MFDataset(fname)
    else:
        f  = nc.Dataset(fname)
    var    = f.variables[varname]
    lon    = f.variables['lon']
    lat    = f.variables['lat']
    time   = f.variables['time']
    # Not every file stores a `calendar` attribute on the time variable;
    # fall back to num2date's own default instead of raising AttributeError.
    calendar = getattr(time, 'calendar', 'standard')
    dates  = nc.num2date(time[:], time.units, calendar)
    return var, lon, lat, time, dates

def read_mask(fname, varname):
    """Read a whole variable from a netCDF file into memory.

    Parameters
    ----------
    fname : str
        Path of the netCDF file.
    varname : str
        Name of the variable to read.

    Returns
    -------
    numpy array (as returned by netCDF4 slicing)
        The full contents of ``varname``.
    """
    # `[:]` copies the data into memory, so the file can be closed right
    # away; the original left the dataset open for the whole session.
    with nc.Dataset(fname) as f:
        var = f.variables[varname][:]
    return var

Define paths and environmental variables:

In [10]:
# --- Locations ---------------------------------------------------------------
root = '..'
path2info = root + '/data/data_info/'
member_names = 'models_low_res.txt'

# --- What to process ---------------------------------------------------------
resolution = 'high_res'
varname = 'tos'
seasons = ['ANNUAL', 'DJF', 'MAM', 'JJA', 'SON']
ssps = ['ssp585']
nssps = len(ssps)
gwls = [1.5, 2, 3, 4]

# --- Time handling -----------------------------------------------------------
avg_period = 10   # years
ref_ntimes = 51   # number of leading time steps averaged as the reference period
ystart_to_plot = 2010
# Index of ystart_to_plot counted from 1950.
ystart = int(ystart_to_plot - 1950)

# --- Model selection (high resolution only) ----------------------------------
if resolution == 'high_res':
    # 'CNRM-CM6-1' goes with column 9; use column 10 for 'CNRM-CM6-1-HR'.
    gcm, column = 'CNRM-CM6-1', 9

Read the list of models to be processed:

In [5]:
# One model name per line; the names are taken from the first column.
member_list = f'{path2info}/{member_names}'
members = read_txt(fname=member_list, ncolumns=1, dtype='string')
nmembers = members.size

In [None]:
# For every season and scenario: build the running-mean anomaly of `varname`
# relative to the reference period, regress it per grid cell against the
# global-warming series read from the tas table, and write slope, R^2,
# p-value and slope standard error as text grids.

# Fail early on an unsupported resolution, before any directory is created or
# file is opened (the original check sat in an unreachable branch deep inside
# the grid loop).
if resolution not in ('high_res', 'low_res'):
    raise ValueError(f"Unknown resolution {resolution!r}; expected 'high_res' or 'low_res'")

# Extra index shift applied to delta_mean (but not to gwl_data) in the
# high_res case.
# NOTE(review): hard-coded as 49 in the original; presumably it aligns the
# first anomaly window with the first row of the warming table - confirm.
high_res_time_offset = 49

# Loop over seasons
for season in seasons:
    print(f'Working on the season: {season}')

    path2data = f'{root}/data/data_raw/{resolution}/{varname}/'
    path2out  = f'{root}/data/data_txt/{resolution}/spatial/{varname}/{season}'
    os.makedirs(path2out, exist_ok=True)

    for ssp in ssps:
        print("    Working on the ssp: " + ssp)

        # Load reference data (one column per member)
        gwl_fname = f'{root}/data/data_txt/low_res/global/tas/tas_{ssp}_ANNUAL.csv'
        gwl_data  = read_data(gwl_fname, nmembers)

        # Historical + scenario files are opened as one merged dataset
        if resolution == 'high_res':
            fname = [f'{path2data}/{varname}_{gcm}_historical_{season}.nc',
                     f'{path2data}/{varname}_{gcm}_{ssp}_{season}.nc']
        else:
            fname = [f'{path2data}/CMIP6_historical_{varname}_{season}.nc',
                     f'{path2data}/CMIP6_{ssp}_{varname}_{season}.nc']

        var, lon, lat, time, dates = read_nc(fname, varname, True)

        # Define dimensions
        nlats  = len(lat)
        nlons  = len(lon)
        ntimes = len(dates)

        # Read years
        years = np.array([date.year for date in dates], dtype=float)

        # Number of complete avg_period-long windows after the reference period
        run_ntimes = int(ntimes - avg_period - ref_ntimes + 1)

        # low_res data carries a member axis between time and lat/lon;
        # high_res data does not. Everything else is identical.
        member_dims = (nmembers,) if resolution == 'low_res' else ()
        delta_mean  = np.empty((run_ntimes,) + member_dims + (nlats, nlons))
        full_axes   = (slice(None),) * (delta_mean.ndim - 1)

        # Calculate mean for the reference period
        ref_data = var[(slice(0, ref_ntimes),) + full_axes]
        ref_mean = np.nanmean(ref_data, axis=0)

        # Calculate mean anomaly with respect to the reference period.
        # The window spans exactly avg_period steps; the original slice
        # `it:(it + avg_period-1)` averaged one step too few and never used
        # the final time step, contradicting the run_ntimes formula above.
        # NOTE(review): if the warming table was built with the shorter
        # window, regenerate it so both series use the same averaging.
        for t in range(run_ntimes):
            it = t + ref_ntimes
            window = var[(slice(it, it + avg_period),) + full_axes]
            delta_mean[t] = np.nanmean(window, axis=0) - ref_mean

        # Predictor and response series. The predictor does not depend on the
        # grid cell, so it is built once instead of inside the grid loop.
        if resolution == 'high_res':
            x     = gwl_data[ystart:, column]
            y_all = delta_mean[ystart + high_res_time_offset:]
        else:
            # All members are pooled into one sample (time-major order,
            # matching the flattening of y below).
            x     = gwl_data[ystart:, :].reshape(-1)
            y_all = delta_mean[ystart:]

        # Allocate statistical variables; cells that are skipped stay NaN
        Beta      = np.full((nlats, nlons), np.nan)
        R2_lin    = np.full((nlats, nlons), np.nan)
        p_value   = np.full((nlats, nlons), np.nan)
        std_value = np.full((nlats, nlons), np.nan)

        for i in range(nlats):
            for j in range(nlons):
                y = y_all[..., i, j].reshape(-1)

                # Any NaN in the response series disqualifies the cell
                if np.isnan(y).any():
                    continue

                # Linear fit of the local anomaly against the warming series
                slope, _intercept, rvalue, pvalue, stderr = stats.linregress(x, y)
                Beta[i, j]      = slope
                R2_lin[i, j]    = rvalue * rvalue
                p_value[i, j]   = pvalue
                std_value[i, j] = stderr

        # NOTE(review): file names do not include the ssp, so running with
        # more than one entry in `ssps` overwrites the previous scenario.
        np.savetxt(f'{path2out}/Beta_{varname}_{season}.csv',      Beta,      delimiter =" ",fmt='%6.2f')
        np.savetxt(f'{path2out}/r2lin_{varname}_{season}.csv',     R2_lin,    delimiter =" ",fmt='%6.2f')
        np.savetxt(f'{path2out}/p_value_{varname}_{season}.csv',   p_value,   delimiter =" ",fmt='%6.5f')
        np.savetxt(f'{path2out}/std_value_{varname}_{season}.csv', std_value, delimiter =" ",fmt='%6.5f')