# Correlation statistics for global variables

This notebook calculates the slope, p-value, standard deviation of the slope, and correlation coefficients for the linear and exponential fits.

Loading libraries:

In [1]:
import netCDF4 as nc
import numpy as np
import numpy.ma as ma
import matplotlib.pyplot as plt
from matplotlib import cm
from scipy.optimize import curve_fit
from scipy import stats
import os

Define functions:

In [2]:
def read_txt(fname, ncolumns, dtype):
    """Read one whitespace-separated column from a text file.

    Parameters
    ----------
    fname : str
        Path of the text file to read.
    ncolumns : int
        1-based index of the column to extract from every line.
    dtype : str
        If equal to 'string', the values are stored as-is in an object
        array; for any other value they are stored in a float array.

    Returns
    -------
    numpy.ndarray
        1-D array with one entry per line of the file.

    Raises
    ------
    IndexError
        If a line has fewer than `ncolumns` whitespace-separated fields
        (this includes blank lines).
    """
    # The context manager guarantees the file is closed; previously the
    # close() call was placed after the return statement and never ran.
    with open(fname) as fdata:
        flines = fdata.readlines()

    out_dtype = object if dtype == 'string' else float
    data_in = np.empty([len(flines)], dtype=out_dtype)
    for i, line in enumerate(flines):
        data_in[i] = line.split()[ncolumns - 1]
    return data_in
    
def read_data(fname, ncolumns):
    """Read a whitespace-separated text file into a numpy array.

    Parameters
    ----------
    fname : str
        Path of the text file to read.
    ncolumns : int
        Number of columns expected on every line. If greater than 1 the
        file is parsed into a 2-D float array; otherwise the raw lines
        are returned unparsed.

    Returns
    -------
    numpy.ndarray
        For ``ncolumns > 1``: float array of shape (number of lines,
        ncolumns). Otherwise: 1-D array of the raw text lines, with
        their line endings kept.

    Raises
    ------
    ValueError
        If ``ncolumns > 1`` and a line does not split into exactly
        `ncolumns` fields, or a field cannot be converted to float.
    """
    # Both branches read through the same context manager so the file is
    # always closed; previously the multi-column branch leaked the handle
    # because close() was placed after the return statement.
    with open(fname) as fdata:
        flines = fdata.readlines()

    if ncolumns > 1:
        data_in = np.empty([len(flines), ncolumns])
        for i, line in enumerate(flines):
            data_in[i, :] = line.split()
    else:
        data_in = np.array(flines)
    return data_in

def read_nc(fname, varname, ifmerged):
    """Open a netCDF source and return a variable with its coordinates.

    Parameters
    ----------
    fname
        Passed unchanged to ``nc.MFDataset`` or ``nc.Dataset``.
    varname : str
        Name of the variable to look up in the dataset.
    ifmerged : bool
        If true the source is opened with ``nc.MFDataset``, otherwise
        with ``nc.Dataset``.

    Returns
    -------
    tuple
        ``(var, lon, lat, time, dates)``: the requested variable, the
        'lon', 'lat' and 'time' variables, and the time values converted
        with ``nc.num2date`` using the time variable's ``units`` and
        ``calendar`` attributes.

    Notes
    -----
    The dataset is not closed in this function; the first four returned
    items are the variable objects taken from it, not in-memory copies.
    """
    opener = nc.MFDataset if ifmerged else nc.Dataset
    dataset = opener(fname)
    variables = dataset.variables

    # Look-ups are kept in the original order: variable, lon, lat, time.
    var = variables[varname]
    lon = variables['lon']
    lat = variables['lat']
    time = variables['time']

    dates = nc.num2date(time[:], time.units, time.calendar)
    return var, lon, lat, time, dates

def read_mask(fname, varname):
    """Read the full contents of one variable from a netCDF file.

    Parameters
    ----------
    fname : str
        Path of the netCDF file.
    varname : str
        Name of the variable to read.

    Returns
    -------
    The values obtained by slicing the variable with ``[:]``.

    Raises
    ------
    KeyError
        If `varname` is not present in the file.
    """
    # The data are copied out with [:] before leaving the with-block, so
    # the dataset can be closed here instead of leaking the file handle.
    with nc.Dataset(fname) as f:
        var = f.variables[varname][:]
    return var

Define paths and environmental variables:

In [9]:
# --- Experiment selection -------------------------------------------------
resolution = 'low_res'
varname = 'stas'
seasons = ['ANNUAL', 'DJF', 'MAM', 'JJA', 'SON']
ssps = ['ssp126', 'ssp245', 'ssp370', 'ssp585']
nssps = len(ssps)

# Name of the text file listing the models for the chosen resolution
member_names = f'models_{resolution}.txt'

# --- Time settings --------------------------------------------------------
avg_period = 10
ref_ntimes = 51
ystart_to_plot = 2010
# Offset of the first plotted year from 1950; used below as the first row
# index when slicing the data arrays.
# NOTE(review): presumably the data rows start at year 1950 - confirm.
ystart = int(ystart_to_plot - 1950)

# --- Paths ----------------------------------------------------------------
root = '..'
path2data = f'{root}/data/data_txt/'
path2info = f'{root}/data/data_info/'

Read list of the models to be processed:

In [10]:
# Build the path of the model list and load its first column as strings.
member_list = f'{path2info}/{member_names}'
members = read_txt(fname=member_list, ncolumns=1, dtype='string')
# Number of models; passed to read_data below as the column count.
nmembers = members.shape[0]

Main code:

In [None]:
# For every season, regress the selected variable against the annual global
# mean temperature for each SSP (linear fit and exponential fit) and write
# the resulting statistics to text files, one value per SSP.

def func(x, a, b):
    """Exponential model a * exp(b * x) used for the curve fit."""
    return a * np.exp(b * x)


# Loop over seasons
for season in seasons:
    print(f'Working on the season {season}')

    # One entry per SSP for each statistic
    Beta = np.empty((nssps), dtype=float)
    R2_lin = np.empty((nssps), dtype=float)
    R2_exp = np.empty((nssps), dtype=float)
    p_value = np.empty((nssps), dtype=float)
    std_value = np.empty((nssps), dtype=float)

    for s in range(nssps):
        ssp = ssps[s]
        print("        Working on the ssp: " + ssp)

        # Read reference data = global mean temperature
        # (always the ANNUAL file, whatever the current season is)
        ref_fname = f'{path2data}/{resolution}/global/tas/tas_{ssp}_ANNUAL.csv'
        ref_data = read_data(ref_fname, nmembers)

        # Read model data
        dirpath = f'{resolution}/global/{varname}/'
        filename = f'{varname}_{ssp}_{season}'
        infile = f'{path2data}/{dirpath}/{filename}.csv'
        data = read_data(infile, nmembers)

        # Linear fit: rows from ystart onwards, all columns pooled into a
        # single sample. std_value receives linregress's fifth return value
        # (the standard error of the slope, per the SciPy documentation).
        x = ref_data[ystart::, :].reshape(-1, 1)
        y = data[ystart::, :].reshape(-1, 1)
        Beta[s], intercept, rvalue, p_value[s], std_value[s] = stats.linregress(x[:, 0], y[:, 0])
        R2_lin[s] = rvalue * rvalue

        # Exponential fit
        popt, pcov = curve_fit(func, x[:, 0], y[:, 0])
        # Fitted curve on a regular grid; not used further in this cell.
        x_fitted = np.linspace(np.min(x), np.max(x), 150)
        y_fitted = func(x_fitted, *popt)
        # R^2 of the exponential fit = 1 - SS_res / SS_tot
        residuals = y[:, 0] - func(x[:, 0], *popt)
        res = np.sum(residuals**2)
        tot = np.sum((y[:, 0] - np.mean(y[:, 0]))**2)
        R2_exp[s] = 1 - (res / tot)

    # Save statistical info in a txt file
    print("    Saving the data in the output file.")
    path2out = f'{path2data}/{resolution}/global/{varname}'
    # exist_ok avoids the separate existence check before creating the folder
    os.makedirs(path2out, exist_ok=True)

    np.savetxt(f'{path2out}/Beta_{varname}_{season}.csv',      Beta,      delimiter=" ", fmt='%6.2f')
    np.savetxt(f'{path2out}/R2lin_{varname}_{season}.csv',     R2_lin,    delimiter=" ", fmt='%6.2f')
    np.savetxt(f'{path2out}/R2exp_{varname}_{season}.csv',     R2_exp,    delimiter=" ", fmt='%6.2f')
    np.savetxt(f'{path2out}/p_value_{varname}_{season}.csv',   p_value,   delimiter=" ", fmt='%6.5f')
    np.savetxt(f'{path2out}/std_value_{varname}_{season}.csv', std_value, delimiter=" ", fmt='%6.5f')