# setup

In [3]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import os
from glob import glob
import seaborn as sns
import geopandas as gpd
import xarray as xr
import rioxarray
import matplotlib.gridspec as gridspec
import cartopy.crs as ccrs
import matplotlib as mpl
from cmcrameri import cm
from pymannkendall import original_test

# Input folder holding one NetCDF file per dataset (read as <dataset>.nc).
# Built with os.path.join so the path is valid on any OS, not only Windows.
folder_ts = os.path.join('Data', 'Datasets', 'amz', 'ts')
# Output folder for the MCWD products written by the cells below.
folder_metric = os.path.join('Data', 'Datasets', 'amz', 'mcwd')
# Dataset identifiers; each one is used as a file stem in the folders above.
datasets = ['cru', 'gpcc', 'chirps', 'imerg', 'terra', 'era_land', 'jra55', 'merra2']
# Display names; these appear to be aligned index-by-index with `datasets`.
datasets_names = ['CRU', 'GPCC', 'CHIRPS', 'IMERG-V6', 'TerraClimate', 'ERA5-Land', 'JRA55', 'MERRA2']
# Constant subtracted from precipitation before the MCWD computation.
# NOTE(review): presumably mm/month -- confirm the units against the input data.
evapotranspiration = 100

In [4]:
#setup
def mcwd_f(x):
    """Return the most negative cumulative deficit found in a 1-D series.

    ``x`` is a sequence of water-balance values (the callers pass
    precipitation minus a constant evapotranspiration). Walking through the
    series, a running deficit is accumulated while the balance stays below
    zero and is reset to 0 as soon as it would become non-negative. The
    minimum of that running deficit is returned, so the result is <= 0 for
    NaN-free input.

    NaN handling: a NaN in the first position is kept, which makes the
    result NaN; a NaN at any later position resets the running deficit to 0.

    The input is copied and never modified. An empty input raises
    ``ValueError`` (from ``ndarray.min``).
    """
    deficits = np.array(x, dtype=float)  # np.array copies, caller data is untouched
    running = 0.0
    for idx, balance in enumerate(deficits):
        if idx == 0:
            # First step: clip positive values to 0, keep everything else as is.
            running = 0.0 if balance > 0 else balance
        else:
            total = running + balance
            # Only a strictly negative total carries over to the next step.
            running = total if total < 0 else 0.0
        deficits[idx] = running

    return deficits.min()


# functions
def mannkendall_trend(arr):
    """Run the Mann-Kendall test on ``arr`` and return ``(p, slope, intercept)``.

    If ``arr`` contains any NaN, the test is skipped and a tuple of three
    NaNs is returned instead. Otherwise the ``p``, ``slope`` and
    ``intercept`` attributes of ``original_test(arr)`` are returned.
    """
    if np.isnan(arr).any():
        return np.nan, np.nan, np.nan

    trend = original_test(arr)
    return trend.p, trend.slope, trend.intercept

def ds_kendall(data, dim, var='pr'):
    """Apply ``mannkendall_trend`` along ``dim`` and collect the results.

    Parameters:
        data: xarray object passed to ``xr.apply_ufunc``; the outputs are
            indexed with ``var``, so it is expected to contain that variable.
        dim: name of the dimension the trend test is computed along (it is
            consumed, so the outputs no longer have it).
        var: name of the variable picked from each output.

    Returns:
        ``xr.Dataset`` with the variables ``p_values``, ``slopes`` and
        ``intercepts``.
    """
    # One output per element of the tuple returned by mannkendall_trend.
    p_values, slopes, intercepts = xr.apply_ufunc(
        mannkendall_trend,
        data,
        input_core_dims=[[dim]],
        output_core_dims=[[], [], []],
        vectorize=True,
        dask='parallelized',
    )

    return xr.Dataset(
        {
            'p_values': p_values[var],
            'slopes': slopes[var],
            'intercepts': intercepts[var],
        }
    )

# Gridded datasets: annual MCWD time series, climatology and trend

In [None]:
# Make sure the output folders exist before anything is written to them
# (this also creates folder_metric itself when missing).
for subfolder in ('clim', 'trend'):
    os.makedirs(os.path.join(folder_metric, subfolder), exist_ok=True)

for dataset in datasets:
    file_name = dataset + '.nc'
    # Open the dataset's file from folder_ts. The context manager closes the
    # NetCDF handle once this dataset is done; a missing file surfaces as an
    # error from open_dataset instead of an IndexError on an empty glob list.
    with xr.open_dataset(os.path.join(folder_ts, file_name)) as ds:
        # The constant is subtracted from every data variable; only 'pr' is used below.
        ds_minus_et = ds - evapotranspiration

        # Yearly MCWD: group by calendar year and reduce each group along 'time'.
        ds_ts = xr.apply_ufunc(mcwd_f, ds_minus_et['pr'].groupby('time.year'),
                               input_core_dims=[['time']],
                               output_core_dims=[[]],
                               vectorize=True, dask='parallelized').rename('mcwd').to_dataset()

        # Mean over all years, and Mann-Kendall trend along the 'year' dimension.
        ds_clim = ds_ts.mean('year')
        ds_trend = ds_kendall(ds_ts, 'year', var='mcwd')

        # Save the three products under the same file name.
        ds_ts.to_netcdf(os.path.join(folder_metric, file_name))
        ds_clim.to_netcdf(os.path.join(folder_metric, 'clim', file_name))
        ds_trend.to_netcdf(os.path.join(folder_metric, 'trend', file_name))

# Station observations: annual MCWD, climatology and trend

In [25]:
# Input files; os.path.join keeps the paths valid on non-Windows systems too.
stations = gpd.read_file(os.path.join('Data', 'Evaluation', 'stations_amz_ANA.geojson'))
# NOTE(review): read_pickle can execute arbitrary code on load -- only use it
# with files from a trusted source.
df_stat = (
    pd.read_pickle(os.path.join('Data', 'Evaluation', 'amz_01_20_20bet.pkl'))
    .sort_values(by=['Code', 'Date'])
    .reset_index(drop=True)
)
df_stat['Total'] = df_stat['Total'] - evapotranspiration

# Apply mcwd_f per station and calendar year. The year key keeps the column
# name 'Date' after reset_index.
df_ts = (
    df_stat.groupby(['Code', df_stat.Date.dt.year])['Total']
    .apply(mcwd_f)
    .reset_index()
    .rename({'Total': 'mcwd'}, axis='columns')
)

# Mean yearly MCWD per station, attached to the station geometries.
# The default inner merge drops stations that have no MCWD values.
df_clim = df_ts.groupby('Code')['mcwd'].mean().reset_index()
stations = stations.merge(df_clim, on='Code')

# Mann-Kendall trend of the yearly series of each station. Iterating the
# groups avoids re-filtering df_ts for every station code.
for code, mcwd_series in df_ts.groupby('Code', sort=False)['mcwd']:
    test = original_test(mcwd_series)
    is_station = stations.Code == code
    stations.loc[is_station, 'p_mcwd'] = test.p
    stations.loc[is_station, 'slope_mcwd'] = test.slope
    stations.loc[is_station, 'intercept_mcwd'] = test.intercept

# Save the station map and the yearly time series.
os.makedirs(folder_metric, exist_ok=True)
stations.to_file(os.path.join(folder_metric, 'stations.geojson'), driver='GeoJSON')
df_ts.to_csv(os.path.join(folder_metric, 'stations_ts.csv'))
