In [3]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import os
from glob import glob
import seaborn as sns
import geopandas as gpd
import xarray as xr
import rioxarray
import matplotlib.gridspec as gridspec
import cartopy.crs as ccrs
import matplotlib as mpl
from cmcrameri import cm
from pymannkendall import original_test


# Data locations. Built with os.path.join so the separators match the host OS:
# the values are unchanged on Windows, and on POSIX they become real nested
# paths instead of one file name containing backslashes.
folder_ts = os.path.join('Data', 'Datasets', 'amz', 'ts')        # read: one '<dataset>.nc' per entry of `datasets`
folder_metric = os.path.join('Data', 'Datasets', 'amz', 'amp')   # written: amplitude metrics and station outputs

# File stems of the datasets and their display names, listed in the same order.
datasets = ['cru', 'gpcc', 'chirps', 'imerg', 'terra', 'era_land', 'jra55', 'merra2']
datasets_names = ['CRU', 'GPCC', 'CHIRPS', 'IMERG-V6', 'TerraClimate', 'ERA5-Land', 'JRA55', 'MERRA2']



In [10]:
# functions
def mannkendall_trend(arr):
    """Run the Mann-Kendall test on one series and return its key statistics.

    Parameters
    ----------
    arr : array-like
        Values of the series to test.

    Returns
    -------
    tuple
        ``(p, slope, intercept)`` read from the ``original_test`` result, or
        ``(nan, nan, nan)`` when ``arr`` contains at least one NaN.
    """
    # A single missing value disqualifies the whole series.
    if np.isnan(arr).any():
        return np.nan, np.nan, np.nan

    mk = original_test(arr)
    return mk.p, mk.slope, mk.intercept

def ds_kendall(data, dim, var='pr'):
    """Apply the Mann-Kendall trend test along ``dim`` for a single variable.

    Parameters
    ----------
    data : xarray.Dataset or xarray.DataArray
        Input data. For a Dataset only ``data[var]`` is tested; a DataArray
        is tested as given.
    dim : str
        Name of the dimension holding each series; it is consumed by the test
        and is absent from the result.
    var : str, default 'pr'
        Variable to pick when ``data`` is a Dataset.

    Returns
    -------
    xarray.Dataset
        Variables ``p_values``, ``slopes`` (Sen's slopes) and ``intercepts``
        as returned by ``mannkendall_trend`` for every series.
    """
    # Pick the variable *before* running the test. Applying the ufunc to a
    # whole Dataset tests every variable in it, only for all but `var` to be
    # thrown away afterwards.
    series = data[var] if isinstance(data, xr.Dataset) else data

    p_values, slopes, intercepts = xr.apply_ufunc(
        mannkendall_trend,
        series,
        input_core_dims=[[dim]],
        output_core_dims=[[], [], []],
        vectorize=True,
        dask='parallelized',
        # All three outputs are floats; stating it spares the dtype inference.
        output_dtypes=[float, float, float],
    )

    return xr.Dataset({'p_values': p_values, 'slopes': slopes, 'intercepts': intercepts})

# Gridded datasets: annual time series, climatology and trend

In [None]:
# The climatology and trend files go into sub-folders of folder_metric;
# create them up front so to_netcdf does not fail on a fresh checkout.
for subfolder in ('clim', 'trend'):
    os.makedirs(os.path.join(folder_metric, subfolder), exist_ok=True)

for dataset in datasets:
    # Open this dataset's file from folder_ts directly: the name has no
    # wildcard, and a missing file is then reported by open_dataset itself
    # rather than as an IndexError on an empty glob result.
    file_path = os.path.join(folder_ts, dataset + '.nc')

    # The context manager closes the file handle once everything is written.
    with xr.open_dataset(file_path) as ds:
        # Annual maximum and minimum, and their difference (the amplitude).
        maxx = ds.groupby('time.year').max('time')
        minn = ds.groupby('time.year').min('time')
        ds_ts = (maxx - minn).pr.rename('amp').to_dataset()
        ds_ts['max'] = maxx.pr
        ds_ts['min'] = minn.pr

        # Mean over all years, and Mann-Kendall trend of the amplitude.
        ds_clim = ds_ts.mean('year')
        ds_trend = ds_kendall(ds_ts, 'year', var='amp')

        # save
        ds_ts.to_netcdf(os.path.join(folder_metric, dataset + '.nc'))
        ds_clim.to_netcdf(os.path.join(folder_metric, 'clim', dataset + '.nc'))
        ds_trend.to_netcdf(os.path.join(folder_metric, 'trend', dataset + '.nc'))

# Station observations: annual time series, climatology and trend

In [29]:
# Station layer and station records. The following cells read the columns
# 'Code', 'Date' and 'Total' from df_stat and join on 'Code' in stations.
# Paths use os.path.join so the separators are right on any OS.
stations = gpd.read_file(os.path.join('Data', 'Evaluation', 'stations_amz_ANA.geojson'))
# NOTE(review): unpickling can execute arbitrary code; only load this file
# when it comes from a trusted source.
df_stat = pd.read_pickle(os.path.join('Data', 'Evaluation', 'amz_01_20_20bet.pkl'))

In [30]:
# Annual maximum and minimum of 'Total' per station, from a single groupby
# pass (instead of two passes stitched back together by row position).
# The year key is derived from the Date column and keeps the name 'Date',
# so the resulting columns are: Code, Date (the year), max, min.
df_ts = (
    df_stat
    .groupby(['Code', df_stat.Date.dt.year])['Total']
    .agg(['max', 'min'])
    .reset_index()
)
# Annual amplitude.
df_ts['amp'] = df_ts['max'] - df_ts['min']

In [31]:
# Climatology: mean amplitude, minimum and maximum per station over all years.
df_clim = df_ts.groupby('Code')[['amp', 'min', 'max']].mean().reset_index()

# Drop columns left over from an earlier run of this cell, so that re-running
# it does not make merge() emit suffixed duplicates (amp_x / amp_y, ...).
derived_cols = ['amp', 'min', 'max', 'p_amp', 'slope_amp', 'intercept_amp']
stations = stations.drop(columns=derived_cols, errors='ignore').merge(df_clim, on='Code')

# One Mann-Kendall test per station on its annual amplitude series, computed
# in a single grouped pass and then mapped onto the station rows by Code.
trend_tests = {code: original_test(amp) for code, amp in df_ts.groupby('Code')['amp']}
for stat in ('p', 'slope', 'intercept'):
    stat_by_code = {code: getattr(result, stat) for code, result in trend_tests.items()}
    stations[stat + '_amp'] = stations['Code'].map(stat_by_code)

# Save the station map and the per-station annual time series.
stations.to_file(os.path.join(folder_metric, 'stations.geojson'), driver='GeoJSON')
df_ts.to_csv(os.path.join(folder_metric, 'stations_ts.csv'))


