# setup


In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import os
from glob import glob
import seaborn as sns
import geopandas as gpd
import xarray as xr
import rioxarray
import matplotlib.gridspec as gridspec
import cartopy.crs as ccrs
import matplotlib as mpl
from cmcrameri import cm
from pymannkendall import original_test
from scipy.stats import entropy

folder_ts = r'Data\Datasets\amz\ts'
folder_metric = r'Data\Datasets\amz\ent'
datasets = ['cru', 'gpcc', 'chirps','imerg', 'terra', 'era_land', 'jra55','merra2']
datasets_names = ['CRU', 'GPCC', 'CHIRPS','IMERG-V6', 'TerraClimate', 'ERA5-Land', 'JRA55','MERRA2']



In [2]:
# functions
def mannkendall_trend(arr):
    if not np.isnan(arr).any():
        result = original_test(arr)
        return result.p, result.slope, result.intercept
    else:
        return np.nan, np.nan, np.nan

def ds_kendall(data, dim, var='pr'):
    results =  xr.apply_ufunc(mannkendall_trend, data,
                             input_core_dims=[[dim]],
                             output_core_dims=[[], [],[]],
                             vectorize=True,
                             dask='parallelized')
    
    
    # Extract the p-values and Sen's slopes from the results
    p_values = results[0]
    slopes = results[1]
    intercepts = results[2]
    
    # Create a new xarray dataset to store the results
    results_dataset = xr.Dataset({'p_values': p_values[var], 'slopes': slopes[var], 'intercepts': intercepts[var]})
    return results_dataset
def yearly_ratio(x):
    return x / (x.mean(dim='time')*12)

def entropy_models(ds):
    
    ds['ratio'] = ds.groupby('time.year').apply(yearly_ratio).pr
    uniform = np.zeros((12, ds.pr.shape[1], ds.pr.shape[2]))
    uniform[:] = 1/12
    ds['ent'] = ds.ratio.groupby('time.year').reduce(entropy, qk = uniform, base=2, dim='time')
    return ds[['ent']]


# ts clim trend

In [3]:
for dataset in datasets:
    #read file of dataset in folder_clean
    file_path = glob(os.path.join(folder_ts, dataset + '.nc'))
    ds = xr.open_dataset(file_path[0])
    ds_ts = entropy_models(ds)
    #pr times 12
    ds_clim = ds_ts.mean('year')
    ds_trend =  ds_kendall(ds_ts, 'year', var='ent')
    #save
    ds_ts.to_netcdf(os.path.join(folder_metric, dataset + '.nc'))
    ds_clim.to_netcdf(os.path.join(folder_metric,'clim', dataset + '.nc'))
    ds_trend.to_netcdf(os.path.join(folder_metric,'trend', dataset + '.nc'))

# station


In [3]:
stations = gpd.read_file(r'Data\Evaluation\stations_amz_ANA.geojson')
df_stat= pd.read_pickle(r'Data\Evaluation\amz_01_20_20bet.pkl')


In [4]:
#calculate Relative Entropy
df_stat = df_stat.sort_values(by=['Code', 'Date']).reset_index(drop=True)
df_stat['ratio'] = df_stat['Total'] / df_stat.groupby(['Code', df_stat.Date.dt.year]).transform(np.sum)['Total']
uniform = np.zeros(12)
uniform[:] = 1/12
# aplly the entropy function to the ratio column for each code
df_ts = df_stat.groupby(['Code', df_stat.Date.dt.year])['ratio'].apply(lambda x: entropy(x, qk=uniform,base=2)).reset_index().rename({'ratio': 'ent'},  axis='columns')
df_clim = df_ts.groupby('Code')['ent'].mean().reset_index()
stations = stations.merge(df_clim, on='Code')
stations['ent'] = stations['ent'].fillna(0)
for code in df_ts.Code.unique():
    test = original_test(df_ts[df_ts.Code == code]['ent'])
    stations.loc[stations.Code == code, 'p'+ '_' + 'ent'] = test.p
    stations.loc[stations.Code == code, 'slope'+ '_' + 'ent'] = test.slope
    stations.loc[stations.Code == code, 'intercept'+ '_' + 'ent'] = test.intercept

#save stations as stations map
stations.to_file(os.path.join(folder_metric, 'stations.geojson'), driver='GeoJSON')
df_ts.to_csv(os.path.join(folder_metric, 'stations_ts.csv'))



  df_stat['ratio'] = df_stat['Total'] / df_stat.groupby(['Code', df_stat.Date.dt.year]).transform(np.sum)['Total']
