# setup

In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import os
from glob import glob
import seaborn as sns
import geopandas as gpd
import xarray as xr
from pymannkendall import original_test
shp_pan = gpd.read_file(r'Data\shapefiles\panamz.geojson')


In [2]:
folder_ts = r'Data\Datasets\amz\ts'
folder_metric = r'Data\Datasets\amz\map'
datasets = ['cru', 'gpcc', 'chirps','imerg', 'terra', 'era_land', 'jra55','merra2']
datasets_names = ['CRU', 'GPCC', 'CHIRPS','IMERG-V6', 'TerraClimate', 'ERA5-Land', 'JRA55','MERRA2']


In [3]:
# functions
def mannkendall_trend(arr):
    if not np.isnan(arr).any():
        result = original_test(arr)
        return result.p, result.slope, result.intercept
    else:
        return np.nan, np.nan, np.nan

def ds_kendall(data, dim):
    results =  xr.apply_ufunc(mannkendall_trend, data,
                             input_core_dims=[[dim]],
                             output_core_dims=[[], [],[]],
                             vectorize=True,
                             dask='parallelized')
    
    
    # Extract the p-values and Sen's slopes from the results
    p_values = results[0]
    slopes = results[1]
    intercepts = results[2]
    
    # Create a new xarray dataset to store the results
    results_dataset = xr.Dataset({'p_values': p_values.pr, 'slopes': slopes.pr, 'intercepts': intercepts.pr})
    return results_dataset

# ts clim trend

In [41]:
for dataset in datasets:
    #read file of dataset in folder_clean
    file_path = glob(os.path.join(folder_ts, dataset + '.nc'))
    ds = xr.open_dataset(file_path[0])
    ds_ts = ds.groupby('time.year').mean('time')
    #pr times 12
    ds_ts['pr'] = ds_ts['pr'] * 12
    ds_clim = ds_ts.mean('year')
    ds_trend =  ds_kendall(ds_ts, 'year')
    #save
    ds_ts.to_netcdf(os.path.join(folder_metric, dataset + '.nc'))
    ds_clim.to_netcdf(os.path.join(folder_metric,'clim', dataset + '.nc'))
    ds_trend.to_netcdf(os.path.join(folder_metric,'trend', dataset + '.nc'))

# station

In [3]:
stations = gpd.read_file(r'Data\Evaluation\stations_amz_ANA.geojson')
df_stat= pd.read_pickle(r'Data\Evaluation\amz_01_20_20bet.pkl')
df_ts = df_stat.groupby(['Code',df_stat.Date.dt.year]).sum().reset_index()
df_clim = df_ts.groupby('Code').mean().reset_index()
stations = stations.merge(df_clim, on='Code')
for code in df_ts.Code.unique():
    test = original_test(df_ts[df_ts.Code == code]['Total'])
    stations.loc[stations.Code == code, 'p'+ '_' + 'anual'] = test.p
    stations.loc[stations.Code == code, 'slope'+ '_' + 'anual'] = test.slope
    stations.loc[stations.Code == code, 'intercept'+ '_' + 'anual'] = test.intercept

#save stations as stations map
stations.to_file(os.path.join(folder_metric, 'stations.geojson'), driver='GeoJSON')
df_ts.to_csv(os.path.join(folder_metric, 'stations_ts.csv'))

  df_ts = df_stat.groupby(['Code',df_stat.Date.dt.year]).sum().reset_index()


# interpolar

In [15]:
terra = xr.open_dataset(r'Data\Datasets\amz\map\clim\terra.nc')
folder_metric = r'Data\Datasets\amz\map\clim'
folder_metric_int = r'Data\Datasets\amz\map\clim\int'
datasets = ['cru', 'gpcc', 'chirps','imerg', 'terra', 'era_land', 'jra55','merra2']
datasets_names = ['CRU', 'GPCC', 'CHIRPS','IMERG-V6', 'TerraClimate', 'ERA5-Land', 'JRA55','MERRA2']

In [27]:
#interpolate all datasets to terra grid and unite in a single dataset
list_ds = []
for i,dataset in enumerate(datasets):
    ds = xr.open_dataset(os.path.join(folder_metric, dataset + '.nc'))
    ds_int = ds.interp_like(terra, method='nearest')
    ds_int = ds_int[['pr']]
    ds_int.to_netcdf(os.path.join(folder_metric_int, dataset + '.nc'))
    ds_int['dataset'] = datasets_names[i]
    list_ds.append(ds_int)
ds_all = xr.concat(list_ds, dim='dataset')
ds_all.to_netcdf(os.path.join(folder_metric_int, 'all.nc'))

# extras

In [15]:
datasets_extras = ['cpc', 'cmap','cmorph', 'gpcp', 'persiann', 'ccs', 'cdr', 'mswep',
                    'mswep_nogauge', 'gldas','worldclim', 'era', 'ncep1', 'ncep2']

datasets_names_extras = ['CPC', 'CMAP','CMORPH', 'GPCP v3.2', 'PERSIANN', 'PERSIANN-CCS', 'PERSIANN-CDR', 'MSWEP v2.8',
                    'MSWEP_nogauge v2.8', 'GLDAS v2.1','WorldClim', 'ERA5', 'NCEP R1', 'NCEP R2']

In [4]:
datasets_extras = ['ccs']

In [5]:
for dataset in datasets_extras:
    #read file of dataset in folder_clean
    file_path = glob(os.path.join(folder_ts, dataset + '.nc'))
    ds = xr.open_dataset(file_path[0])
    ds_ts = ds.groupby('time.year').mean('time')
    #pr times 12
    ds_ts['pr'] = ds_ts['pr'] * 12
    ds_clim = ds_ts.mean('year')
    ds_trend =  ds_kendall(ds_ts, 'year')
    #save
    ds_ts.to_netcdf(os.path.join(folder_metric, dataset + '.nc'))
    ds_clim.to_netcdf(os.path.join(folder_metric,'clim', dataset + '.nc'))
    ds_trend.to_netcdf(os.path.join(folder_metric,'trend', dataset + '.nc'))
    print(dataset)

ccs
