In [None]:
import xarray as xr
import pandas as pd
import glob
import numpy as np
from datetime import datetime, timedelta

## Extraction of meteorological data on the day of sampling

In [None]:
# Location of the Globe-LFMC workbook; fill this in before running the cell.
path_to_db_folder = '' #string path to folder where database is

# Read the 'LFMC data' sheet and work on a copy, so the frame returned by
# read_excel stays available unmodified under the name `ds`.
ds = pd.read_excel(
    '{0}\\Globe-LFMC-2.0.xlsx'.format(path_to_db_folder),
    sheet_name='LFMC data',
)
globelfmc = ds.copy()

# Last expression of the cell: display the working table.
globelfmc

In [None]:
# Slice bounds, counted from the end of a file path, of the YYYYMMDD stamp
# that the extraction loop parses out of every AgERA5 file name.
start_char_date, end_char_date = -22, -14

main_path = '' #string path to folder with AgERA5 variables folders

# Sub-folders of main_path to process, one per AgERA5 variable. The folder name
# with '-' replaced by '_' is used as both the output column name and the
# variable name looked up inside the NetCDF files.
folders = [
    'Precipitation-Flux',
    'Relative-Humidity-2m-06h',
    'Relative-Humidity-2m-09h',
    'Relative-Humidity-2m-12h',
    'Relative-Humidity-2m-15h',
    'Temperature-Air-2m-Max-24h',
    'Temperature-Air-2m-Mean-24h',
    'Vapour-Pressure-Mean',
    'Wind-Speed-10m-Mean',
    'Dew-Point-Temperature-2m-Mean',
]

In [None]:
# For every AgERA5 variable folder, add a column to `globelfmc` holding the
# value of the grid cell nearest to each site on its sampling day.
# Rows whose sampling date has no matching file keep NaN.
for folder_name in folders:
    path_to_files_with_star = '{0}\\{1}\\*'.format(main_path, folder_name)

    # Column name, and also the variable name read from the NetCDF file.
    var_name = folder_name.replace('-','_')

    globelfmc[var_name] = np.nan

    for f in glob.glob(path_to_files_with_star):

        # The date stamp sits at a fixed offset from the end of the file path.
        date_str = f[start_char_date:end_char_date]
        print(var_name, date_str)

        date = datetime.strptime(date_str, '%Y%m%d')

        # One vectorised comparison per file, reused below for the site list
        # and the row selection (instead of rebuilding a Python list of the
        # whole date column for every file).
        on_date = globelfmc['Sampling date (YYYYMMDD)'] == date
        if not on_date.any():
            continue

        # Context manager so the file handle is released as soon as the values
        # for this day have been read; many files are opened per variable.
        with xr.open_dataset(f) as nc_var:

            sub_sites = sorted(set(globelfmc.loc[on_date, 'Site name']))

            for site in sub_sites:

                rows = on_date & (globelfmc['Site name'] == site)

                # Coordinates are read from the rows being filled, so a site
                # name that is reused elsewhere in the table cannot supply them.
                lat = globelfmc.loc[rows, 'Latitude (WGS84, EPSG:4326)'].values[0]
                lon = globelfmc.loc[rows, 'Longitude (WGS84, EPSG:4326)'].values[0]

                # [0] takes the first element along the dimension left after
                # selecting lat/lon (presumably the single time step of a daily
                # file - confirm against the data).
                value = nc_var.sel(lat=lat, lon=lon, method='nearest')[var_name].data[0]

                globelfmc.loc[rows, var_name] = value

    # Intermediate save after each variable, so a crash does not lose the
    # variables already extracted.
    globelfmc.to_csv('{0}\\Globe-LFMC-2.0_AgERA5_up to {1}.csv'.format(path_to_db_folder, var_name), index=False)


globelfmc.to_csv('{0}\\Globe-LFMC-2.0_AgERA5.csv'.format(path_to_db_folder), index=False)

## Extraction of cumulative precipitation data

In [None]:
# Resume the cumulative-rain computation from the newest yearly checkpoint if
# one exists; otherwise start from the original database with the four running
# totals set to zero.
checkpoint_folder = '' # path to folder where to save checkpoint files
checkpoint_path = glob.glob('{0}\\CHECKPOINT cumulative rain_*.xlsx'.format(checkpoint_folder))

if len(checkpoint_path) > 0:
    # Checkpoints are written as "..._<year>.xlsx", so the last path in sorted
    # order is the most recent year.
    latest = sorted(checkpoint_path)[-1]
    print(latest)

    ds = pd.read_excel(latest)
    globelfmc = ds.copy()
    del ds

    # The four characters before ".xlsx" are the last completed year.
    year_to_start = int(latest[-9:-5]) + 1
    print(year_to_start)

else:
    ds = pd.read_excel('{0}\\Globe-LFMC-2.0.xlsx'.format(path_to_db_folder), sheet_name='LFMC data')
    globelfmc = ds.copy()
    del ds

    year_to_start = 1979 #first available year of AgERA5

    # Float zeros: the totals are incremented with floating-point rain values,
    # and pandas deprecates silently upcasting an integer column on assignment.
    for total_column in ('Precipitation_3days', 'Precipitation_1week',
                         'Precipitation_4weeks', 'Precipitation_12weeks'):
        globelfmc[total_column] = 0.0

In [None]:
# Slice bounds, counted from the end of a file path, of the YYYYMMDD stamp
# in an AgERA5 file name.
start_char_date, end_char_date = -22, -14

main_path = '' #string path to folder with AgERA5 variables folders

# Template for a single daily precipitation file; '{}' is a placeholder left
# in the name for str.format.
path_to_rain_files = (
    main_path
    + '\\Precipitation-Flux\\Precipitation-Flux_C3S-glob-agric_AgERA5_{}_final-v1.0.nc'
)
# Glob pattern matching every NetCDF file in the precipitation folder.
path_to_rain_folder = (
    main_path
    + '\\Precipitation-Flux\\*.nc'
)

In [None]:
# Accumulate, for every sample, the precipitation of the 3, 7, 28 and 84 days
# preceding its sampling date (the sampling day itself is not included).
# Each daily rain file is read once and its value is added to the running
# totals of every sample taken 1..84 days after that file's date.

# Slice bounds, counted from the end of a file path, of the four-digit year.
start_char_year = -22
end_char_year = -18

# First sampling dates for which a complete window of rain files exists,
# counted from 1979-01-01; earlier samples are flagged 'NA' below.
threshold_3day = (datetime(1979,1,1)+timedelta(days=3))
threshold_1week = (datetime(1979,1,1)+timedelta(days=7))
threshold_4weeks = (datetime(1979,1,1)+timedelta(days=28))
threshold_12weeks = (datetime(1979,1,1)+timedelta(days=84))

# (total column, window length in days, first date with a complete window)
rain_windows = [
    ('Precipitation_3days', 3, threshold_3day),
    ('Precipitation_1week', 7, threshold_1week),
    ('Precipitation_4weeks', 28, threshold_4weeks),
    ('Precipitation_12weeks', 84, threshold_12weeks),
]

for year in range(year_to_start, 2023+1):
    list_rain_files = [f for f in glob.glob(path_to_rain_folder) if int(f[start_char_year:end_char_year])==year]

    for rain_file in list_rain_files:
        rain_date_str = rain_file[start_char_date:end_char_date]
        rain_date = datetime.strptime(rain_date_str, '%Y%m%d')
        print(rain_date_str)

        # Context manager so each daily file is closed once its values have
        # been read; otherwise one handle per file stays open for the whole run.
        with xr.open_dataset(rain_file) as nc_rain:

            for n in range(1,84+1):
                d_future = rain_date+timedelta(days=n)

                on_date = globelfmc['Sampling date (YYYYMMDD)'] == d_future
                if not on_date.any():
                    continue

                # Rain that fell n days before sampling belongs to every window
                # at least n days long.
                columns_to_update = [column for column, window_days, _ in rain_windows if n <= window_days]

                sub_sites = sorted(set(globelfmc.loc[on_date, 'Site name']))

                for site in sub_sites:

                    rows = on_date & (globelfmc['Site name'] == site)

                    # Coordinates are read from the rows being updated, so a
                    # site name reused elsewhere in the table cannot supply them.
                    lat = globelfmc.loc[rows, 'Latitude (WGS84, EPSG:4326)'].values[0]
                    lon = globelfmc.loc[rows, 'Longitude (WGS84, EPSG:4326)'].values[0]

                    # Built-in float, so the totals keep double precision even
                    # after a column has been turned into Python objects by the
                    # 'NA' flagging below.
                    rain_value = float(nc_rain.sel(lat=lat, lon=lon, method='nearest')['Precipitation_Flux'].data[0])

                    for column in columns_to_update:
                        globelfmc.loc[rows, column] += rain_value


    # Samples taken before a full window of rain files exists cannot have a
    # valid total; flag them with the string 'NA'.
    for column, _, threshold in rain_windows:
        too_early = globelfmc['Sampling date (YYYYMMDD)'] < threshold
        if too_early.any():
            # Explicit cast: assigning a string into a numeric column relies on
            # an implicit upcast that pandas has deprecated.
            globelfmc[column] = globelfmc[column].astype(object)
            globelfmc.loc[too_early, column] = 'NA'

    # One checkpoint per completed year; the checkpoint cell resumes from the
    # newest of these.
    globelfmc.to_excel('{0}\\CHECKPOINT cumulative rain_{1}.xlsx'.format(checkpoint_folder,year), index=False)



globelfmc.to_csv('{0}\\Globe-LFMC-2.0_AgERA5 cumulative rain_final.csv'.format(path_to_db_folder), index=False)

