In [11]:
import xarray as xr
import netCDF4
import numpy as np
import dask as da
from pathlib import Path
from datetime import datetime

In [14]:
# Maps the string keys used in the climate JSON (MONICA's keys) to the
# variable names used inside, and in the file names of, the NetCDF files.
# Insertion order is the order in which variables are processed.
CLIMATE_VARIABLES = dict([
    ('3', 'tasmin'),
    ('4', 'tas'),
    ('5', 'tasmax'),
    ('6', 'pr'),
    ('8', 'rsds'),
    ('9', 'sfcWind'),
    ('12', 'hurs'),
])

In [12]:

def load_weatherdata(data_dir='monica/climate_netcdf/'):
    """Open every ``.nc`` file in ``data_dir`` as one combined xarray dataset.

    Args:
        data_dir: Directory to scan for NetCDF files. Defaults to the
            location the original hard-coded, ``'monica/climate_netcdf/'``.

    Returns:
        The dataset returned by ``xr.open_mfdataset`` for all matching files,
        combined by coordinates and chunked 100 x 100 along ``lat``/``lon``.

    Raises:
        FileNotFoundError: If ``data_dir`` does not exist or contains no
            ``.nc`` files.
    """
    # Build a filtered list instead of removing items from the list being
    # iterated: removing while iterating skips the element after each removal,
    # so non-NetCDF files could slip through.
    # Sorting makes the order deterministic (directory order is arbitrary).
    nc_paths = sorted(
        str(entry)
        for entry in Path(data_dir).iterdir()
        if entry.name.endswith('.nc')
    )
    if not nc_paths:
        raise FileNotFoundError(f"no .nc files found in {data_dir!r}")

    return xr.open_mfdataset(
        nc_paths,
        combine='by_coords',
        chunks={'lat': 100, 'lon': 100},
    )



def load_netcdf_to_memory():
    """
    Open the yearly NetCDF files of every climate variable as one dataset.

    Covers the current year and the three years before it, so that the
    files do not have to be opened and closed for each request.

    Returns a tuple ``(climate, first_date, last_date)`` where the dates are
    the first and last entries of the dataset's ``time`` coordinate.

    NOTE(review): xarray documents ``open_mfdataset`` as lazy, so the data
    may not actually be resident in memory after this call -- confirm.
    """
    base_dir = './climate_netcdf'

    current_year = datetime.now().year
    years = range(current_year - 3, current_year + 1)

    # One file per (variable, year); variables vary slowest, years fastest.
    nc_files = [
        f"{base_dir}/zalf_{variable.lower()}_amber_{year}_v1-0.nc"
        for variable in CLIMATE_VARIABLES.values()
        for year in years
    ]

    climate = xr.open_mfdataset(nc_files, combine='by_coords')
    time_coord = climate['time']
    return climate, time_coord[0], time_coord[-1]

# CLIMATE_DATES = load_netcdf_to_memory()
# CLIMATE = CLIMATE_DATES[0]
# CLIMATE_FIRST_DATE = CLIMATE_DATES[1]
# CLIMATE_LAST_DATE = CLIMATE_DATES[2]

# def get_climate_data_as_json_new(start_date, end_date, lat_idx, lon_idx):

#     climate_json = {}
    
#     climate_slice = CLIMATE.sel(time=slice(start_date, end_date)).isel(lat=lat_idx, lon=lon_idx)

#     for key, value in CLIMATE_VARIABLES.items():
#         climate_json[key] = climate_slice[value].values.tolist()

#     return climate_json


# used
def get_climate_data_as_json(start_date, end_date, lat_idx, lon_idx):
    """Returns the climate data as json using monica's keys for the given start and end date and the given lat and lon index

    For every year from ``start_date.year`` to ``end_date.year`` (inclusive)
    and every entry of ``CLIMATE_VARIABLES``, the matching yearly NetCDF file
    is opened and the values of one grid cell are appended to that key's list.

    Args:
        start_date: First date to include; must expose ``.year`` and be
            accepted by ``netCDF4.date2index`` (e.g. a ``datetime``).
        end_date: Last date to include (inclusive); same requirements.
        lat_idx: Index along the second dimension of each variable.
        lon_idx: Index along the third dimension of each variable.

    Returns:
        dict mapping each key of ``CLIMATE_VARIABLES`` to a list of values
        in chronological order.

    Raises:
        Whatever ``netCDF4.Dataset`` / ``netCDF4.date2index`` raise, e.g.
        when a yearly file is missing or a date is not on the time axis.
    """
    print("get_climate_data_as_json", start_date, end_date, lat_idx, lon_idx)

    # opening with MFDataset does not work, because time is not an unlimited dimension in the NetCDF files
    start = datetime.now()

    # One output list per climate key, in the order of CLIMATE_VARIABLES.
    climate_json = {key: [] for key in CLIMATE_VARIABLES}

    # `__file__` does not exist inside a notebook kernel; fall back to the
    # current working directory there.
    try:
        base_dir = Path(__file__).resolve().parent
    except NameError:
        base_dir = Path.cwd()
    climate_data_path = base_dir.joinpath('climate_netcdf')

    for year in range(start_date.year, end_date.year + 1):
        print('climate data for loop', year)
        for key, value in CLIMATE_VARIABLES.items():

            file_path = f"{climate_data_path}/zalf_{value.lower()}_amber_{year}_v1-0.nc"
            print("filepath: ", file_path,  "getting key:", key)

            # The context manager closes the file even if date2index or the
            # read raises.
            with netCDF4.Dataset(file_path, 'r') as nc:
                print('climate data check 1')
                time_var = nc['time']

                # Default to the whole year; narrow only in the first and
                # last requested year.
                start_idx = 0
                end_idx = len(time_var)
                if year == start_date.year:
                    start_idx = netCDF4.date2index(start_date, time_var)
                if year == end_date.year:
                    # +1 because the slice end is exclusive and end_date is
                    # meant to be included.
                    end_idx = netCDF4.date2index(end_date, time_var) + 1

                values = nc.variables[value][start_idx:end_idx, lat_idx, lon_idx]

            climate_json[key].extend(values.tolist())

            print(year, value, key)
    print('Time elapsed in get_climate_data_as_json: ', datetime.now() - start)
    return climate_json

In [None]:
# Open the combined climate dataset once. The result is the tuple
# (dataset, first time entry, last time entry) that later cells unpack.
CLIMATE_DATES = load_netcdf_to_memory()


In [16]:
# Unpack the (dataset, first time entry, last time entry) tuple.
CLIMATE, CLIMATE_FIRST_DATE, CLIMATE_LAST_DATE = CLIMATE_DATES

In [17]:
# Example query: a one-year window (date strings) at a single grid cell.
start_date, end_date = '2022-01-11', '2023-01-11'
lat_idx = lon_idx = 200

In [18]:
# Select the time window by label, then pick one grid cell by position.
climate_slice = (
    CLIMATE
    .sel(time=slice(start_date, end_date))
    .isel(lat=lat_idx, lon=lon_idx)
)