In [1]:
from datetime import timedelta as td
from datetime import datetime as dt
from glob import glob
import pandas as pd
import xarray as xr
import numpy as np
import json
import os

In [2]:
PATH_RESOURCES = os.path.join('..', '..', 'resources')
PATH_LOCATIONS = os.path.join(PATH_RESOURCES, 'utils', 'locations-webcam.json')
PATH_CSV       = os.path.join(PATH_RESOURCES, 'utils', 'images_ibericam_config.csv')
PATH_RESULT    = os.path.join(PATH_RESOURCES, 'utils', 'images_weather_data.csv')
PATH_VARIABLES = os.path.join(PATH_RESOURCES, 'reanalysis-era5-single-levels')
LOCATIONS      = json.load(open(PATH_LOCATIONS))

In [3]:
def closestValuesToLocation(lat, lon, date, ds):
    date     = dt.strptime(date, "%d-%m-%Y_%H:%M")

    idx_close_dt = np.argmin(np.abs(ds.time.values - np.datetime64(date)))
    close_dt     = ds.time.values[idx_close_dt]

    close_lat    = ds.latitude[np.argmin(np.abs(lat - ds.latitude.values))].values
    close_lon    = ds.longitude[np.argmin(np.abs(lon - ds.longitude.values))].values

    ds = ds.sel(time=close_dt, latitude=close_lat, longitude=close_lon)
    
    dic_result = {
            'lat'  : lat,
            'lon'  : lon,
            'date' : date
        }
    
    for key in list(ds.data_vars.keys()):
        dic_result[key] = ds[key].values
    
    return dic_result

In [4]:
def generateDataFrameImages() :
    df_csv        = pd.read_csv(PATH_CSV)
    vars_weather  = variables = [os.path.basename(os.path.dirname(path)) for path in glob(os.path.join(PATH_VARIABLES, '*', '')) if os.path.basename(os.path.dirname(path)) != 'complete_files']
    location_json = json.load(open(PATH_LOCATIONS))
    list_result     = list()
    
    for index, row in df_csv.iterrows() :
        lat_lon = next((loc for loc in location_json['locations'] if loc['id'] == row['location']), None)
        aux_dic = {
            'location': row['location'],
            'lat': lat_lon['lat'],
            'lon': lat_lon['lon'],
            'date_time': row['date_time'],
            'path_image': row['path']
        }
    
        for var in vars_weather :
            var_aux_path = os.path.join(PATH_VARIABLES, var, f'{var}_{row.year}-{row.month}.nc')
            ds_aux       = xr.open_dataset(var_aux_path, engine='netcdf4')
            date_aux     = dt.strptime(row.date_time, '%Y-%m-%d %H:%M:%S').strftime('%d-%m-%Y_%H:%M')
            
            var_result   = closestValuesToLocation(lat_lon['lat'], lat_lon['lon'], date_aux, ds_aux)
            name_var     = list(var_result.keys())[-1]
            aux_dic[name_var] = var_result[name_var]
    
        list_result.append(aux_dic)

    return list_result

In [5]:
%%time
if __name__ == '__main__' :
    df = pd.DataFrame(generateDataFrameImages())
    df.to_csv(PATH_RESULT, index=False)

CPU times: total: 46.6 s
Wall time: 6min 52s
