In [1]:
from datetime import datetime
from glob import glob
import pandas as pd
import xarray as xr
import numpy as np
import json
import os

In [2]:
# Root folder of the project resources; the relative path is resolved against the current working directory.
PATH_RESOURCES = os.path.join('..', '..', 'resources')
# JSON file read by createDataFrameLocs(); its top-level 'locations' entry lists the points to sample.
PATH_LOCATIONS = os.path.join(PATH_RESOURCES, 'utils', 'locations-webcam.json')
# Folder scanned by createDataFrameVars(): one sub-folder per variable, each holding '*.netcdf' files.
PATH_DATA      = os.path.join(PATH_RESOURCES, 'reanalysis-era5-single-levels')

In [3]:
def createDataFrameVars():
    """Index every NetCDF file found under ``PATH_DATA``.

    ``PATH_DATA`` is scanned for sub-folders (one per variable) and each
    sub-folder for ``*.netcdf`` files. The month and the year are parsed from
    the third-to-last and second-to-last ``_``-separated tokens of the file
    name, e.g. ``cloud_base_height_3_2024_hourly.netcdf`` -> March 2024.

    Returns:
        pandas.DataFrame: one row per file, with the columns
            ``variable``  - name of the sub-folder containing the file,
            ``date_time`` - month covered by the file, formatted ``YYYY-MM``,
            ``path``      - path of the file as returned by ``glob``.
        Rows are ordered by folder and then by file path. The three columns
        are present even when no file is found.

    Raises:
        IndexError: if a file name has fewer than three ``_``-separated tokens.
        ValueError: if the month/year tokens are not integers or do not form
            a valid date.
    """
    columns = ['variable', 'date_time', 'path']
    records = []

    # The trailing separator in the pattern restricts the matches to folders.
    # glob() order depends on the file system, so sort for a reproducible frame.
    for var_path in sorted(glob(os.path.join(PATH_DATA, '*', ''))):
        # var_path ends with a separator: dirname() strips it, basename() keeps the folder name
        variable = os.path.basename(os.path.dirname(var_path))

        for var_file in sorted(glob(os.path.join(var_path, '*.netcdf'))):
            tokens = os.path.basename(var_file).split('_')
            year   = int(tokens[-2])
            month  = int(tokens[-3])
            date   = datetime(year, month, 1)  # also rejects an out-of-range month

            records.append({
                'variable' : variable,
                'date_time': date.strftime('%Y-%m'),
                'path'     : var_file
            })

    # Passing the columns keeps the schema stable when `records` is empty
    return pd.DataFrame(records, columns=columns)

In [4]:
def createDataFrameLocs():
    """Load the locations listed in the ``PATH_LOCATIONS`` JSON file.

    Returns:
        pandas.DataFrame: one row per entry of the top-level ``locations``
        list of the JSON document, one column per key of those entries.

    Raises:
        OSError: if ``PATH_LOCATIONS`` cannot be opened.
        json.JSONDecodeError: if the file is not valid JSON.
        KeyError: if the document has no ``locations`` entry.
    """
    # JSON is UTF-8 by specification; an explicit encoding avoids decoding the
    # file with the platform's locale code page (which corrupts non-ASCII text).
    with open(PATH_LOCATIONS, 'r', encoding='utf-8') as file:
        locations = json.load(file)['locations']

    return pd.DataFrame(locations)

In [5]:
def returnValuesLocations(df_locs, df_vars, path_results):
    """Sample every NetCDF file at the grid point nearest to each location.

    For each file listed in ``df_vars`` the first data variable is read and,
    for each row of ``df_locs``, the values at the latitude/longitude grid
    indices closest to the location are collected for every time step. The
    long table is then pivoted so that each (date_time, location) pair holds
    one column per variable, written to ``path_results`` as CSV and returned.

    Files whose dataset does not have exactly three dimensions are reported
    on stdout and skipped.

    Args:
        df_locs (pandas.DataFrame): locations; the columns ``id``, ``lat`` and
            ``lon`` are read.
        df_vars (pandas.DataFrame): files to sample; the columns ``variable``
            and ``path`` are read.
        path_results: destination handed to ``DataFrame.to_csv``.

    Returns:
        pandas.DataFrame: columns ``date_time``, ``location`` and one column
        per sampled variable. When nothing could be sampled, an empty frame
        with only ``date_time`` and ``location`` is written and returned.

    Notes:
        * Values are indexed positionally as ``[time, latitude, longitude]``.
        * NOTE(review): the nearest-point search compares raw coordinate
          values, so it assumes the files use the same longitude convention
          as ``df_locs`` - confirm against the downloaded grid.
    """
    list_data_loc = list()

    for _, row_var in df_vars.iterrows():
        # The context manager releases the file handle even if sampling fails
        with xr.open_dataset(row_var.path, engine='netcdf4') as ds_aux:
            name_aux_var = list(ds_aux.data_vars.keys())[0] # unique var per file

            if len(ds_aux.dims) != 3:
                print("Error in dimension on this file: ", row_var.path)
                print(ds_aux.dims)
                print(len(ds_aux.dims), '\n\n')
                continue

            # Loop-invariant arrays: fetch them once per file, not once per sample
            lats_np   = ds_aux.latitude.values
            lons_np   = ds_aux.longitude.values
            times_np  = ds_aux.time.values
            values_np = ds_aux[name_aux_var].values

            for _, row_loc in df_locs.iterrows():
                # Grid indices closest to the location
                close_idx_lat = np.abs(lats_np - row_loc.lat).argmin()
                close_idx_lon = np.abs(lons_np - row_loc.lon).argmin()

                # Full time series of the selected grid cell
                values_loc = values_np[:, close_idx_lat, close_idx_lon]

                for index, time in enumerate(times_np):
                    list_data_loc.append({
                        'variable' : row_var.variable,
                        'date_time': time,
                        'location' : row_loc.id,
                        'value'    : values_loc[index],
                        'path'     : row_var.path
                    })

    if list_data_loc:
        # Pivot the dataset to include in the same row all variables by date_time and location
        df = pd.DataFrame(list_data_loc)
        df = df.pivot_table(index=['date_time', 'location'], columns='variable', values='value', aggfunc='first').reset_index()
        df.columns.name = None
    else:
        # Nothing sampled (no files, no locations or every file skipped):
        # pivot_table would fail on the missing columns, so return the bare key columns
        df = pd.DataFrame(columns=['date_time', 'location'])

    df.to_csv(path_results, index = False)

    return df

In [6]:
# Build the two input tables: the locations to sample and the NetCDF files available per variable
df_locs = createDataFrameLocs()
df_vars = createDataFrameVars()

# Sample every file at every location; the pivoted table is written to this CSV and kept for display
path_results = os.path.join(PATH_RESOURCES, 'result_gis', 'locations_pobo_cburgos.csv')
df_vars_locs = returnValuesLocations(df_locs, df_vars, path_results)
# Last expression of the cell: rendered by the notebook as the cell output
df_vars_locs

Error in dimension on this file:  ..\..\resources\reanalysis-era5-single-levels\cloud_base_height\cloud_base_height_3_2024_hourly.netcdf
4 




Unnamed: 0,date_time,location,cloud_base_height,high_cloud_cover,low_cloud_cover,medium_cloud_cover,total_cloud_cover
0,2024-01-01 00:00:00,cburgos,665.405525,0.943082,0.072971,0.943357,0.973357
1,2024-01-01 00:00:00,pobo,58.514332,0.989990,0.392459,0.953336,0.992218
2,2024-01-01 01:00:00,cburgos,605.255680,0.939206,0.000000,0.907161,0.967192
3,2024-01-01 01:00:00,pobo,58.514332,1.000000,0.258603,0.958342,1.000000
4,2024-01-01 02:00:00,cburgos,593.225711,0.918758,0.004822,0.582180,0.918881
...,...,...,...,...,...,...,...
5563,2024-04-29 21:00:00,pobo,1877.465626,0.255337,0.000000,0.963286,0.974395
5564,2024-04-29 22:00:00,cburgos,,0.000000,0.000000,0.001221,0.001221
5565,2024-04-29 22:00:00,pobo,2485.192938,0.464758,0.084385,0.177086,0.562556
5566,2024-04-29 23:00:00,cburgos,,0.000000,0.000000,0.025331,0.025331
