In [1]:
import xarray as xr
import glob
import datetime
import pandas as pd
from pathlib import Path
import numpy as np
import warnings

In [2]:
warnings.filterwarnings('ignore')

In [3]:
# glob.glob returns matches in arbitrary, filesystem-dependent order; sorting
# makes the order of the records written later reproducible across runs.
files_observations_pm25 = sorted(
    glob.glob('../../data/interim/observations/*/*/*/pm25/*.nc')
)

In [4]:
def check_number_of_stations_and_distances(ds):
    """Count the stations in a dataset and list their rounded distances.

    Parameters
    ----------
    ds : object exposing ``station_id.values`` and ``distance.values``
        In this notebook, the result of ``xr.open_dataset``. Either array may
        be 0-dimensional (a single value) or 1-dimensional.

    Returns
    -------
    tuple
        ``(number_stations, distances)`` where ``number_stations`` is an
        ``int`` and ``distances`` is a list of built-in ``float`` values
        rounded to two decimals.
    """
    # np.atleast_1d promotes a 0-d (scalar) array to length 1, so scalar and
    # vector layouts share one code path instead of relying on a bare except
    # that would also hide unrelated errors.
    station_ids = np.atleast_1d(ds.station_id.values)
    raw_distances = np.atleast_1d(ds.distance.values)
    # float() turns NumPy scalars (e.g. float32) into built-in floats, which
    # the json module can serialize.
    distances = [round(float(value), 2) for value in raw_distances]
    return len(station_ids), distances

def check_available_percentage_of_times(ds, start='2019-06-01', end='2021-03-31'):
    """Return the share of a reference period covered by the hourly time axis.

    The dataset is averaged over ``station_id`` (when that is a dimension),
    resampled to hourly bins, and the number of resulting time steps is
    compared with the number of hourly timestamps between ``start`` and
    ``end`` (both inclusive).

    Parameters
    ----------
    ds : dataset-like
        Must expose ``dims``, ``mean`` and ``resample`` and a ``time``
        coordinate (in this notebook, an ``xarray.Dataset``).
    start, end : str or datetime-like, optional
        Bounds of the reference period, parsed with ``pd.to_datetime``.
        Defaults reproduce the period previously hard-coded here.

    Returns
    -------
    float
        Percentage rounded to two decimals.

    Raises
    ------
    ValueError
        If the reference period contains no hourly timestamps.
    """
    # Averaging over a dimension that does not exist raises, so only collapse
    # the station axis when it is actually a dimension.
    if 'station_id' in ds.dims:
        ds = ds.mean('station_id')
    # Lowercase 'h' is used because the uppercase 'H' alias is deprecated in
    # recent pandas releases.
    hourly = ds.resample(time='1h').mean(skipna=False)
    # NOTE(review): this counts hourly bins, not non-missing values; resampling
    # presumably also creates bins for gaps between the first and last
    # timestamp, so interior gaps may still count as "available" - confirm.
    length_of_times = len(hourly.time.values)
    user_dates = pd.date_range(
        pd.to_datetime(start),
        pd.to_datetime(end),
        freq='h'
    )
    if len(user_dates) == 0:
        raise ValueError(
            f"Reference period is empty: start={start!r}, end={end!r}"
        )
    return round(100 * length_of_times / len(user_dates), 2)

In [5]:
# Build one metadata record per PM2.5 observation file.
data_pm25 = []
for file in files_observations_pm25:
    # Open inside a context manager so the NetCDF handle is released even if
    # one of the checks raises; both checks return plain Python values, so
    # nothing lazy is read after the file is closed.
    with xr.open_dataset(file) as d:
        number_stations, distances = check_number_of_stations_and_distances(d)
        percentage_of_available_data = check_available_percentage_of_times(d)
    data_pm25.append({
        "file": Path(file).name,
        "number_stations": number_stations,
        "distances": distances,
        "percentage_of_data": percentage_of_available_data,
    })

In [6]:
import json

# Serialize the PM2.5 records and write them to a JSON file in the current
# working directory.
with open('observations_metadata_pm25.json', 'w') as json_out:
    json_out.write(json.dumps(data_pm25))

In [7]:
# glob.glob returns matches in arbitrary, filesystem-dependent order; sorting
# makes the order of the records written later reproducible across runs.
files_observations_no2 = sorted(
    glob.glob('../../data/interim/observations/*/*/*/no2/*.nc')
)

In [9]:
# Build one metadata record per NO2 observation file.
data_no2 = []
for file in files_observations_no2:
    # Open inside a context manager so the NetCDF handle is released even if
    # one of the checks raises; both checks return plain Python values, so
    # nothing lazy is read after the file is closed.
    with xr.open_dataset(file) as d:
        number_stations, distances = check_number_of_stations_and_distances(d)
        percentage_of_available_data = check_available_percentage_of_times(d)
    data_no2.append({
        "file": Path(file).name,
        "number_stations": number_stations,
        "distances": distances,
        "percentage_of_data": percentage_of_available_data,
    })

In [10]:
import json

# Serialize the NO2 records and write them to a JSON file in the current
# working directory.
with open('observations_metadata_no2.json', 'w') as json_out:
    json_out.write(json.dumps(data_no2))

In [11]:
# glob.glob returns matches in arbitrary, filesystem-dependent order; sorting
# makes the order of the records written later reproducible across runs.
files_observations_o3 = sorted(
    glob.glob('../../data/interim/observations/*/*/*/o3/*.nc')
)

In [12]:
# Build one metadata record per O3 observation file.
data_o3 = []
for file in files_observations_o3:
    # Open inside a context manager so the NetCDF handle is released even if
    # one of the checks raises; both checks return plain Python values, so
    # nothing lazy is read after the file is closed.
    with xr.open_dataset(file) as d:
        number_stations, distances = check_number_of_stations_and_distances(d)
        percentage_of_available_data = check_available_percentage_of_times(d)
    data_o3.append({
        "file": Path(file).name,
        "number_stations": number_stations,
        "distances": distances,
        "percentage_of_data": percentage_of_available_data,
    })

In [13]:
import json

# Serialize the O3 records and write them to a JSON file in the current
# working directory.
with open('observations_metadata_o3.json', 'w') as json_out:
    json_out.write(json.dumps(data_o3))