In [1]:
from pathlib import Path
import pandas as pd
import os

from eee.resource.validation.database import ValidationDatabase

In [2]:
# Location of the validation database on disk.
# Defaults to the `OneDrive-3E/...` folder under the current user's home
# directory; set the VALIDATION_DATABASE_PATH environment variable to point the
# notebook at a different copy without editing this cell. An empty value falls
# back to the default.
validation_database_path = Path(
    os.environ.get("VALIDATION_DATABASE_PATH")
    or Path.home() / "OneDrive-3E/Research/Solar/validation/validation_database"
)

In [3]:
# Instantiate the validation database from the configured path; every site and
# timeseries query in this notebook goes through this `db` object.
db = ValidationDatabase(validation_database_path)

In [4]:
# Fetch the inventory of all sites (no filter passed) and display its repr.
sites_inv = db.get_sites()
sites_inv

SitesInventory(n=291)

In [5]:
# List the distinct values of the inventory table's `name` column, i.e. the
# site names that can be used for lookups.
sites_inv.df.name.unique()

array(['abashiri', 'alert', 'alice_springs', 'barrow', 'bermuda',
       'billings', 'bondville', 'boulder1', 'boulder', 'budapest_lorinc',
       'cabauw', 'camborne', 'carpentras', 'chesapeake_light',
       'concordia_station_dome_c', 'darwin', 'desert_rock', 'de_aar',
       'eastern_north_atlantic', 'fort_peck', 'fukuoka',
       'georg_von_neumayer', 'gobabeb', 'goodwin_creek', 'granite_island',
       'ishigakijima', 'izaña', 'kwajalein', 'lindenberg',
       'magurele_mars', 'minamitorishima', 'momote', 'nauru_island',
       'ny_ålesund', 'observatory_of_huancayo', 'paramaribo', 'payerne',
       'rock_springs', 'sapporo', 'sede_boqer',
       'selegua_mexico_solarimetric_station', 'sioux_falls', 'sonnblick',
       'southern_great_plains', 'south_pole', 'syowa', 'tamanrasset',
       'tateno', 'tiksi', 'toravere', 'bdfe2_feni',
       'ceb_substation_dapaong_togo', 'ceb_substation_davie_togo',
       'ceb_substation_malanville_benin', 'ceb_substation_parakou_benin',
       'c

In [6]:
# Site to inspect. Kept as a one-element list because it is passed as the
# `names=` argument of `db.get_sites`.
site_name = ["bdfe2_feni"]

In [7]:
# Resolve the requested site name to its database identifier.
# Fail with an explicit message when nothing matches, instead of the opaque
# IndexError that a bare `[0]` on an empty result would raise.
matching_site_ids = db.get_sites(names=site_name).site_ids
if len(matching_site_ids) == 0:
    raise ValueError(f"No site named {site_name} found in the validation database")
site_id = matching_site_ids[0]  # first match, as before
site_id

'energydata_1'

In [8]:
# Look up the site object for the resolved identifier in the full inventory
# and display its repr.
site = sites_inv.get_site(site_id)
site

Site(name='bdfe2_feni')

## measurement data

In [9]:
# Load the measured "ghi" series for the currently selected site.
ts = db.get_measurement_timeseries(validation_variables=["ghi"], site=site)

In [10]:
# Dump the dataset as a plain dict to inspect each variable's metadata
# (units, granularity, start/end, aggregation settings) together with its data.
ts.dict()

{'timeseries_variables': [{'variable': {'name': 'ghi',
    'physical_parameter_id': 'ghi',
    'units': <Unit('watt / meter ** 2')>,
    'height': None,
    'time_granularity': Timedelta('0 days 00:01:00'),
    'start': Timestamp('2017-06-08 00:01:00'),
    'end': Timestamp('2019-09-30 23:59:00'),
    'temporal_aggregation_method': <TemporalAggregationMethod.MEAN: 'mean'>,
    'temporal_aggregation_period': Timedelta('0 days 00:01:00'),
    'temporal_aggregation_timestamp_convention': <TemporalAggregationTimestampConvention.UNDEFINED: 'undefined'>,
    'data_availability_percent': None},
   'data': time
   2017-06-08 00:01:00+00:00    69.84400
   2017-06-08 00:02:00+00:00    70.85630
   2017-06-08 00:03:00+00:00    72.73920
   2017-06-08 00:04:00+00:00    74.79130
   2017-06-08 00:05:00+00:00    77.35400
                                  ...   
   2019-09-30 23:55:00+00:00     1.36950
   2019-09-30 23:56:00+00:00     1.78500
   2019-09-30 23:57:00+00:00     1.98333
   2019-09-30 23:58:

In [11]:
# View the measurements as a time-indexed DataFrame.
ts.to_dataframe()

Unnamed: 0_level_0,ghi_mean_1min_undefined
time,Unnamed: 1_level_1
2017-06-08 00:01:00+00:00,69.84400
2017-06-08 00:02:00+00:00,70.85630
2017-06-08 00:03:00+00:00,72.73920
2017-06-08 00:04:00+00:00,74.79130
2017-06-08 00:05:00+00:00,77.35400
...,...
2019-09-30 23:55:00+00:00,1.36950
2019-09-30 23:56:00+00:00,1.78500
2019-09-30 23:57:00+00:00,1.98333
2019-09-30 23:58:00+00:00,2.27900


## simulation data

In [12]:
# Load the simulated "ghirrn" series for the selected site, as produced by
# model "solar-resource-api-1.6.12".
# NOTE(review): the variable metadata displayed further down reports this
# series as accumulated Wh/m² per period, whereas the measured "ghi" is a mean
# in W/m² — confirm the conversion before comparing the two.
ts_sim = db.get_simulation_timeseries(
    model="solar-resource-api-1.6.12", validation_variables=["ghirrn"], site=site
)

In [13]:
# Display the dataset repr to confirm which variables were loaded.
ts_sim

TimeseriesDataset(variables=[Variable(name=ghirrn)])

In [14]:
# Start timestamp recorded in the metadata of the first simulated variable.
ts_sim.timeseries_variables[0].variable.start

Timestamp('2019-09-01 00:15:00')

In [15]:
# View the simulated series as a time-indexed DataFrame.
ts_sim.to_dataframe()

Unnamed: 0_level_0,ghirrn_accumulated_15min_forward
time,Unnamed: 1_level_1
2019-09-01 00:15:00+00:00,3.52
2019-09-01 00:30:00+00:00,5.01
2019-09-01 00:45:00+00:00,7.04
2019-09-01 01:00:00+00:00,27.15
2019-09-01 01:15:00+00:00,42.05
...,...
2024-04-11 07:15:00+00:00,207.42
2024-04-11 07:30:00+00:00,196.28
2024-04-11 07:45:00+00:00,185.69
2024-04-11 08:00:00+00:00,175.94


In [16]:
# Variable descriptor of the first simulated variable in the dataset.
ts_sim.variables[0]

Variable(name=ghirrn)

In [17]:
# Metadata of the first simulated variable as a plain dict (units, granularity,
# start/end, aggregation). `as_dict` is accessed without a call here, so it is
# presumably a property.
ts_sim.variables[0].as_dict

{'name': 'ghirrn',
 'physical_parameter_id': 'ghirrn',
 'units': 'Wh/m²',
 'height': None,
 'time_granularity': '15min',
 'start': '2019-09-01 00:15:00',
 'end': '2024-04-11 08:15:00',
 'temporal_aggregation_method': 'accumulated',
 'temporal_aggregation_period': '15min',
 'temporal_aggregation_timestamp_convention': 'forward',
 'data_availability_percent': None}

In [18]:
# Next site to inspect. Kept as a one-element list because it is passed as the
# `names=` argument of `db.get_sites`. This rebinds `site_name` from the
# previous section.
site_name = ["ceb_substation_davie_togo"]

In [19]:
# Resolve the requested site name to its database identifier.
# Fail with an explicit message when nothing matches, instead of the opaque
# IndexError that a bare `[0]` on an empty result would raise.
matching_site_ids = db.get_sites(names=site_name).site_ids
if len(matching_site_ids) == 0:
    raise ValueError(f"No site named {site_name} found in the validation database")
site_id = matching_site_ids[0]  # first match, as before
site_id

'energydata_3'

In [20]:
# Look up the site object for the resolved identifier in the full inventory
# and display its repr.
site = sites_inv.get_site(site_id)
site

Site(name='ceb_substation_davie_togo')

## measurement data

In [21]:
# Load the measured "ghi" series for the currently selected site.
ts = db.get_measurement_timeseries(validation_variables=["ghi"], site=site)

In [22]:
# Dump the dataset as a plain dict to inspect each variable's metadata
# (units, granularity, start/end, aggregation settings) together with its data.
ts.dict()

{'timeseries_variables': [{'variable': {'name': 'ghi',
    'physical_parameter_id': 'ghi',
    'units': <Unit('watt / meter ** 2')>,
    'height': None,
    'time_granularity': Timedelta('0 days 00:01:00'),
    'start': Timestamp('2021-11-03 05:37:00'),
    'end': Timestamp('2022-11-02 21:42:00'),
    'temporal_aggregation_method': <TemporalAggregationMethod.MEAN: 'mean'>,
    'temporal_aggregation_period': Timedelta('0 days 00:01:00'),
    'temporal_aggregation_timestamp_convention': <TemporalAggregationTimestampConvention.UNDEFINED: 'undefined'>,
    'data_availability_percent': None},
   'data': time
   2021-11-03 05:37:00+00:00    0.1
   2021-11-03 05:38:00+00:00    0.5
   2021-11-03 05:39:00+00:00    0.9
   2021-11-03 05:40:00+00:00    1.3
   2021-11-03 05:41:00+00:00    1.7
                               ... 
   2022-11-02 21:38:00+00:00    NaN
   2022-11-02 21:39:00+00:00    NaN
   2022-11-02 21:40:00+00:00    NaN
   2022-11-02 21:41:00+00:00    NaN
   2022-11-02 21:42:00+00:00 

In [23]:
# View the measurements as a time-indexed DataFrame.
ts.to_dataframe()

Unnamed: 0_level_0,ghi_mean_1min_undefined
time,Unnamed: 1_level_1
2021-11-03 05:37:00+00:00,0.1
2021-11-03 05:38:00+00:00,0.5
2021-11-03 05:39:00+00:00,0.9
2021-11-03 05:40:00+00:00,1.3
2021-11-03 05:41:00+00:00,1.7
...,...
2022-11-02 21:38:00+00:00,
2022-11-02 21:39:00+00:00,
2022-11-02 21:40:00+00:00,
2022-11-02 21:41:00+00:00,


## simulation data

In [24]:
# Load the simulated "ghirrn" series for the selected site, as produced by
# model "solar-resource-api-1.6.12".
# NOTE(review): the variable metadata displayed further down reports this
# series as accumulated Wh/m² per period, whereas the measured "ghi" is a mean
# in W/m² — confirm the conversion before comparing the two.
ts_sim = db.get_simulation_timeseries(
    model="solar-resource-api-1.6.12", validation_variables=["ghirrn"], site=site
)

In [25]:
# Display the dataset repr to confirm which variables were loaded.
ts_sim

TimeseriesDataset(variables=[Variable(name=ghirrn)])

In [26]:
# Start timestamp recorded in the metadata of the first simulated variable.
ts_sim.timeseries_variables[0].variable.start

Timestamp('2004-02-01 00:15:00')

In [27]:
# View the simulated series as a time-indexed DataFrame.
ts_sim.to_dataframe()

Unnamed: 0_level_0,ghirrn_accumulated_15min_forward
time,Unnamed: 1_level_1
2004-02-01 00:15:00+00:00,0.00
2004-02-01 00:30:00+00:00,0.00
2004-02-01 00:45:00+00:00,0.00
2004-02-01 01:00:00+00:00,0.00
2004-02-01 01:15:00+00:00,0.00
...,...
2024-04-11 07:15:00+00:00,35.96
2024-04-11 07:30:00+00:00,49.43
2024-04-11 07:45:00+00:00,50.47
2024-04-11 08:00:00+00:00,49.24


In [28]:
# Variable descriptor of the first simulated variable in the dataset.
ts_sim.variables[0]

Variable(name=ghirrn)

In [29]:
# Metadata of the first simulated variable as a plain dict (units, granularity,
# start/end, aggregation). `as_dict` is accessed without a call here, so it is
# presumably a property.
ts_sim.variables[0].as_dict

{'name': 'ghirrn',
 'physical_parameter_id': 'ghirrn',
 'units': 'Wh/m²',
 'height': None,
 'time_granularity': '15min',
 'start': '2004-02-01 00:15:00',
 'end': '2024-04-11 08:15:00',
 'temporal_aggregation_method': 'accumulated',
 'temporal_aggregation_period': '15min',
 'temporal_aggregation_timestamp_convention': 'forward',
 'data_availability_percent': None}

In [30]:
# Next site to inspect. Kept as a one-element list because it is passed as the
# `names=` argument of `db.get_sites`. This rebinds `site_name` from the
# previous section.
site_name = ["hrazdan"]

In [31]:
# Resolve the requested site name to its database identifier.
# Fail with an explicit message when nothing matches, instead of the opaque
# IndexError that a bare `[0]` on an empty result would raise.
matching_site_ids = db.get_sites(names=site_name).site_ids
if len(matching_site_ids) == 0:
    raise ValueError(f"No site named {site_name} found in the validation database")
site_id = matching_site_ids[0]  # first match, as before
site_id

'energydata_13'

In [32]:
# Look up the site object for the resolved identifier in the full inventory
# and display its repr.
site = sites_inv.get_site(site_id)
site

Site(name='hrazdan')

## measurement data

In [33]:
# Load the measured "ghi" series for the currently selected site.
ts = db.get_measurement_timeseries(validation_variables=["ghi"], site=site)

In [34]:
# Dump the dataset as a plain dict to inspect each variable's metadata
# (units, granularity, start/end, aggregation settings) together with its data.
ts.dict()

{'timeseries_variables': [{'variable': {'name': 'ghi',
    'physical_parameter_id': 'ghi',
    'units': <Unit('watt / meter ** 2')>,
    'height': None,
    'time_granularity': Timedelta('0 days 00:10:00'),
    'start': Timestamp('2016-05-19 21:05:00'),
    'end': Timestamp('2017-06-04 16:15:00'),
    'temporal_aggregation_method': <TemporalAggregationMethod.MEAN: 'mean'>,
    'temporal_aggregation_period': Timedelta('0 days 00:10:00'),
    'temporal_aggregation_timestamp_convention': <TemporalAggregationTimestampConvention.UNDEFINED: 'undefined'>,
    'data_availability_percent': None},
   'data': time
   2016-05-19 21:05:00+00:00      0.00
   2016-05-19 21:15:00+00:00       NaN
   2016-05-19 21:25:00+00:00       NaN
   2016-05-19 21:35:00+00:00       NaN
   2016-05-19 21:45:00+00:00       NaN
                                 ...  
   2017-06-04 15:35:00+00:00    109.07
   2017-06-04 15:45:00+00:00     77.40
   2017-06-04 15:55:00+00:00     53.48
   2017-06-04 16:05:00+00:00     36.79

In [35]:
# View the measurements as a time-indexed DataFrame.
ts.to_dataframe()

Unnamed: 0_level_0,ghi_mean_10min_undefined
time,Unnamed: 1_level_1
2016-05-19 21:05:00+00:00,0.00
2016-05-19 21:15:00+00:00,
2016-05-19 21:25:00+00:00,
2016-05-19 21:35:00+00:00,
2016-05-19 21:45:00+00:00,
...,...
2017-06-04 15:35:00+00:00,109.07
2017-06-04 15:45:00+00:00,77.40
2017-06-04 15:55:00+00:00,53.48
2017-06-04 16:05:00+00:00,36.79


## simulation data

In [36]:
# Load the simulated "ghirrn" series for the selected site, as produced by
# model "solar-resource-api-1.6.12".
# NOTE(review): the variable metadata displayed further down reports this
# series as accumulated Wh/m² per period, whereas the measured "ghi" is a mean
# in W/m² — confirm the conversion before comparing the two.
ts_sim = db.get_simulation_timeseries(
    model="solar-resource-api-1.6.12", validation_variables=["ghirrn"], site=site
)

In [37]:
# Display the dataset repr to confirm which variables were loaded.
ts_sim

TimeseriesDataset(variables=[Variable(name=ghirrn)])

In [38]:
# Start timestamp recorded in the metadata of the first simulated variable.
ts_sim.timeseries_variables[0].variable.start

Timestamp('2004-02-01 00:15:00')

In [39]:
# View the simulated series as a time-indexed DataFrame.
ts_sim.to_dataframe()

Unnamed: 0_level_0,ghirrn_accumulated_15min_forward
time,Unnamed: 1_level_1
2004-02-01 00:15:00+00:00,0.00
2004-02-01 00:30:00+00:00,0.00
2004-02-01 00:45:00+00:00,0.00
2004-02-01 01:00:00+00:00,0.00
2004-02-01 01:15:00+00:00,0.00
...,...
2024-04-11 07:30:00+00:00,100.56
2024-04-11 07:45:00+00:00,95.60
2024-04-11 08:00:00+00:00,66.32
2024-04-11 08:15:00+00:00,49.30


In [40]:
# Variable descriptor of the first simulated variable in the dataset.
ts_sim.variables[0]

Variable(name=ghirrn)

In [41]:
# Metadata of the first simulated variable as a plain dict (units, granularity,
# start/end, aggregation). `as_dict` is accessed without a call here, so it is
# presumably a property.
ts_sim.variables[0].as_dict

{'name': 'ghirrn',
 'physical_parameter_id': 'ghirrn',
 'units': 'Wh/m²',
 'height': None,
 'time_granularity': '15min',
 'start': '2004-02-01 00:15:00',
 'end': '2024-04-11 08:30:00',
 'temporal_aggregation_method': 'accumulated',
 'temporal_aggregation_period': '15min',
 'temporal_aggregation_timestamp_convention': 'forward',
 'data_availability_percent': None}