In [1]:
from pathlib import Path
import pandas as pd
import os
import numpy as np

from eee.resource.validation.database import ValidationDatabase
from eee.resource.validation.models.site import Site, SitesInventory
from eee.ontology.variables import Variable
from eee.resource.validation import REGISTRY
from eee.resource.validation.models.timeseries import TimeseriesVariable,TimeseriesDataset, TimeseriesVariableStandardizer

In [2]:
# Root folder of the IN2818 FLUXNET measurement-data ticket, resolved under the user's home directory.
TICKET_DIR = Path.home().joinpath(
    "OneDrive-3E/Research/Solar/tickets/2023/IN2818_FLUXNET_measurement_data"
)

In [3]:
# Sub-folders of the ticket directory; os.path.join keeps them as plain strings.
# Note that `parsed_data_dir` points at the "final_data" folder.
raw_data_dir, intermediate_data_dir, parsed_data_dir = (
    os.path.join(TICKET_DIR, folder_name)
    for folder_name in ("raw_data", "intermediate_data", "final_data")
)

In [4]:
# Location of the validation database, resolved under the user's home directory.
validation_database_path = Path.home().joinpath(
    "OneDrive-3E/Research/Solar/validation/validation_database"
)

In [5]:
# Identity mapping for the three variable names handled in this ticket.
# NOTE(review): presumably source column name -> standardized name; it is not
# referenced by the cells visible in this notebook excerpt.
variables_match = {variable_key: variable_key for variable_key in ("ghi", "dhi", "dni")}

## Get the list of stations already pushed to the validation database

In [6]:
# Instantiate the validation database handle from `validation_database_path`.
# `db` is reused further down to list existing sites and to push new ones.
db = ValidationDatabase(validation_database_path)

In [7]:
# Called without any filter; presumably returns every site currently stored
# in the database — TODO confirm against ValidationDatabase.get_sites.
db_stations = db.get_sites()

In [8]:
# Distinct values of the `name` column of the sites table; displayed as the cell output.
db_stations_name = db_stations.df["name"].unique()
db_stations_name

array(['abashiri', 'alert', 'alice_springs', 'barrow', 'bermuda',
       'billings', 'bondville', 'boulder1', 'boulder', 'budapest_lorinc',
       'cabauw', 'camborne', 'carpentras', 'chesapeake_light',
       'concordia_station_dome_c', 'darwin', 'desert_rock', 'de_aar',
       'eastern_north_atlantic', 'fort_peck', 'fukuoka',
       'georg_von_neumayer', 'gobabeb', 'goodwin_creek', 'granite_island',
       'ishigakijima', 'izaña', 'kwajalein', 'lindenberg',
       'magurele_mars', 'minamitorishima', 'momote', 'nauru_island',
       'ny_ålesund', 'observatory_of_huancayo', 'paramaribo', 'payerne',
       'rock_springs', 'sapporo', 'sede_boqer',
       'selegua_mexico_solarimetric_station', 'sioux_falls', 'sonnblick',
       'southern_great_plains', 'south_pole', 'syowa', 'tamanrasset',
       'tateno', 'tiksi', 'toravere', 'bdfe2_feni',
       'ceb_substation_dapaong_togo', 'ceb_substation_davie_togo',
       'ceb_substation_malanville_benin', 'ceb_substation_parakou_benin',
       'c

In [9]:
# Disabled helper: look up one site by name and fetch its object from
# `db_stations` (the following disabled cell removes that site).
# site_name = "kwajalein"
# site_id = db.get_sites(names=site_name).site_ids[0]
# site = db_stations.get_site(site_id)
# site

In [10]:
# Disabled: removes the site selected in the previous (also disabled) cell.
# db_stations.remove_site(site)

## Read the inventory and remove already-pushed sites

In [11]:
# Load the inventory CSV stored at the root of the ticket folder and show its row count.
inventory_csv = os.path.join(TICKET_DIR, "inventory.csv")
inventory = pd.read_csv(inventory_csv)
len(inventory)

75

In [12]:
# Keep only the inventory rows whose site name is not yet present in the database,
# then show how many rows remain.
already_pushed = inventory["name"].isin(db_stations_name)
inventory = inventory.loc[~already_pushed]
len(inventory)

31

In [13]:
# Disabled: restrict the inventory to the rows at positions 4 and 5
# (presumably for a trial run on a small subset).
# inventory = inventory.iloc[4:6,]

## Push the remaining sites and their timeseries to the validation database

In [14]:
# Display the inventory column names; the push loop below reads its site and
# variable metadata from these columns.
inventory.columns

Index(['name', 'domain', 'latitude', 'longitude', 'source', 'classification',
       'device_type', 'pyrnanometer_type', 'variable_name',
       'variable_physical_parameter_id', 'variable_units',
       'variable_time_granularity', 'variable_start', 'variable_end',
       'variable_temporal_aggregation_method',
       'variable_temporal_aggregation_period',
       'variable_temporal_aggregation_convention',
       'variable_data_availability_percent', 'timeseries_path'],
      dtype='object')

In [15]:
# One entry per distinct timeseries file referenced by the remaining inventory rows.
timeseries_paths = inventory["timeseries_path"]
list_files = timeseries_paths.unique()
len(list_files)

31

In [16]:
# Disabled: keep only the first three files (presumably for a trial run).
# list_files = list_files[:3]
# len(list_files)

In [17]:
# Push every remaining timeseries file: build one Variable/TimeseriesVariable per
# inventory row of the file, wrap them in a Site and a TimeseriesDataset, and add
# both to the validation database.
# NOTE(review): add_site and add_measurement_timeseries are separate calls; if the
# second one fails, the site may already be registered and would then be skipped as
# "already pushed" on the next run — confirm how the database handles this.
for file in list_files:
    inventory_station = inventory[inventory["timeseries_path"] == file]
    # Site-level metadata is taken from the first inventory row of this file.
    site_row = inventory_station.iloc[0]
    station = site_row["name"]
    print(f"{station} push start")

    # The first CSV column is used as the index and converted to a DatetimeIndex.
    df_timeseries = pd.read_csv(file, index_col=0)
    df_timeseries.index = pd.DatetimeIndex(df_timeseries.index)

    list_of_variables = []
    list_of_ts_variables = []

    # The row index label is not needed, only the row content.
    for _, row in inventory_station.iterrows():
        variable = Variable(
            name=row.variable_name,
            physical_parameter_id=row.variable_physical_parameter_id,
            units=row.variable_units,
            time_granularity=row.variable_time_granularity,
            start=row.variable_start,
            end=row.variable_end,
            temporal_aggregation_method=row.variable_temporal_aggregation_method,
            # Fix: use the dedicated inventory column; the previous code passed
            # `variable_time_granularity` here, so the aggregation period recorded
            # in the inventory was never used.
            temporal_aggregation_period=row.variable_temporal_aggregation_period,
            temporal_aggregation_timestamp_convention=row.variable_temporal_aggregation_convention,
            data_availability_percent=row.variable_data_availability_percent,
        )

        # The timeseries column is selected by the variable name from the inventory.
        var_timeseries = df_timeseries.loc[:, row.variable_name]
        ts_variable = TimeseriesVariable(variable=variable, data=var_timeseries)

        list_of_variables.append(variable)
        list_of_ts_variables.append(ts_variable)

    site = Site(
        name=site_row["name"],
        domain=site_row["domain"],
        latitude=site_row["latitude"],
        longitude=site_row["longitude"],
        variables=list_of_variables,
        source=REGISTRY.datasources(id=site_row["source"]),
        classification=site_row["classification"],
        device_type=site_row["device_type"],
    )
    ts_dataset = TimeseriesDataset(timeseries_variables=list_of_ts_variables)

    db.add_site(site)
    db.add_measurement_timeseries(site, ts_dataset)
    print(f"{station} push completed")

mase_rice_paddy_field push start
mase_rice_paddy_field push completed
mayberry_wetland push start
mayberry_wetland push completed
nc_alligatorriver push start
nc_alligatorriver push completed
neustift push start
neustift push completed
ngee_arctic_barrow push start
ngee_arctic_barrow push completed
ngee_arctic_council push start
ngee_arctic_council push completed
northern_pantanal_wetland push start
northern_pantanal_wetland push completed
nxaraga push start
nxaraga push completed
oensingen_crop push start
oensingen_crop push completed
old_woman_creek push start
old_woman_creek push completed
olentangy_river_wetland_research_park push start
olentangy_river_wetland_research_park push completed
palangkaraya_undrained_forest push start
palangkaraya_undrained_forest push completed
park_falls_wlef push start
park_falls_wlef push completed
philippines_rice_instituteflooded push start
philippines_rice_instituteflooded push completed
pointe_aux_chenes_brackishmarsh push start
pointe_aux_chenes