In [1]:
import pandas as pd
import numpy as np
import os
import yaml

import sys
from pathlib import Path
# Put the directory two levels above the current working directory at the front of
# sys.path before the `library` imports below.
# NOTE(review): presumably that directory is the repository root holding `library` - confirm.
sys.path.insert(0,str(Path(os.path.abspath('.')).parent.parent))
import library.validation.parsing as parsing
import library.validation.SoRMS.inventory as inventorying

In [2]:
# Ticket folders, both located under the current user's home directory.
home_dir = Path.home()
ticket_path_rms = home_dir.joinpath("OneDrive-3E/Research/Solar/tickets/2023/IN2866_SoRMS_measurement_data")
yaml_path = home_dir.joinpath("OneDrive-3E/Research/Solar/tickets/2023/IN2983_final_parsing")

# Sub-folders of the SoRMS ticket; os.path.join yields plain strings here.
final_path_rms, meta_path_rms = (
    os.path.join(ticket_path_rms, sub_folder) for sub_folder in ('final_data', 'metadata')
)

In [3]:
# Parse providers.yml from the parsing ticket folder.
providers_yml = os.path.join(yaml_path, 'providers.yml')
with open(providers_yml, 'r') as providers_stream:
    site_info = yaml.safe_load(providers_stream)

# Keep only the 'sorms' section; fall back to an empty dict when that key is absent.
solrad_info = site_info.get('sorms', {})

In [4]:
# Load the station metadata table.
metadata_csv = os.path.join(meta_path_rms, 'data.csv')
metadata = pd.read_csv(metadata_csv)

# Add a column holding parsing.normalize_station_name applied to each 'station name' value.
station_names = metadata['station name']
metadata['normalized station name'] = station_names.apply(parsing.normalize_station_name)

In [5]:
# Build the inventory: one row per column (variable) of every CSV file in `final_path_rms`.
# The per-file frames are collected in a list and concatenated once after the loop;
# concatenating onto a growing frame inside the loop copies all earlier rows each time,
# and seeding it with an empty DataFrame triggers a concat FutureWarning in recent pandas.
inventory_rows = []

# os.listdir returns entries in arbitrary order; sorting keeps the row order of the
# inventory stable from run to run.
for file_name in sorted(os.listdir(final_path_rms)):
    if not file_name.endswith('.csv'):
        continue

    timeseries_path = os.path.join(final_path_rms, file_name)
    df = pd.read_csv(timeseries_path, index_col=0)
    variable = df.columns
    n_variables = len(variable)
    # The file name without its extension is used as the site name.
    site_name = os.path.splitext(file_name)[0]

    # from metadata
    latitude, longitude = inventorying.extract_from_metadata(site_name, metadata)

    # from final_data
    (start_times, end_times, completenesses,
     parameter_ids, timedelta_obj) = inventorying.extract_from_final_data(df)

    # from yaml file
    (source, domain, classification, unit,
     temporal_aggregation_method,
     temporal_aggregation_convention) = inventorying.extract_from_yaml_file(solrad_info, df)

    # One inventory row per variable of this file. Scalars are broadcast by pandas;
    # per-site values are repeated explicitly so they stay one entry per variable.
    inventory_rows.append(pd.DataFrame({
        'name': [site_name] * n_variables,
        'domain': domain,
        'latitude': [latitude] * n_variables,
        'longitude': [longitude] * n_variables,
        'source': source,
        'classification': classification,
        # Placeholder (typo 'pyronometer' fixed); overwritten per variable in a later cell.
        'device_type': 'pyranometer',
        # Placeholder; filled per variable in a later cell.
        'pyranometer_type': '',
        'variable_name': variable,
        'variable_physical_parameter_id': parameter_ids,
        'variable_units': unit,
        'variable_time_granularity': [timedelta_obj] * n_variables,
        'variable_start': start_times,
        'variable_end': end_times,
        'variable_temporal_aggregation_method': temporal_aggregation_method,
        'variable_temporal_aggregation_period': [timedelta_obj] * n_variables,
        'variable_temporal_aggregation_convention': temporal_aggregation_convention,
        'variable_data_availability_percent': completenesses,
        'timeseries_path': timeseries_path
    }))

# pd.concat raises on an empty list, so keep the empty-frame result when no CSV was found.
if inventory_rows:
    inventory = pd.concat(inventory_rows, ignore_index=True)
else:
    inventory = pd.DataFrame()


In [6]:
def _device_for_row(row):
    """Return inventorying.get_device(name, variable_name) for one inventory row.

    Rows whose 'variable_name' is falsy get None without calling the helper.
    """
    if not row['variable_name']:
        return None
    return inventorying.get_device(row['name'], row['variable_name'])


# Replace the placeholder device type with the per-row lookup result.
inventory['device_type'] = inventory.apply(_device_for_row, axis=1)


In [7]:
# Look up the pyranometer type per row; rows with a falsy 'variable_name' get None.
inventory['pyranometer_type'] = inventory.apply(
    lambda row: inventorying.get_pyranometer(row['name'], row['variable_name'])
    if row['variable_name'] else None,
    axis=1,
)
# Turn missing lookups into empty strings. The result is assigned back to the column:
# calling replace(..., inplace=True) on a column selection acts on a temporary object
# under pandas Copy-on-Write and would leave `inventory` unchanged.
inventory['pyranometer_type'] = inventory['pyranometer_type'].replace({None: ''})


In [8]:
# Write the finished inventory next to the ticket data, without the index column.
inventory_csv = os.path.join(ticket_path_rms, 'inventory.csv')
inventory.to_csv(inventory_csv, index=False)