diff --git a/setup.py b/setup.py
index ff3d000..c6989e6 100644
--- a/setup.py
+++ b/setup.py
@@ -58,7 +58,8 @@ def read(fname):
     scripts=[],
     package_data={
         'viresclient': [
-            '_wps/templates/*'
+            '_wps/templates/*',
+            '_data/*'
         ],
     },
     python_requires='>=3.6',
diff --git a/viresclient/__init__.py b/viresclient/__init__.py
index 159c893..aefe1aa 100644
--- a/viresclient/__init__.py
+++ b/viresclient/__init__.py
@@ -34,5 +34,7 @@ from ._data_handling import ReturnedDataFile
 from ._api.upload import DataUpload
 from ._api.token import TokenManager
+from . import _data
 
-__version__ = "0.7.2"
+
+__version__ = "0.8.0-alpha"
diff --git a/viresclient/_client.py b/viresclient/_client.py
index 3243626..614432a 100644
--- a/viresclient/_client.py
+++ b/viresclient/_client.py
@@ -35,7 +35,7 @@
 try:
     from IPython import get_ipython
     IN_JUPYTER = 'zmqshell' in str(type(get_ipython()))
-except ImportError:
+except Exception:
     IN_JUPYTER = False
 from tqdm import tqdm
 from io import StringIO
@@ -79,6 +79,9 @@
 # Maximum selectable time interval ~25 years
 MAX_TIME_SELECTION = timedelta(days=25*365.25)
 
+# Maximum time-chunk size ~50 years
+MAX_CHUNK_DURATION = 2 * MAX_TIME_SELECTION
+
 TEMPLATE_FILES = {
     'list_jobs': "vires_list_jobs.xml",
@@ -395,9 +398,9 @@ def _chunkify_request(start_time, end_time, sampling_step, nrecords_limit):
         e.g. [(start1, end1), (start2, end2)]
     """
     # maximum chunk duration as a timedelta object
-    chunk_duration = timedelta(seconds=(
+    chunk_duration = min(timedelta(seconds=(
         nrecords_limit * parse_duration(sampling_step).total_seconds()
-    ))
+    )), MAX_CHUNK_DURATION)
     # calculate the chunk intervals ...
     request_intervals = []
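
To make the effect of the new cap concrete, here is a minimal standalone sketch (not viresclient code; it assumes an isodate-style parse_duration that maps ISO 8601 duration strings to timedeltas, standing in for the helper used by _client.py):

    from datetime import timedelta

    from isodate import parse_duration  # assumed stand-in for viresclient's parse_duration

    MAX_TIME_SELECTION = timedelta(days=25 * 365.25)
    MAX_CHUNK_DURATION = 2 * MAX_TIME_SELECTION

    def chunk_duration(sampling_step, nrecords_limit):
        """Chunk length implied by the records limit, now capped at ~50 years."""
        requested = timedelta(
            seconds=nrecords_limit * parse_duration(sampling_step).total_seconds()
        )
        return min(requested, MAX_CHUNK_DURATION)

    # With coarse samplings such as the 4-monthly VOBS step introduced below,
    # the uncapped duration could exceed Python's datetime range, so the cap applies:
    assert chunk_duration("P122D", 432000) == MAX_CHUNK_DURATION
    # Dense samplings are unaffected:
    assert chunk_duration("PT1S", 432000) == timedelta(seconds=432000)
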
diff --git a/viresclient/_client_swarm.py b/viresclient/_client_swarm.py
index de15fbd..a65102a 100644
--- a/viresclient/_client_swarm.py
+++ b/viresclient/_client_swarm.py
@@ -12,7 +12,7 @@
 from ._wps.time_util import parse_datetime
 from ._client import WPSInputs, ClientRequest, TEMPLATE_FILES
 from ._data_handling import ReturnedDataFile
-
+from ._data import CONFIG_SWARM
 
 TEMPLATE_FILES = {
     **TEMPLATE_FILES,
@@ -33,7 +33,7 @@
 MODEL_REFERENCES = {
     'IGRF':
-        (" International Geomagnetic Reference Field: the 13th generation, (waiting for publication) ",
+        (" International Geomagnetic Reference Field: the thirteenth generation, (https://doi.org/10.1186/s40623-020-01288-x) ",
         " https://www.ngdc.noaa.gov/IAGA/vmod/igrf.html "),
     'IGRF12':
         (" International Geomagnetic Reference Field: the 12th generation, https://doi.org/10.1186/s40623-015-0228-9 ",
@@ -161,6 +161,7 @@
     "AUX_OBSH": ("https://doi.org/10.5047/eps.2013.07.011",),
     "AUX_OBSM": ("https://doi.org/10.5047/eps.2013.07.011",),
     "AUX_OBSS": ("https://doi.org/10.5047/eps.2013.07.011",),
+    "VOBS_SW_1M": ("https://www.space.dtu.dk/english/research/projects/project-descriptions/geomagnetic-virtual-observatories",),
 }
 
 DATA_CITATIONS = {
@@ -169,8 +170,10 @@
     "AUX_OBSS": "ftp://ftp.nerc-murchison.ac.uk/geomag/Swarm/AUX_OBS/second/README",
 }
 
-IAGA_CODES = ['AAA', 'AAE', 'ABG', 'ABK', 'AIA', 'ALE', 'AMS', 'API', 'AQU', 'ARS', 'ASC', 'ASP', 'BDV', 'BEL', 'BFE', 'BFO', 'BGY', 'BJN', 'BLC', 'BMT', 'BNG', 'BOU', 'BOX', 'BRD', 'BRW', 'BSL', 'CBB', 'CBI', 'CDP', 'CKI', 'CLF', 'CMO', 'CNB', 'CNH', 'COI', 'CPL', 'CSY', 'CTA', 'CTS', 'CYG', 'CZT', 'DED', 'DLR', 'DLT', 'DMC', 'DOB', 'DOU', 'DRV', 'DUR', 'EBR', 'ELT', 'ESA', 'ESK', 'EYR', 'FCC', 'FRD', 'FRN', 'FUQ', 'FUR', 'GAN', 'GCK', 'GDH', 'GLM', 'GLN', 'GNA', 'GNG', 'GUA', 'GUI', 'GZH', 'HAD', 'HBK', 'HER', 'HLP', 'HON', 'HRB', 'HRN', 'HUA', 'HYB', 'IPM', 'IQA', 'IRT', 'IZN', 'JAI', 'JCO', 'KAK', 'KDU', 'KEP', 'KHB', 'KIR', 'KIV', 'KMH', 'KNY', 'KNZ', 'KOU', 'KSH', 'LER', 'LIV', 'LMM', 'LNP', 'LON', 'LOV', 'LRM', 'LRV', 'LVV', 'LYC', 'LZH', 'MAB', 'MAW', 'MBC', 'MBO', 'MCQ', 'MEA', 'MGD', 'MID', 'MIZ', 'MMB', 'MZL', 'NAQ', 'NCK', 'NEW', 'NGK', 'NGP', 'NMP', 'NUR', 'NVS', 'ORC', 'OTT', 'PAF', 'PAG', 'PBQ', 'PEG', 'PET', 'PHU', 'PIL', 'PND', 'PPT', 'PST', 'QGZ', 'QIX', 'QSB', 'QZH', 'RES', 'SBA', 'SBL', 'SFS', 'SHE', 'SHL', 'SHU', 'SIL', 'SIT', 'SJG', 'SOD', 'SPG', 'SPT', 'STJ', 'SUA', 'TAM', 'TAN', 'TDC', 'TEO', 'THJ', 'THL', 'THY', 'TIR', 'TND', 'TRO', 'TRW', 'TSU', 'TUC', 'UPS', 'VAL', 'VIC', 'VNA', 'VOS', 'VSK', 'VSS', 'WHN', 'WIC', 'WIK', 'WNG', 'YAK', 'YKC']
+# IAGA_CODES = ['AAA', 'AAE', 'ABG', 'ABK', 'AIA', 'ALE', 'AMS', 'API', 'AQU', 'ARS', 'ASC', 'ASP', 'BDV', 'BEL', 'BFE', 'BFO', 'BGY', 'BJN', 'BLC', 'BMT', 'BNG', 'BOU', 'BOX', 'BRD', 'BRW', 'BSL', 'CBB', 'CBI', 'CDP', 'CKI', 'CLF', 'CMO', 'CNB', 'CNH', 'COI', 'CPL', 'CSY', 'CTA', 'CTS', 'CYG', 'CZT', 'DED', 'DLR', 'DLT', 'DMC', 'DOB', 'DOU', 'DRV', 'DUR', 'EBR', 'ELT', 'ESA', 'ESK', 'EYR', 'FCC', 'FRD', 'FRN', 'FUQ', 'FUR', 'GAN', 'GCK', 'GDH', 'GLM', 'GLN', 'GNA', 'GNG', 'GUA', 'GUI', 'GZH', 'HAD', 'HBK', 'HER', 'HLP', 'HON', 'HRB', 'HRN', 'HUA', 'HYB', 'IPM', 'IQA', 'IRT', 'IZN', 'JAI', 'JCO', 'KAK', 'KDU', 'KEP', 'KHB', 'KIR', 'KIV', 'KMH', 'KNY', 'KNZ', 'KOU', 'KSH', 'LER', 'LIV', 'LMM', 'LNP', 'LON', 'LOV', 'LRM', 'LRV', 'LVV', 'LYC', 'LZH', 'MAB', 'MAW', 'MBC', 'MBO', 'MCQ', 'MEA', 'MGD', 'MID', 'MIZ', 'MMB', 'MZL', 'NAQ', 'NCK', 'NEW', 'NGK', 'NGP', 'NMP', 'NUR', 'NVS', 'ORC', 'OTT', 'PAF', 'PAG', 'PBQ', 'PEG', 'PET', 'PHU', 'PIL', 'PND', 'PPT', 'PST', 'QGZ', 'QIX', 'QSB', 'QZH', 'RES', 'SBA', 'SBL', 'SFS', 'SHE', 'SHL', 'SHU', 'SIL', 'SIT', 'SJG', 'SOD', 'SPG', 'SPT', 'STJ', 'SUA', 'TAM', 'TAN', 'TDC', 'TEO', 'THJ', 'THL', 'THY', 'TIR', 'TND', 'TRO', 'TRW', 'TSU', 'TUC', 'UPS', 'VAL', 'VIC', 'VNA', 'VOS', 'VSK', 'VSS', 'WHN', 'WIC', 'WIK', 'WNG', 'YAK', 'YKC']
+IAGA_CODES = CONFIG_SWARM.get("IAGA_CODES")
+VOBS_SITES = CONFIG_SWARM.get("VOBS_SITES")
 
 class SwarmWPSInputs(WPSInputs):
     """Holds the set of inputs to be passed to the request template for Swarm
@@ -233,8 +236,8 @@ def _spacecraft_from_collection(collection):
         else:
             # 12th character in name, e.g. SW_OPER_MAGx_LR_1B
             sc = collection[11]
-            sc_to_name = {"A": "Alpha", "B": "Bravo", "C": "Charlie", "_": "NSC"}
-            name = sc_to_name[sc]
+            sc_to_name = {"A": "Alpha", "B": "Bravo", "C": "Charlie"}
+            name = sc_to_name.get(sc, "NSC")
         return name
 
     def set_collections(self, collections):
@@ -421,9 +424,66 @@ class SwarmRequest(ClientRequest):
         "AUX_OBSS": [
             "SW_OPER_AUX_OBSS2_",
             *[f"SW_OPER_AUX_OBSS2_:{code}" for code in IAGA_CODES]
-        ]
+        ],
+        "VOBS_SW_1M": [
+            "SW_OPER_VOBS_1M_2_",
+            *[f"SW_OPER_VOBS_1M_2_:{site}" for site in VOBS_SITES]
+        ],
+        "VOBS_SW_4M": [
+            "SW_OPER_VOBS_4M_2_",
+            *[f"SW_OPER_VOBS_4M_2_:{site}" for site in VOBS_SITES]
+        ],
+        "VOBS_CH_1M": [
+            "CH_OPER_VOBS_1M_2_",
+            *[f"CH_OPER_VOBS_1M_2_:{site}" for site in VOBS_SITES]
+        ],
+        "VOBS_CH_4M": [
+            "CH_OPER_VOBS_4M_2_",
+            *[f"CH_OPER_VOBS_4M_2_:{site}" for site in VOBS_SITES]
+        ],
+        "VOBS_CR_4M": [
+            "CR_OPER_VOBS_4M_2_",
+            *[f"CR_OPER_VOBS_4M_2_:{site}" for site in VOBS_SITES]
+        ],
+        "VOBS_SW_1M:SecularVariation": [
+            "SW_OPER_VOBS_1M_2_:SecularVariation",
+            *[f"SW_OPER_VOBS_1M_2_:SecularVariation:{site}" for site in VOBS_SITES]
+        ],
+        "VOBS_SW_4M:SecularVariation": [
+            "SW_OPER_VOBS_4M_2_:SecularVariation",
+            *[f"SW_OPER_VOBS_4M_2_:SecularVariation:{site}" for site in VOBS_SITES]
+        ],
+        "VOBS_CH_1M:SecularVariation": [
+            "CH_OPER_VOBS_1M_2_:SecularVariation",
+            *[f"CH_OPER_VOBS_1M_2_:SecularVariation:{site}" for site in VOBS_SITES]
+        ],
+        "VOBS_CH_4M:SecularVariation": [
+            "CH_OPER_VOBS_4M_2_:SecularVariation",
+            *[f"CH_OPER_VOBS_4M_2_:SecularVariation:{site}" for site in VOBS_SITES]
+        ],
+        "VOBS_CR_4M:SecularVariation": [
+            "CR_OPER_VOBS_4M_2_:SecularVariation",
+            *[f"CR_OPER_VOBS_4M_2_:SecularVariation:{site}" for site in VOBS_SITES]
+        ],
     }
 
+    OBS_COLLECTIONS = [
+        "SW_OPER_AUX_OBSH2_",
+        "SW_OPER_AUX_OBSM2_",
+        "SW_OPER_AUX_OBSS2_",
+        "SW_OPER_VOBS_1M_2_",
+        "SW_OPER_VOBS_4M_2_",
+        "CH_OPER_VOBS_1M_2_",
+        "CH_OPER_VOBS_4M_2_",
+        "CR_OPER_VOBS_4M_2_",
+        "SW_OPER_VOBS_1M_2_:SecularVariation",
+        "SW_OPER_VOBS_4M_2_:SecularVariation",
+        "CH_OPER_VOBS_1M_2_:SecularVariation",
+        "CH_OPER_VOBS_4M_2_:SecularVariation",
+        "CR_OPER_VOBS_4M_2_:SecularVariation",
+    ]
+
+
     # These are not necessarily real sampling steps, but are good enough to use
     # for splitting long requests into chunks
     COLLECTION_SAMPLING_STEPS = {
@@ -439,7 +499,17 @@ class SwarmRequest(ClientRequest):
         "AEJ_LPS": "PT1S",
         "AUX_OBSH": "PT60M",
         "AUX_OBSM": "PT60S",
-        "AUX_OBSS": "PT1S"
+        "AUX_OBSS": "PT1S",
+        "VOBS_SW_1M": "P31D",
+        "VOBS_CH_1M": "P31D",
+        "VOBS_SW_4M": "P122D",
+        "VOBS_CH_4M": "P122D",
+        "VOBS_CR_4M": "P122D",
+        "VOBS_SW_1M:SecularVariation": "P31D",
+        "VOBS_CH_1M:SecularVariation": "P31D",
+        "VOBS_SW_4M:SecularVariation": "P122D",
+        "VOBS_CH_4M:SecularVariation": "P122D",
+        "VOBS_CR_4M:SecularVariation": "P122D",
     }
 
     PRODUCT_VARIABLES = {
@@ -501,9 +571,19 @@ class SwarmRequest(ClientRequest):
             "Latitude_QD", "Longitude_QD", "MLT_QD",
             "Boundary_Flag", "Quality", "Pair_Indicator"
         ],
-        "AUX_OBSH": ["B_NEC", "F", "IAGA_code", "Quality", "SensorIndex"],
+        "AUX_OBSH": ["B_NEC", "F", "IAGA_code", "Quality", "ObsIndex"],
         "AUX_OBSM": ["B_NEC", "F", "IAGA_code", "Quality"],
         "AUX_OBSS": ["B_NEC", "F", "IAGA_code", "Quality"],
+        "VOBS_SW_1M": ["SiteCode", "B_CF", "B_OB", "sigma_CF", "sigma_OB"],
+        "VOBS_CH_1M": ["SiteCode", "B_CF", "B_OB", "sigma_CF", "sigma_OB"],
+        "VOBS_SW_4M": ["SiteCode", "B_CF", "B_OB", "sigma_CF", "sigma_OB"],
+        "VOBS_CH_4M": ["SiteCode", "B_CF", "B_OB", "sigma_CF", "sigma_OB"],
+        "VOBS_CR_4M": ["SiteCode", "B_CF", "B_OB", "sigma_CF", "sigma_OB"],
"sigma_OB"], + "VOBS_SW_1M:SecularVariation": ["SiteCode", "B_SV", "sigma_SV"], + "VOBS_CH_1M:SecularVariation": ["SiteCode", "B_SV", "sigma_SV"], + "VOBS_SW_4M:SecularVariation": ["SiteCode", "B_SV", "sigma_SV"], + "VOBS_CH_4M:SecularVariation": ["SiteCode", "B_SV", "sigma_SV"], + "VOBS_CR_4M:SecularVariation": ["SiteCode", "B_SV", "sigma_SV"], } AUXILIARY_VARIABLES = [ @@ -622,11 +702,22 @@ def available_collections(self, groupname=None, details=True): If False then return a dict of available collections. """ - # Shorter form of the available collections + # Shorter form of the available collections, + # without all the individual SiteCodes collections_short = self._available["collections"].copy() - collections_short["AUX_OBSS"] = ['SW_OPER_AUX_OBSS2_'] - collections_short["AUX_OBSM"] = ['SW_OPER_AUX_OBSM2_'] - collections_short["AUX_OBSH"] = ['SW_OPER_AUX_OBSH2_'] + collections_short["AUX_OBSS"] = ["SW_OPER_AUX_OBSS2_"] + collections_short["AUX_OBSM"] = ["SW_OPER_AUX_OBSM2_"] + collections_short["AUX_OBSH"] = ["SW_OPER_AUX_OBSH2_"] + collections_short["VOBS_SW_1M"] = ["SW_OPER_VOBS_1M_2_"] + collections_short["VOBS_SW_4M"] = ["SW_OPER_VOBS_4M_2_"] + collections_short["VOBS_CH_1M"] = ["CH_OPER_VOBS_1M_2_"] + collections_short["VOBS_CH_4M"] = ["CH_OPER_VOBS_4M_2_"] + collections_short["VOBS_CR_4M"] = ["CR_OPER_VOBS_4M_2_"] + collections_short["VOBS_SW_1M:SecularVariation"] = ["SW_OPER_VOBS_1M_2_:SecularVariation"] + collections_short["VOBS_SW_4M:SecularVariation"] = ["SW_OPER_VOBS_4M_2_:SecularVariation"] + collections_short["VOBS_CH_1M:SecularVariation"] = ["CH_OPER_VOBS_1M_2_:SecularVariation"] + collections_short["VOBS_CH_4M:SecularVariation"] = ["CH_OPER_VOBS_4M_2_:SecularVariation"] + collections_short["VOBS_CR_4M:SecularVariation"] = ["CR_OPER_VOBS_4M_2_:SecularVariation"] def _filter_collections(groupname): """ Reduce the full list to just one group, e.g. "MAG """ @@ -811,14 +902,9 @@ def _csv_to_df(csv_data): StringIO(str(csv_data, 'utf-8')) ) - obs_collections = [ - "SW_OPER_AUX_OBSH2_", - "SW_OPER_AUX_OBSM2_", - "SW_OPER_AUX_OBSS2_" - ] - if collection not in obs_collections: + if collection not in self.OBS_COLLECTIONS: raise ValueError( - f"Invalid collection: {collection}. Must be one of: {obs_collections}." + f"Invalid collection: {collection}. Must be one of: {self.OBS_COLLECTIONS}." ) if start_time and end_time: start_time = parse_datetime(start_time) @@ -832,7 +918,9 @@ def _csv_to_df(csv_data): if details: return df else: - return list(df["IAGACode"]) + # note: "IAGACode" has been renamed to "site" in VirES 3.5 + key = "IAGACode" if "IAGACode" in df.keys() else "site" + return list(df[key]) def _detect_AUX_OBS(self, collections): # Identify collection types present @@ -948,25 +1036,38 @@ def set_products(self, measurements=None, models=None, custom_model=None, raise OSError("Custom model .shc file not found") else: custom_shc = None + # Set up the variables that actually get passed to the WPS request + + def _model_datavar_names(variable, residuals=False): + """Give the list of allowable variable names containing model evaluations""" + if variable not in model_variables: + raise ValueError(f"Expected one of {model_variables}; got '{variable}'") + affix = "_res_" if residuals else "_" + return [f"{variable}{affix}{model_name}" for model_name in model_ids] + + # Identify which (if any) of ["F", "B_NEC", ...] 
+        model_variables_present = set(measurements).intersection(set(model_variables))
+        # Create the list of variable names to request
         variables = []
-        for variable in measurements:
-            if variable in model_variables:
-                if residuals:
-                    variables.extend(
-                        "%s_res_%s" % (variable, model_name)
-                        for model_name in model_ids
-                    )
-                else:
-                    variables.append(variable)
-                    variables.extend(
-                        "%s_%s" % (variable, model_name)
-                        for model_name in model_ids
-                    )
-            else:  # not a model variable
+        for variable in model_variables_present:
+            if not residuals:
+                # Include "F" / "B_NEC" as requested...
                 variables.append(variable)
+            # Include e.g. "F_IGRF" / "B_NEC_IGRF" / "B_NEC_res_IGRF" etc.
+            variables.extend(_model_datavar_names(variable, residuals=residuals))
+        if models and (len(model_variables_present) == 0):
+            if residuals:
+                raise ValueError(
+                    f"""
+                    Residuals requested without one of {model_variables} set as measurements
+                    """
+                )
+            # If "F" / "B_NEC" have not been requested, include e.g. "B_NEC_IGRF" etc.
+            variables.extend(_model_datavar_names("B_NEC"))
+        # Include all the non-model-related variables
+        variables.extend(list(set(measurements) - model_variables_present))
         variables.extend(auxiliaries)
-        # Set these in the SwarmWPSInputs object
         self._request_inputs.model_expression = model_expression_string
         self._request_inputs.variables = variables
         self._request_inputs.sampling_step = sampling_step
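
A standalone sketch of the variable-name expansion the refactored set_products now performs (model_ids and measurements here are illustrative placeholders, not a real request):

    model_variables = {"F", "B_NEC"}     # variables that models can evaluate
    model_ids = ["IGRF", "CHAOS-Core"]   # hypothetical parsed model names

    def _model_datavar_names(variable, residuals=False):
        affix = "_res_" if residuals else "_"
        return [f"{variable}{affix}{model_name}" for model_name in model_ids]

    measurements = ["B_NEC", "Ne"]
    residuals = False
    model_variables_present = set(measurements) & model_variables

    variables = []
    for variable in model_variables_present:
        if not residuals:
            variables.append(variable)  # the raw measurement itself
        variables.extend(_model_datavar_names(variable, residuals=residuals))
    variables.extend(set(measurements) - model_variables_present)
    # variables -> ['B_NEC', 'B_NEC_IGRF', 'B_NEC_CHAOS-Core', 'Ne']
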
"N54E057", "N54E077", "N54E097", "N54E117", "N54E137", "N54E157", "N54E177", "N54W163", "N54W143", "N54W123", "N54W103", "N54W083", "N54W063", "N54W043", "N54W023", "N42W007", "N42E009", "N42E024", "N42E040", "N42E056", "N42E071", "N42E087", "N42E103", "N42E118", "N42E134", "N42E150", "N42E165", "N42W179", "N42W163", "N42W148", "N42W132", "N42W116", "N42W101", "N42W085", "N42W069", "N42W054", "N42W038", "N42W023", "N30W009", "N30E004", "N30E018", "N30E031", "N30E044", "N30E058", "N30E071", "N30E084", "N30E098", "N30E111", "N30E124", "N30E138", "N30E151", "N30E164", "N30E178", "N30W169", "N30W156", "N30W142", "N30W129", "N30W116", "N30W102", "N30W089", "N30W076", "N30W062", "N30W049", "N30W036", "N30W022", "N18W010", "N18E002", "N18E014", "N18E026", "N18E038", "N18E050", "N18E062", "N18E074", "N18E086", "N18E098", "N18E110", "N18E122", "N18E134", "N18E146", "N18E158", "N18E170", "N18W178", "N18W166", "N18W154", "N18W142", "N18W130", "N18W118", "N18W106", "N18W094", "N18W082", "N18W070", "N18W058", "N18W046", "N18W034", "N18W022", "N06W010", "N06E002", "N06E013", "N06E025", "N06E037", "N06E048", "N06E060", "N06E072", "N06E083", "N06E095", "N06E106", "N06E118", "N06E130", "N06E141", "N06E153", "N06E164", "N06E176", "N06W172", "N06W161", "N06W149", "N06W138", "N06W126", "N06W114", "N06W103", "N06W091", "N06W079", "N06W068", "N06W056", "N06W045", "N06W033", "N06W021", "S06W004", "S06E008", "S06E019", "S06E031", "S06E042", "S06E054", "S06E066", "S06E077", "S06E089", "S06E101", "S06E112", "S06E124", "S06E135", "S06E147", "S06E159", "S06E170", "S06W178", "S06W167", "S06W155", "S06W143", "S06W132", "S06W120", "S06W108", "S06W097", "S06W085", "S06W074", "S06W062", "S06W050", "S06W039", "S06W027", "S06W016", "S18W003", "S18E009", "S18E021", "S18E033", "S18E045", "S18E057", "S18E069", "S18E081", "S18E093", "S18E105", "S18E117", "S18E129", "S18E141", "S18E153", "S18E165", "S18E177", "S18W171", "S18W159", "S18W147", "S18W135", "S18W123", "S18W111", "S18W099", "S18W087", "S18W075", "S18W063", "S18W051", "S18W039", "S18W027", "S18W015", "S30W001", "S30E012", "S30E025", "S30E039", "S30E052", "S30E065", "S30E079", "S30E092", "S30E105", "S30E119", "S30E132", "S30E145", "S30E159", "S30E172", "S30W175", "S30W161", "S30W148", "S30W135", "S30W121", "S30W108", "S30W095", "S30W081", "S30W068", "S30W055", "S30W041", "S30W028", "S30W015", "S42E001", "S42E017", "S42E033", "S42E048", "S42E064", "S42E079", "S42E095", "S42E111", "S42E126", "S42E142", "S42E158", "S42E173", "S42W171", "S42W155", "S42W140", "S42W124", "S42W108", "S42W093", "S42W077", "S42W061", "S42W046", "S42W030", "S42W014", "S54E006", "S54E026", "S54E046", "S54E066", "S54E086", "S54E106", "S54E126", "S54E146", "S54E166", "S54W174", "S54W154", "S54W134", "S54W114", "S54W094", "S54W074", "S54W054", "S54W034", "S54W014", "S65E014", "S65E042", "S65E070", "S65E098", "S65E125", "S65E153", "S65W179", "S65W152", "S65W124", "S65W096", "S65W069", "S65W041", "S65W013", "S77E040", "S77E092", "S77E143", "S77W166", "S77W114", "S77W063", "S77W011", "S90E000"] +} diff --git a/viresclient/_data_handling.py b/viresclient/_data_handling.py index f840cf6..bcd9a19 100644 --- a/viresclient/_data_handling.py +++ b/viresclient/_data_handling.py @@ -38,11 +38,13 @@ if os.name == "nt": import atexit +from ._data import CONFIG_SWARM + CDF_EPOCH_1970 = 62167219200000.0 # Frame names to use as xarray dimension names FRAME_NAMES = { - "NEC": ["B_NEC"], + "NEC": ["B_NEC", "B_OB", "B_CF", "B_SV", "sigma_OB", "sigma_CF", "sigma_SV"], "VFM": ["B_VFM", "dB_Sun", "dB_AOCS", "dB_other", 
"B_error"], "quaternion": ["q_NEC_CRF"], "WGS84": ["GPS_Position", "LEO_Position"], @@ -137,10 +139,10 @@ def get_variable(self, var): return data def get_variable_units(self, var): - return self._varatts[var].get("UNITS", None) + return self._varatts[var].get("UNITS", "") def get_variable_description(self, var): - return self._varatts[var].get("DESCRIPTION", None) + return self._varatts[var].get("DESCRIPTION", "") def get_variable_numdims(self, var): return self._varinfo[var].get("Num_Dims") @@ -206,7 +208,7 @@ def as_pandas_dataframe(self, expand=False): df[column + "_" + str(suffix)] = vector_data[:, i] return df - def as_xarray_dataset(self): + def as_xarray_dataset(self, reshape=False): # NB currrently does not set the global metadata (attrs) # (avoids issues with concatenating them) # (this is done in ReturnedData) @@ -256,16 +258,71 @@ def as_xarray_dataset(self): for dimname, dimlabels in FRAME_LABELS.items(): if dimname in dims_used: ds[dimname] = numpy.array(dimlabels) - ds[dimname].attrs["description"] = FRAME_DESCRIPTIONS.get( - dimname, None) ds = ds.set_coords(dimname) + # ds[dimname].attrs["description"] = FRAME_DESCRIPTIONS.get( + # dimname, None) + # ds = ds.set_coords(dimname) + # Reshape to a sensible higher dimensional structure + # Currently only for GVO data, and without magnetic model values or auxiliaries + # Inefficient as it is duplicating the data (ds -> ds2) + if reshape: + ds = self.reshape_dataset(ds) # Add metadata of each variable - for dataname in ds: - ds[dataname].attrs["units"] = self.get_variable_units(dataname) - ds[dataname].attrs["description"] = self.get_variable_description( - dataname) + for var in list(ds.data_vars) + list(ds.coords): + try: + ds[var].attrs["units"] = self.get_variable_units(var) + except KeyError: + ds[var].attrs["units"] = "" + try: + ds[var].attrs["description"] = self.get_variable_description(var) + except KeyError: + ds[var].attrs["description"] = FRAME_DESCRIPTIONS.get(var, "") return ds + @staticmethod + def reshape_dataset(ds): + if "SiteCode" not in ds.data_vars: + raise NotImplementedError( + """ + Only available for GVO dataset where the "SiteCode" + parameter has been requested + """ + ) + vobs_sites = dict(enumerate(CONFIG_SWARM.get("VOBS_SITES"))) + vobs_sites_inv = {v: k for k, v in vobs_sites.items()} + # Identify VOBS locations and mapping from integer "Site" identifier + pos_vars = ["Longitude", "Latitude", "Radius", "SiteCode"] + _ds_locs = next(iter(ds[pos_vars].groupby("Timestamp")))[1] + _ds_locs = _ds_locs.drop(("Timestamp")).rename({"Timestamp": "Site"}) + _ds_locs["Site"] = [vobs_sites_inv.get(code) for code in _ds_locs["SiteCode"].values] + _ds_locs = _ds_locs.sortby("Site") + # Create dataset initialised with the VOBS positional info as coords + # and datavars (empty) reshaped to (Site, Timestamp, ...) + t = numpy.unique(ds["Timestamp"]) + ds2 = xarray.Dataset( + coords={ + "Timestamp": t, + "SiteCode": (("Site"), _ds_locs["SiteCode"]), + "Latitude": ("Site", _ds_locs["Latitude"]), + "Longitude": ("Site", _ds_locs["Longitude"]), + "Radius": ("Site", _ds_locs["Radius"]), + "NEC": ["N", "E", "C"] + }, + ) + # (Dropping unused Spacecraft var) + data_vars = set(ds.data_vars) - {"Latitude", "Longitude", "Radius", "SiteCode", "Spacecraft"} + N_sites = len(_ds_locs["SiteCode"]) + for var in data_vars: + shape = [N_sites, len(t), *ds[var].shape[1:]] + ds2[var] = ("Site", *ds[var].dims), numpy.empty(shape, dtype=ds[var].dtype) + ds2[var][...] 
+            ds2[var][...] = None
+        # Loop through each VOBS site to insert the datavars into ds2
+        for k, _ds in dict(ds.groupby("SiteCode")).items():
+            site = vobs_sites_inv.get(k)
+            for var in data_vars:
+                ds2[var][site, ...] = _ds[var].values
+        return ds2
+
 
 def make_pandas_DataFrame_from_csv(csv_filename):
     """Load a csv file into a pandas.DataFrame
@@ -432,7 +489,7 @@ def as_dataframe(self, expand=False):
             df = f.as_pandas_dataframe(expand=expand)
         return df
 
-    def as_xarray(self, group=None):
+    def as_xarray(self, group=None, reshape=False):
         """Convert the data to an xarray Dataset.
 
         Note:
@@ -448,7 +505,7 @@
             raise NotImplementedError("csv to xarray is not supported")
         elif self.filetype == 'cdf':
             with FileReader(self._file) as f:
-                ds = f.as_xarray_dataset()
+                ds = f.as_xarray_dataset(reshape=reshape)
         elif self.filetype == 'nc':
             ds = xarray.open_dataset(self._file.name, group=group)
         return ds
@@ -577,9 +634,12 @@
         return pandas.concat(
             [d.as_dataframe(expand=expand) for d in self.contents])
 
-    def as_xarray(self):
+    def as_xarray(self, reshape=False):
         """Convert the data to an xarray Dataset.
 
+        Args:
+            reshape (bool): Reshape to a convenient higher dimensional form
+
         Returns:
             xarray.Dataset
@@ -590,7 +650,7 @@
         # and the filtering that has been applied.
         ds_list = []
         for i, data in enumerate(self.contents):
-            ds_part = data.as_xarray()
+            ds_part = data.as_xarray(reshape=reshape)
             if ds_part is None:
                 print("Warning: ",
                       "Unable to create dataset from part {} of {}".format(
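
Finally, a usage sketch of the new reshape option end to end (the collection, measurements, and time range are illustrative; reshape=True only applies when "SiteCode" has been requested):

    from viresclient import SwarmRequest

    request = SwarmRequest()
    request.set_collection("SW_OPER_VOBS_1M_2_")
    request.set_products(measurements=["SiteCode", "B_OB", "sigma_OB"])
    data = request.get_between("2016-01-01T00:00:00Z", "2018-01-01T00:00:00Z")

    # Default behaviour: one flat Dataset indexed by Timestamp only
    ds_flat = data.as_xarray()
    # reshape=True pivots the GVO data onto (Site, Timestamp, NEC) dimensions,
    # with SiteCode/Latitude/Longitude/Radius attached as per-Site coordinates
    ds = data.as_xarray(reshape=True)
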