## Global Configuration

In [1]:
!pip install minio==7.0.3 h5py pandas



In [6]:
from minio import Minio
from minio.error import S3Error
import h5py
import sys
import os
import pandas as pd
import json
import shutil
import pathlib
import subprocess
import re

conf_minio_endpoint = ''
conf_minio_access_key = ''
conf_minio_secret_key = ''
conf_minio_secure = True #bool
conf_minio_download_dir = './minio_download_dir' #Set this to something relevant to your machine. I'm uncertain how the VRE handles directories but specify a path to download to.
conf_minio_input_bucket = 'lifewatchin'
conf_minio_input_prefix = 'NL/DHL/2018/10/03'
conf_radar_db_source_path = './radar_db_source_path/opera-radars-db.json' # Set this to something relevant to your machine. This needs to target radar_db.json. Found here. It's part of git Radar Cluster @ devel, radar_cluster/conf
conf_output_dir = './output_dir' # Set this to something relevant to your machine. This needs to specify a path from where to upload from.


with open(conf_radar_db_source_path, mode="r") as f:
    radar_db_json = json.load(f)
    radar_db = {}
# Reorder list to a usable dict with sub dicts which we can search with wmo codes
for radar_dict in radar_db_json:
    try:
        wmo_code = int(radar_dict.get("wmocode"))
        radar_db.update({wmo_code: radar_dict})
    except Exception:  # Happens when there is for ex. no wmo code.
        pass

In [7]:
#minioClient

minioClient = Minio(endpoint = conf_minio_endpoint,
                access_key= conf_minio_access_key,
                secret_key= conf_minio_secret_key,
                secure= conf_minio_secure)

## Retrieve input file list from conf_minio_prefix and conf_minio_bucket

In [8]:
#list_objects

list_objects = minioClient.list_objects(bucket_name = conf_minio_input_bucket,
                                        prefix = conf_minio_input_prefix,
                                        recursive = True)

# Retrieve the input dataset

In [9]:
# Retrieve the input datase
local_input_file_paths = []
for list_object in list_objects:
    # Return object_name as str
    object_name = list_object.object_name
    # append object name (file name) to download dir
    local_file_name = "{}/{}".format(conf_minio_download_dir,object_name)
    # fget (file get) the object
    minioClient.fget_object(
        bucket_name= list_object.bucket_name,
        object_name=list_object.object_name,
        file_path=local_file_name)
    # append the full file path to the file path list, for future useage
    local_input_file_paths.append(local_file_name)

In [10]:
# Create a dataframe to track input and output filenames
# functions
def gen_output_path(ibed_pvol_file_name):

    """
    Read a file, determine what the path convention is.
    Input is a filename str which is already in the IBED naming convention

    PVOL:       DEASB_pvol_20190215T0000    >   pvol/DE/ASB/2019/02/15
                DEBOO_pvol_20190215T0000    >   pvol/DE/BOO/2019/02/15
                NLHRW_pvol_20190215T0000    >   pvol/NL/HRW/2019/02/15
                UKCHE_pvol_20190215T0000    >   pvol/UK/CHE/2019/02/15
                BEZAV_pvol_20190215T0000    >   pvol/BE/ZAV/2019/02/15
    """

    # dateexpr = r'(\d{8})(T{0,1})(\d{4})'

    # match = re.match(dateexpr,out_pvol_pathibed_pvol_file_name)
    # print(match)

    output_path = "/".join(
        [
            ibed_pvol_file_name[0:2],  # Country
            ibed_pvol_file_name[2:5],  # Radar abbreviation
            ibed_pvol_file_name[11:15],  # Year
            ibed_pvol_file_name[15:17],  # Month
            ibed_pvol_file_name[17:19],  # Day
            "",  # Adding a trailing slash
        ]
    )

    return output_path

    
def translate_wmo_odim(radar_db, wmo_code):
    """"""

    # class FileTranslatorFileTypeError(LookupError):
    #    '''raise this when there's a filetype mismatch derived from h5 file'''

    if not isinstance(wmo_code, int):
        raise ValueError("Expecting a wmo_code [int]")
    else:
        pass

   
    odim_code = (
        radar_db.get(wmo_code).get("odimcode").upper().strip()
    )  # Apparently, people sometimes forget to remove whitespace..
    return odim_code

def extract_wmo_code(in_path):

    with h5py.File(in_path, "r") as f:

        # DWD Specific

        # Main attributes
        what = f["what"].attrs

        # Source block
        source = what.get("source")
        source = source.decode("utf-8")

        # Determine if we are dealing with a WMO code or with an ODIM code set
        # Example from Germany where source block is set as WMO
        # what/source: "WMO:10103"
        # Example from The Netherlands where source block is set as a combination of ODIM and various codes
        # what/source: RAD:NL52,NOD:nlhrw,PLC:Herwijnen
        source_list = source.split(sep=",")

    wmo_code = [string for string in source_list if "WMO" in string]

    # Determine if we had exactly one WMO hit
    if len(wmo_code) == 1:
        wmo_code = wmo_code[0]
        wmo_code = wmo_code.replace("WMO:", "")

    # No wmo code found, most likeley dealing with a dutch radar
    elif len(wmo_code) == 0:
        rad_str = [string for string in source_list if "RAD" in string]

        if len(rad_str) == 1:
            rad_str = rad_str[0]
        else:
            print(
                "Something went wrong with determining the rad_str and it wasnt WMO either, exiting"
            )
            sys.exit(1)
        # Split the rad_str
        rad_str_split = rad_str.split(":")
        # [0] = RAD, [1] = rad code
        rad_code = rad_str_split[1]

        rad_codes = {"NL52": "6356", "NL51": "6234", "NL50": "6260"}

        wmo_code = rad_codes.get(rad_code)

    return int(wmo_code)

def dwd_file_translator(radar_db, in_file):
    class FileTranslatorFileTypeError(LookupError):
        """raise this when there's a filetype mismatch derived from h5 file"""

    # Available codes. Adjust this to load radardb from ../conf/
    wmo_odim_code = {
        "10204": "DEEMD",
        "10103": "DEASB",
        "10169": "DEROS",
        "10132": "DEBOO",
        "10339": "DEHNR",
        "10440": "DEFLD",
        "10629": "DEOFT",
        "10908": "DEFBG",
        "10605": "DENHB",
        "10410": "DEESS",
        "10557": "DENEU",
        "10950": "DEMEM",
        "10873": "DEISN",
        "10832": "DETUR",
        "10780": "DEEIS",
        "10488": "DEDRS",
        "10392": "DEPRO",
        "10356": "DEUMD",
        "06410": "BEJAB",
        "06477": "BEWID",
        "06451": "BEZAV",
        "6356": "NLHRW",
        "6234": "NLDHL",
        "6260": "NLDBL",
        "06194": "DKBOR",
        "06034": "DKSIN",
        "06096": "DKROM",
        "06173": "DKSTE",
        "06103": "DKVIR",
    }

    try:
        wmo_code = extract_wmo_code(in_file)
        odim_code = translate_wmo_odim(radar_db, wmo_code)

        with h5py.File(in_file, "r") as f:

            # DWD Specific

            # Main attributes
            what = f["what"].attrs

            # Date block
            date = what.get("date")
            date = date.decode("utf-8")

            # Time block
            time = what.get("time")
            # time = f['dataset1/what'].attrs['endtime']
            time = time.decode("utf-8")
            hh = time[:2]
            mm = time[2:4]
            ss = time[4:]

            time = time[:-2]  # Do not include seconds
            # File type
            filetype = what.get("object")
            filetype = filetype.decode("utf-8")

            if filetype != "PVOL":
                raise FileTranslatorFileTypeError("File type was NOT pvol")

        name = [odim_code, filetype.lower(), date + "T" + time, str(wmo_code) + ".h5"]
        out_file_name = "_".join(name)

    except Exception as e:
        print(e)
        print("Invalid file, skipping file: {}".format(in_file))
        return None
    # Remove None (None stays when we could not open the file..)

    # out_file_paths = [path.replace(os.path.basename(path),fname) for path,fname in zip(checked_in_file,out_file_name)]
    out_file_path = in_file.replace(os.path.basename(in_file), out_file_name)

    # ibed_out_path = gen_output_path(out_file_name[0])
    ibed_out_path = gen_output_path(out_file_name)

    # out_file_paths = ["/".join(["./out/pvol/",ibed_out_path,fname]) for fname in out_file_name]
    out_file_path = "/".join(["./out/pvol/", ibed_out_path, out_file_name])

    return out_file_path

df = pd.DataFrame()
df['source_pvol_path'] = local_input_file_paths    
df['out_pvol_file_path'] = [dwd_file_translator(radar_db, path) for path in df["source_pvol_path"]] 


In [11]:
def list_unique_dirs(path_list):
    """

    path_list: a list with path strings
    return: a list with unique directories

    """

    unique_dirs = list(set([os.path.dirname(path) for path in path_list]))

    return unique_dirs


# list the amount of unique dirs we have, this should be one per dataset
unique_dir_pvol = list_unique_dirs(df['out_pvol_file_path'])
# Create an output directory from the first path name for the PVOL's
gen_output_path(df['out_pvol_file_path'].iloc[0])


'.//out//NL//DH/L//'

In [12]:
def vol2bird(in_file, out_dir, radar_db, add_version=True, add_sector=False):
    # Construct output file
    date_regex = "([0-9]{8})"

    if add_version == True:
        version = "v0-3-20"
        suffix = pathlib.Path(in_file).suffix
        in_file_name = pathlib.Path(in_file).name
        in_file_stem = pathlib.Path(in_file_name).stem
        #
        out_file_name = in_file_stem.replace("pvol", "vp")
        out_file_name = "_".join([out_file_name, version]) + suffix

        # odim = odim_code(out_file_name)
        wmo = extract_wmo_code(in_file)
        odim = translate_wmo_odim(radar_db, wmo)

        datetime = pd.to_datetime(re.search(date_regex, out_file_name)[0])

        ibed_path = "/".join(
            [
                odim[:2],
                odim[2:],
                str(datetime.year),
                str(datetime.month).zfill(2),
                str(datetime.day).zfill(2),
            ]
        )

        out_file = "/".join([out_dir, ibed_path, out_file_name])

        # out_file = "_".join([out_file[:-len(suffix)], version + suffix])

    command = ["vol2bird", in_file, out_file]
    #command = ["/Users/nicolas_noe/vol2bird/opt/vol2bird/bin/vol2bird", in_file, out_file]

    result = subprocess.run(command, stderr=subprocess.DEVNULL)

    # if result.returncode != 0:
    #    print(result)
    #    print("Something went wrong, exitting")
    #    sys.exit(1)
    return [result, in_file, out_file]

# create the output pvol directory
for dir_name in unique_dir_pvol:
    os.makedirs(dir_name, exist_ok=True)

# 'move' the files from old file name to new filename and towards output directory
for idx, row in df.iterrows():
    shutil.copy(row['source_pvol_path'], row['out_pvol_file_path'])
    
# Now prepare a column of filenames for the Vertical Profile files which will be generated from the PVOL (output) files
df['out_vp_path'] = [row['out_pvol_file_path'].replace("pvol","vp") for idx, row in df.iterrows()]

# Determine unique VP directories
unique_dir_vp = list_unique_dirs(df['out_vp_path'])

# Create the dir
for dir_name in unique_dir_vp:
    os.makedirs(dir_name, exist_ok=True)

#Initialize an empty column for vol2bird retcodes
df['v2b_retcode'] = [None] * len(df)
df['out_vp_path'] = [None] * len(df) # This is quite redundant, I'll check this when I'm back. Basically, now we are throwing away our old generated VP paths

out_dir_vp = "{}/{}".format(conf_output_dir,'vp')
output_file_list = []

for idx, row in df.iterrows():
    
    retcode, input_file, output_file = vol2bird(row['out_pvol_file_path'],
             out_dir_vp,
             radar_db)
    # append output file 
    output_file_list.append(output_file)
    
# Upload the data, will be added later. First need to solve using .C code and subprocess calls in VRE.

# vol2bird Vertical Profile of Birds (VPB)
# source: RAD:NL51,NOD:nldhl,PLC:Den Helder
# polar volume input: ./out/pvol//NL/DHL/2018/10/03//NLDHL_pvol_20181003T0000_6234.h5
# date   time HGHT    u      v       w     ff    dd  sd_vvp gap dbz     eta   dens   DBZH   n   n_dbz n_all n_dbz_all
20181003 0000    0   4.67  -3.63  -36.37  5.92 127.9   2.02 F  -5.95   89.8   8.16  27.81  2351  4678 37295 55812
20181003 0000  200   4.48  -3.65    8.80  5.78 129.2   2.59 F  -7.52   62.6   5.69  19.85  9718 25633 45438 77623
20181003 0000  400   3.79  -2.38   -8.78  4.48 122.1   2.84 F  -9.45   40.1   3.65   1.97  9680 27635 21521 50722
20181003 0000  600   4.29  -2.47    3.29  4.95 120.0   3.31 F -12.90   18.1   1.65  -2.92  5316 19595 14591 39688
20181003 0000  800   5.06  -2.82    6.32  5.79 119.1   3.31 F -14.36   13.0   1.18  -8.30  6262 18948 13200 35695
20181003 0000 1000   4.27  -2.71   -1.14  5.05 122.4   3.53 F -15.74    9.4   0.86  -7.69  4743 17828 12406 35959
20181003 0000 1200   3.37