## Global Configuration

In [3]:
!pip install minio==7.0.3 h5py pandas


Collecting minio==7.0.3
  Using cached minio-7.0.3-py3-none-any.whl (75 kB)
Collecting h5py
  Using cached h5py-3.6.0-cp39-cp39-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (4.5 MB)
Collecting pandas
  Using cached pandas-1.4.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (11.7 MB)
Collecting numpy>=1.14.5
  Using cached numpy-1.22.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (16.8 MB)
Installing collected packages: numpy, minio, pandas, h5py
Successfully installed h5py-3.6.0 minio-7.0.3 numpy-1.22.3 pandas-1.4.1


In [12]:
from minio import Minio
from minio.error import S3Error
import h5py
import sys
import os
import pandas as pd
import json
import shutil
import pathlib
import subprocess
import re

# MinIO connection settings. Fill these in before running; avoid committing real credentials.
conf_minio_endpoint = ''
conf_minio_access_key = ''
conf_minio_secret_key = ''
conf_minio_secure = True  # bool, passed to the client as `secure`

# Local directory that downloaded objects are written to. Set this to something relevant
# to your machine (how the VRE handles directories is still unverified).
conf_minio_download_dir = './minio_download_dir'
conf_minio_input_bucket = 'lifewatchin'
conf_minio_input_prefix = 'NL/DHL/2018/10/03'

# Path to the radar database JSON file (radar_db.json, part of the Radar Cluster git
# repository @ devel, under radar_cluster/conf). Set this to something relevant to your machine.
conf_radar_db_source_path = './radar_db_source_path/opera-radars-db.json'

# Local directory that results are written to and later uploaded from.
# Set this to something relevant to your machine.
conf_output_dir = './output_dir'

# Load the raw radar database; the loop below iterates it as a sequence of radar records.
with open(conf_radar_db_source_path, mode="r") as f:
    radar_db_json = json.load(f)

# Reorder list to a usable dict with sub dicts which we can search with wmo codes
radar_db = {}
for radar_dict in radar_db_json:
    try:
        wmo_code = int(radar_dict.get("wmocode"))
    except (AttributeError, TypeError, ValueError):
        # Happens when the entry is not a dict, has no wmo code, or the code is not
        # numeric. Such entries cannot be looked up by WMO code, so they are skipped.
        continue
    radar_db[wmo_code] = radar_dict




In [13]:
#minioClient

# Build the MinIO client from the connection settings defined in the configuration cell.
minioClient = Minio(
    endpoint=conf_minio_endpoint,
    access_key=conf_minio_access_key,
    secret_key=conf_minio_secret_key,
    secure=conf_minio_secure,
)

## Retrieve the input file list from `conf_minio_input_bucket` and `conf_minio_input_prefix`

In [14]:
#list_objects

# Ask MinIO for every object in the input bucket whose key starts with the input prefix
# (recursive listing is requested).
list_objects = minioClient.list_objects(
    bucket_name=conf_minio_input_bucket,
    prefix=conf_minio_input_prefix,
    recursive=True,
)

## Retrieve the input dataset

In [None]:
# Retrieve the input dataset
local_input_file_paths = []
for remote_object in list_objects:
    # Place the object below the download directory, keeping its object name as the relative path.
    target_path = f"{conf_minio_download_dir}/{remote_object.object_name}"
    # fget (file get): download the object to that local path.
    minioClient.fget_object(
        bucket_name=remote_object.bucket_name,
        object_name=remote_object.object_name,
        file_path=target_path,
    )
    # Keep track of every downloaded file for the processing cells further down.
    local_input_file_paths.append(target_path)


In [None]:
# Create a dataframe to track input and output filenames
# NOTE(review): dwd_file_translator, list_unique_dirs, gen_output_path and vol2bird are not
# defined in the cells shown here; they must be defined or imported before this cell runs.


def _ensure_dirs(dirs):
    """Create every directory in ``dirs``; directories that already exist are left alone.

    ``dirs`` may be a single path or an iterable of paths. Both are accepted because
    the return type of ``list_unique_dirs`` is not visible from this cell.
    """
    if isinstance(dirs, (str, bytes, os.PathLike)):
        dirs = [dirs]
    for directory in dirs:
        os.makedirs(directory, exist_ok=True)


# Fail early with a clear message instead of an IndexError on `.iloc[0]` further down.
if not local_input_file_paths:
    raise RuntimeError(
        "No input files were downloaded from bucket '{}' with prefix '{}'".format(
            conf_minio_input_bucket, conf_minio_input_prefix))

df = pd.DataFrame()
df['source_pvol_path'] = local_input_file_paths
# One column name ('out_pvol_path') is used throughout; the original created
# 'out_pvol_file_path' but read 'out_pvol_path', which raised a KeyError.
df['out_pvol_path'] = [dwd_file_translator(radar_db, path) for path in df['source_pvol_path']]

# list the unique output dirs we have, this should be one per dataset
unique_dir_pvol = list_unique_dirs(df['out_pvol_path'])
# Create an output directory from the first path name for the PVOL's
gen_output_path(df['out_pvol_path'].iloc[0])

# create the output pvol directory
_ensure_dirs(unique_dir_pvol)

# 'move' the files from old file name to new filename and towards output directory
# (the files are copied, so the downloaded originals stay in place)
for source_path, target_path in zip(df['source_pvol_path'], df['out_pvol_path']):
    shutil.copy(source_path, target_path)

# Now prepare a column of filenames for the Vertical Profile files which will be generated from the PVOL (output) files
df['out_vp_path'] = [pvol_path.replace("pvol", "vp") for pvol_path in df['out_pvol_path']]

# Determine unique VP directories
unique_dir_vp = list_unique_dirs(df['out_vp_path'])

# Create the dir
_ensure_dirs(unique_dir_vp)

# Run vol2bird on every PVOL file and collect its return code and output file.
out_dir_vp = "{}/{}".format(conf_output_dir, 'vp')
retcodes = []
output_file_list = []
for pvol_path in df['out_pvol_path']:
    retcode, _input_file, output_file = vol2bird(pvol_path,
                                                 out_dir_vp,
                                                 radar_db)
    retcodes.append(retcode)
    # append output file
    output_file_list.append(output_file)

# Record the results next to their inputs. The planned VP paths are kept until this point
# and only then replaced by the paths vol2bird actually reported, instead of being
# discarded up front and never filled in.
df['v2b_retcode'] = retcodes
df['out_vp_path'] = output_file_list

# Upload the data, will be added later. First need to solve using .C code and subprocess calls in VRE.