In [2]:
import os
import subprocess
import glob

In [1]:
# get all files in a directory

def get_all_files_from_directory(directory):
    """Return the names of the regular files located directly in ``directory``.

    Sub-directories are skipped and the search does not recurse. The returned
    entries are bare names (not joined with ``directory``), in the order
    ``os.listdir`` yields them.
    """
    file_names = []
    for entry in os.listdir(directory):
        # os.listdir returns bare names, so rebuild the full path for the check
        candidate = os.path.join(directory, entry)
        if os.path.isfile(candidate):
            file_names.append(entry)
    return file_names

# get all files in a directory with h5 extension

def get_all_files(directory, extension="*.h5"):
    """Return the paths in ``directory`` that match the glob pattern ``extension``.

    ``extension`` is a glob pattern such as ``"*.h5"`` (the default), not a
    bare suffix. Each returned path is ``directory`` joined with the matching
    name; the search does not recurse into sub-directories.
    """
    pattern = os.path.join(directory, extension)
    matching_paths = glob.glob(pattern)
    return matching_paths


In [7]:
# convert h5 to vrt

def h4_to_vrt(filename):
    """Create a Byte-typed VRT next to ``filename`` using ``gdal_translate``.

    Despite the ``h4`` in its name, the function is used for ``.h5`` input;
    the name is kept so existing callers keep working.

    Args:
        filename: Path of the input raster (typically ``*.h5``). The output
            path is the same path with its final extension replaced by
            ``.vrt``.

    Returns:
        None. Success or failure is reported with ``print``; a non-zero exit
        status of ``gdal_translate`` is caught and not re-raised.
    """
    input_name = filename
    # Swap only the final extension. A plain str.replace('.h5', '.vrt') would
    # also rewrite '.h5' inside directory names, and would leave the output
    # equal to the input for files that do not end in '.h5'.
    output_name = os.path.splitext(filename)[0] + '.vrt'

    # Only the output format and data type are set; no nodata or scaling
    # options are passed to gdal_translate.
    cmd = [
        'gdal_translate',
        '-of', 'VRT',
        '-ot', 'Byte',
        input_name,
        output_name,
    ]

    # Run the gdal_translate command
    try:
        subprocess.run(cmd, check=True)
        print(f"VRT file '{output_name}' created successfully.")
    except subprocess.CalledProcessError as e:
        print(f"Error while creating VRT file: {e}")


In [56]:
# polygonize h5 file and save it as GeoJSON

def polygonize_h5_file(h5_file_path, dataset_path, output_geojson):
    """Polygonize one dataset of an HDF5 file and write the result as GeoJSON.

    Two external GDAL tools are run in sequence:

    1. ``gdal_calc.py`` reads the dataset, evaluates ``numpy.round(A*100)``
       and writes the result as an Int32 raster (NoData value 0) to a
       temporary GeoTIFF.
    2. ``gdal_polygonize.py`` polygonizes band 1 of that raster into
       ``output_geojson``.

    Args:
        h5_file_path: Path of the HDF5 input file.
        dataset_path: Path of the dataset inside the HDF5 file
            (e.g. ``'dataset1/data1/data'``).
        output_geojson: Path of the GeoJSON file to create.

    Returns:
        None. A non-zero exit status of either tool is reported with
        ``print`` and not re-raised.
    """
    # Temporary file to hold the categorized raster (created in the current
    # working directory)
    temp_tif = 'temp.tif'

    try:
        # GDAL subdataset syntax for addressing a dataset inside an HDF5 file
        input_dataset = f'HDF5:"{h5_file_path}"://{dataset_path}'

        # Use gdal_calc.py to scale the values and round them to integers
        calc_cmd = [
            'gdal_calc.py',
            '-A', input_dataset,
            '--outfile', temp_tif,
            '--calc', "numpy.round(A*100)",
            '--NoDataValue', '0',
            '--overwrite',
            '--type', 'Int32'
        ]

        subprocess.run(calc_cmd, check=True)

        # The band option and its value must be separate argv entries; a
        # single '-b 1' token is not parsed as "band 1".
        polygonize_cmd = [
            'gdal_polygonize.py',
            temp_tif,
            '-b', '1',
            '-f', 'GeoJSON',
            output_geojson,
        ]

        subprocess.run(polygonize_cmd, check=True)
        print(f"Created GeoJSON: {output_geojson}")

    except subprocess.CalledProcessError as e:
        print(f"Error processing HDF5 file {h5_file_path}: {e}")

    finally:
        # Clean up the temporary file. It does not exist when gdal_calc.py
        # failed before writing it, so only remove it when it is there;
        # otherwise the cleanup itself would raise and hide the real error.
        if os.path.exists(temp_tif):
            os.remove(temp_tif)
            print("Temporary file removed.")

In [58]:
directory = './data/BZC/'

# HDF5-internal path of the dataset to polygonize. It is the same for every
# file, so it is defined once instead of on each loop iteration.
dataset_path = 'dataset1/data1/data'

# Iterate over all HDF5 (*.h5) files in the directory
for file_path in get_all_files(directory):
    print(f"Processing file: {file_path}")

    # Write the GeoJSON next to its input, swapping only the final extension
    # (str.replace would also rewrite '.h5' occurring elsewhere in the path).
    output_file = os.path.splitext(file_path)[0] + '.geojson'
    print(f"Output file: {output_file}")

    polygonize_h5_file(file_path, dataset_path, output_file)



Processing file: ./geojson/BZC/BZC221240330VL.845.h5
Processing file: ./geojson/BZC/BZC221240330VL.845.geojson
0...10...20...30...40...50...60...70...80...90...100 - done.
Creating output ./geojson/BZC/BZC221240330VL.845.geojson of format GeoJSON.
0...10...20...30...40...50...60...70...80...90...100 - done.
Created GeoJSON: ./geojson/BZC/BZC221240330VL.845.geojson
Temporary file removed.
Processing file: ./geojson/BZC/BZC221240405VL.845.h5
Processing file: ./geojson/BZC/BZC221240405VL.845.geojson
0...10...20...30...40...50...60...70...80...90...100 - done.
Creating output ./geojson/BZC/BZC221240405VL.845.geojson of format GeoJSON.
0...10...20...30...40...50...60...70...80...90...100 - done.
Created GeoJSON: ./geojson/BZC/BZC221240405VL.845.geojson
Temporary file removed.
Processing file: ./geojson/BZC/BZC221240205VL.845.h5
Processing file: ./geojson/BZC/BZC221240205VL.845.geojson
0...10...20...30...40...50...60...70...80...90...100 - done.
Creating output ./geojson/BZC/BZC221240205VL.8

In [2]:
import json
import os
from pyproj import Transformer

# Directory that is scanned for the *.geojson files to reproject
input_dir = './data/BZC'
# Directory that receives the reprojected copies (written under the same file names)
output_dir = './data/bzc_transformed'

# Create the output directory up front; exist_ok=True keeps re-running this cell safe
os.makedirs(output_dir, exist_ok=True)

# Shared transformer from EPSG:2056 (LV95) to EPSG:4326 (WGS84), built once and
# reused for every coordinate.
# NOTE: always_xy=True is understood to select x, y (easting/northing ->
# longitude/latitude) axis order for input and output; see the pyproj docs.
transformer = Transformer.from_crs("EPSG:2056", "EPSG:4326", always_xy=True)

def transform_coordinates(coordinates):
    """Reproject one coordinate pair (LV95 -> WGS84) with the shared ``transformer``.

    Only the first two entries of ``coordinates`` are used; any further
    entries are ignored. The result is whatever ``transformer.transform``
    returns for that pair.
    """
    x, y = coordinates[0], coordinates[1]
    return transformer.transform(x, y)

def process_geojson_feature(feature):
    """Reproject the geometry of a GeoJSON feature in place.

    Every position of the feature's geometry is passed through
    ``transform_coordinates``. Supported geometry types are Point, MultiPoint,
    LineString, MultiLineString, Polygon, MultiPolygon and GeometryCollection
    (processed recursively). Features whose geometry is missing or ``null``,
    geometries of an unknown type, and geometries with empty coordinates are
    left untouched.

    Args:
        feature: A GeoJSON Feature as a dict; it is modified in place.

    Returns:
        The same ``feature`` object, for convenience.
    """
    geometry = feature.get('geometry')
    # "geometry": null is valid GeoJSON (an unlocated feature): nothing to do
    if geometry:
        _transform_geometry(geometry)
    return feature


# How many list levels wrap a single position for each geometry type.
_COORDINATE_NESTING = {
    'Point': 0,
    'LineString': 1,
    'MultiPoint': 1,
    'Polygon': 2,
    'MultiLineString': 2,
    'MultiPolygon': 3,
}


def _transform_geometry(geometry):
    """Reproject one GeoJSON geometry dict in place."""
    geom_type = geometry.get('type')

    if geom_type == 'GeometryCollection':
        # Collections carry 'geometries' instead of 'coordinates'
        for member in geometry.get('geometries') or []:
            _transform_geometry(member)
        return

    depth = _COORDINATE_NESTING.get(geom_type)
    coords = geometry.get('coordinates')
    # Unknown types and empty/missing coordinates are passed through unchanged
    if depth is None or not coords:
        return

    geometry['coordinates'] = _transform_nested(coords, depth)


def _transform_nested(coords, depth):
    """Apply ``transform_coordinates`` to every position nested ``depth`` lists deep."""
    if depth == 0:
        return transform_coordinates(coords)
    return [_transform_nested(item, depth - 1) for item in coords]

def transform_geojson_file(filepath, output_path):
    """Read a GeoJSON file, reproject its features, and write the result.

    Every entry of the top-level ``features`` list is passed to
    ``process_geojson_feature``; a document without a ``features`` key is
    written back unchanged. The output is pretty-printed with a two-space
    indent.

    Args:
        filepath: Path of the GeoJSON file to read.
        output_path: Path of the GeoJSON file to write (overwritten if it
            exists).
    """
    # GeoJSON is UTF-8 by specification; do not depend on the platform's
    # default text encoding when reading or writing.
    with open(filepath, 'r', encoding='utf-8') as file:
        geojson_data = json.load(file)

    for feature in geojson_data.get('features', []):
        process_geojson_feature(feature)

    with open(output_path, 'w', encoding='utf-8') as file:
        json.dump(geojson_data, file, indent=2)

# Reproject every GeoJSON file found directly in input_dir and write the
# result under the same file name into output_dir.
for filename in os.listdir(input_dir):
    # Anything that is not a .geojson file is ignored
    if not filename.endswith('.geojson'):
        continue

    filepath = os.path.join(input_dir, filename)
    output_path = os.path.join(output_dir, filename)
    print(f"Processing {filename}...")
    transform_geojson_file(filepath, output_path)

print("All files have been processed.")


Processing BZC221240635VL.845.geojson...
Processing BZC221241020VL.845.geojson...
Processing BZC221241235VL.845.geojson...
Processing BZC221241630VL.845.geojson...
Processing BZC221240110VL.845.geojson...
Processing BZC221240600VL.845.geojson...
Processing BZC221241100VL.845.geojson...
Processing BZC221240700VL.845.geojson...
Processing BZC221240210VL.845.geojson...
Processing BZC221241715VL.845.geojson...
Processing BZC221241015VL.845.geojson...
Processing BZC221241240VL.845.geojson...
Processing BZC221242230VL.845.geojson...
Processing BZC221241555VL.845.geojson...
Processing BZC221240945VL.845.geojson...
Processing BZC221240940VL.845.geojson...
Processing BZC221241040VL.845.geojson...
Processing BZC221241640VL.845.geojson...
Processing BZC221240450VL.845.geojson...
Processing BZC221241700VL.845.geojson...
Processing BZC221241145VL.845.geojson...
Processing BZC221240525VL.845.geojson...
Processing BZC221241120VL.845.geojson...
Processing BZC221240430VL.845.geojson...
Processing BZC22