In [7]:
# Add all SWOT data from ./data/Water mask pixel cloud 3 to the PostGIS database (swotdb)

# imports
import h5py
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pathlib import Path
import json
from tqdm import tqdm
from sqlalchemy import create_engine, MetaData, Table, Column, Integer, Float, String, Text


# Load configuration (JSON is UTF-8 by specification, so do not rely on the
# platform's default text encoding).
with open('config.json', 'r', encoding='utf-8') as f:
    config = json.load(f)

# Get a list of all NetCDF files in the pixel cloud directory.
# Path.glob yields entries in whatever order the filesystem returns them, so the
# list is sorted to make the processing order the same on every run.
pixel_cloud_dir = Path(config['water_mask_pixel_cloud_dir'])
file_list = sorted(pixel_cloud_dir.glob('*.nc'))
print(f"Found {len(file_list)} files in {config['water_mask_pixel_cloud_dir']}")

Found 148 files in data/Water mask pixel cloud 3


In [8]:
# Database connection
# The connection URL is read from config.json (key 'database_url') so that
# credentials do not have to live in the notebook. The literal below is only a
# fallback that keeps existing setups working when the key is absent.
# NOTE(review): the fallback embeds a username and password -- add
# 'database_url' to config.json and remove this literal before sharing.
DEFAULT_DATABASE_URL = 'postgresql://ben:1234@localhost:5432/swotdb'
engine = create_engine(config.get('database_url', DEFAULT_DATABASE_URL))
metadata = MetaData()

# Define database tables

# One row per ingested NetCDF file, holding the file-level attributes.
file_metadata_table = Table(
    'file_metadata', metadata,
    Column('id', Integer, primary_key=True),
    Column('file_name', String, nullable=False),
    Column('cycle_number', Integer),
    Column('pass_number', Integer),
    Column('tile_number', Integer),
    Column('tile_name', String),
    # Granule timestamps are stored as the raw attribute strings.
    Column('time_granule_start', String),
    Column('time_granule_end', String),
    Column('geospatial_lon_min', Float),
    Column('geospatial_lon_max', Float),
    Column('geospatial_lat_min', Float),
    Column('geospatial_lat_max', Float)
)

# One row per pixel of a file's 'pixel_cloud' group.
pixel_cloud_table = Table(
    'pixel_cloud', metadata,
    Column('id', Integer, primary_key=True),
    # Holds the file_metadata.id of the file the pixel came from; no foreign-key
    # constraint is declared, so the database does not enforce the link.
    Column('file_id', Integer),
    Column('latitude', Float),
    Column('longitude', Float),
    Column('height', Float),
    Column('water_frac', Float),
    Column('classification', Integer),
    Column('sig0', Float),
    Column('pixel_area', Float)
)

# Create tables if they don't exist (existing tables are left untouched)
metadata.create_all(engine)

In [None]:
# Datasets copied from each file's 'pixel_cloud' group into the pixel_cloud table.
PIXEL_CLOUD_COLUMNS = (
    'latitude',
    'longitude',
    'height',
    'water_frac',
    'classification',
    'sig0',
    'pixel_area',
)

# Rows written per batch by to_sql, so a large file is not turned into one
# giant in-memory parameter list.
PIXEL_INSERT_CHUNKSIZE = 100_000


def _attr_scalar(value):
    """Return the single value of a numeric HDF5 attribute.

    Accepts both a 0-d scalar and a one-element array, so the caller does not
    depend on which of the two h5py returns for a given file.
    """
    return np.asarray(value).reshape(-1)[0]


def _attr_text(value):
    """Return a string HDF5 attribute as ``str``.

    Bytes values are decoded; values that are already text are passed through
    ``str``. A one-element array is unwrapped first.
    """
    if isinstance(value, np.ndarray):
        value = value.reshape(-1)[0]
    return value.decode() if isinstance(value, bytes) else str(value)


for filepath in tqdm(file_list, desc="Processing files", unit="file"):
    # Read everything needed from the NetCDF file first, so the database
    # transaction below is not held open during file I/O.
    with h5py.File(filepath, 'r') as f:
        attrs = f.attrs

        # Extract metadata
        metadata_values = {
            'file_name': filepath.name,
            'cycle_number': int(_attr_scalar(attrs['cycle_number'])),
            'pass_number': int(_attr_scalar(attrs['pass_number'])),
            'tile_number': int(_attr_scalar(attrs['tile_number'])),
            'tile_name': _attr_text(attrs['tile_name']),
            'time_granule_start': _attr_text(attrs['time_granule_start']),
            'time_granule_end': _attr_text(attrs['time_granule_end']),
            'geospatial_lon_min': float(_attr_scalar(attrs['geospatial_lon_min'])),
            'geospatial_lon_max': float(_attr_scalar(attrs['geospatial_lon_max'])),
            'geospatial_lat_min': float(_attr_scalar(attrs['geospatial_lat_min'])),
            'geospatial_lat_max': float(_attr_scalar(attrs['geospatial_lat_max']))
        }

        # Extract pixel cloud data ([:] copies each dataset into memory, so the
        # arrays stay valid after the file is closed)
        pixel_cloud = f['pixel_cloud']
        pixel_columns = {name: pixel_cloud[name][:] for name in PIXEL_CLOUD_COLUMNS}

    # engine.begin() commits on success and rolls back on error. A bare
    # engine.connect() without an explicit commit discards the INSERT on
    # SQLAlchemy 2.x. Writing the metadata row and its pixels on the same
    # connection makes the file all-or-nothing: a failure cannot leave a
    # file_metadata row without its pixel rows.
    with engine.begin() as conn:
        result = conn.execute(file_metadata_table.insert().values(metadata_values))
        file_id = result.inserted_primary_key[0]

        # Prepare data for insertion (file_id is broadcast to every pixel row)
        pixel_data = pd.DataFrame({'file_id': file_id, **pixel_columns})

        # Insert pixel cloud data into the database
        pixel_data.to_sql(
            'pixel_cloud',
            conn,
            if_exists='append',
            index=False,
            chunksize=PIXEL_INSERT_CHUNKSIZE,
        )

Processing files:   0%|          | 0/148 [00:00<?, ?file/s]