In [1]:
from scripts.paths import elevation_path,sentinel_data_path,mask_path
import rasterio
import numpy as np
from rasterio.mask import geometry_mask
from rasterio.warp import calculate_default_transform, reproject, Resampling
import os 
import geopandas as gpd
import h5py
import json

In [2]:
# Create a nested dictionary to store the opened raster data for each band of each month in 2019
bands_data_2019 = {
    month: {
        band: rasterio.open(sentinel_data_path[2019][month][band]).read(1).astype('int16')
        for band in sentinel_data_path[2019][month]
    }
    for month in sentinel_data_path[2019]
}

In [3]:
# Load metadata
meta = rasterio.open(sentinel_data_path[2019][1]['B02']).meta

#serialize in json 
meta_serialized = json.dumps({
    'driver': meta['driver'],
    'dtype': meta['dtype'],
    'nodata': meta['nodata'],
    'width': meta['width'],
    'height': meta['height'],
    'count': meta['count'],
    'crs': str(meta['crs']),  # Convert the CRS object to a string
    'transform': str(meta['transform'])  # Convert the Affine object to a string
}) 

In [4]:
#load mask
if os.path.isfile(mask_path): 
    mask = rasterio.open(mask_path).read(1).astype(bool)

loaded


In [5]:
# Resample altitude data to match the other bands
with rasterio.open(elevation_path) as src:
    altitude_resampled = np.zeros((meta['height'], meta['width']), np.float32)

    reproject(
        source=rasterio.band(src, 1),
        destination=altitude_resampled,
        src_transform=src.transform,
        src_crs=src.crs,
        dst_transform=meta['transform'],
        dst_crs=meta['crs'],
        resampling=Resampling.nearest)

In [6]:
# Determine the shape of the bands
height, width = next(iter(next(iter(bands_data_2019.values())).values())).shape  # Shape of the bands
bands_per_month = len(next(iter(bands_data_2019.values())))  # Number of original bands per month

total_bands = bands_per_month +1


In [9]:
# Create an HDF5 file and store data in it
with h5py.File('hypercube.h5', 'w') as hdf_file:
    for month_idx, (month, bands_dict) in enumerate(bands_data_2019.items()):
        # Create a 3D array for the current month
        monthly_values = np.full((height, width, total_bands), np.nan)
        
        # Fill the monthly array with band data and calculate NDVI
        for band_idx, (band_name, band_data) in enumerate(bands_dict.items()):
            monthly_values[:, :, band_idx][mask] = band_data[mask]
            monthly_values[:, :, band_idx][~mask] = np.nan
        
        # Calculate NDVI
        red = bands_dict['B04']
        nir = bands_dict['B08']
        ndvi = (nir - red) / (nir + red)
        monthly_values[:, :, bands_per_month][mask] = ndvi[mask]

        # Store the 3D array for the current month in the HDF5 file
        hdf_file.create_dataset(f'month_{month_idx+1}', data=monthly_values, compression='gzip', compression_opts=9)
        print(f"{month} /12")
    # Store altitude and mask separately (these are 2D arrays)
    hdf_file.create_dataset('altitude', data=altitude_resampled, compression='gzip', compression_opts=9)
    hdf_file.create_dataset('mask', data=mask.astype(bool), compression='gzip', compression_opts=9)
    hdf_file.attrs['meta'] = meta_serialized 

  ndvi = (nir - red) / (nir + red)


1 /12
2 /12
3 /12
4 /12
5 /12
6 /12
7 /12
8 /12
9 /12
10 /12
11 /12
12 /12
