## Welcome to your notebook.


#### Run this cell to connect to your GIS and get started:

#### Now you are ready to start!

In [1]:
###Install the correct libraries via pip or conda then run this part of the code

In [10]:
#Name: PRISMA2GeoTIFF.py
#Description: reads the content of a PRISMA he5 file and converts it to a GeoTIFF.
#All VNIR bands (66) and SWIR bands (173) are written to one single GeoTIFF file.
#Input is a PRISMA he5 file; output is a GeoTIFF with the same base name in the same folder.
#Author: martin rapilly, mrapilly60@uasd.edu.do/martin.rapilly@get.omp.eu

#import libraries
import h5py
import numpy as np
import matplotlib.pyplot as plt
import xarray as xr
import pandas as pd
from itertools import chain
from osgeo import gdal, osr
import os

In [3]:
###Run this cell to define the function PRISMA2GeoTIFF

In [11]:
#np.set_printoptions(threshold=np.inf)#optional: uncommenting this line will show full arrays when printing on the console. Not recommended as he5 PRISMA files contain many values that can overrun memory

def PRISMA2GeoTIFF (filename):
    """Convert the VNIR and SWIR cubes of a PRISMA he5 file into one GeoTIFF.

    The HDF5 tree of the file is printed, the swath (PRS_L2D_* or PRS_L1_*,
    HCO or HRC) is detected, and the VNIR bands followed by the SWIR bands
    are written as consecutive bands of a single Float32 GeoTIFF. The output
    is created next to the input, with the same base name and a ".tif"
    extension, and is tagged as EPSG:4326.

    Parameters
    ----------
    filename : str
        Path to the PRISMA he5 file to convert.

    Raises
    ------
    KeyError
        If no known swath, field group, cube or latitude/longitude dataset
        is found in the file.
    ValueError
        If the cubes are not laid out as (rows, bands, cols) matching the
        shape of the latitude grid.
    RuntimeError
        If GDAL cannot create the output GeoTIFF.
    """
    def print_name(name, obj):
        # visititems callback: report every group and dataset in the file
        if isinstance(obj, h5py.Dataset):
            print ('Dataset:', name)
        elif isinstance(obj, h5py.Group):
            print ('Group:', name)

    def first_existing(h5f, paths):
        # return the first candidate path present in the file, or None
        for p in paths:
            if p in h5f:
                return p
        return None

    # The file is opened once; the context manager closes it as soon as the
    # arrays have been copied into memory.
    with h5py.File(filename, 'r') as h5f:
        h5f.visititems(print_name)

        # --- detect which PRISMA swath exists (L1 vs L2D, HCO vs HRC) ---
        swath = first_existing(h5f, [
            "HDFEOS/SWATHS/PRS_L2D_HCO",
            "HDFEOS/SWATHS/PRS_L2D_HRC",
            "HDFEOS/SWATHS/PRS_L1_HCO",
            "HDFEOS/SWATHS/PRS_L1_HRC",
        ])
        if swath is None:
            raise KeyError("No PRISMA swath found under HDFEOS/SWATHS (expected PRS_L1_* or PRS_L2D_*).")

        # --- group names can be 'Data Fields' or 'Data_Fields' (underscore) ---
        data_fields = first_existing(h5f, [f"{swath}/Data Fields", f"{swath}/Data_Fields"])
        if data_fields is None:
            raise KeyError(f"Found swath {swath} but no Data Fields group.")

        geo_fields  = first_existing(h5f, [f"{swath}/Geolocation Fields", f"{swath}/Geolocation_Fields"])
        if geo_fields is None:
            raise KeyError(f"Found swath {swath} but no Geolocation Fields group.")

        # --- datasets ---
        swir_path = first_existing(h5f, [f"{data_fields}/SWIR_Cube"])
        vnir_path = first_existing(h5f, [f"{data_fields}/VNIR_Cube"])
        # --- PRISMA L1 has spectrometer-specific geolocation; the plain
        #     'Latitude'/'Longitude' names are tried last for L2D swaths ---
        lat_path = first_existing(h5f, [
            f"{geo_fields}/Latitude_VNIR",
            f"{geo_fields}/Latitude_SWIR",
            f"{geo_fields}/Latitude",
        ])
        lon_path = first_existing(h5f, [
            f"{geo_fields}/Longitude_VNIR",
            f"{geo_fields}/Longitude_SWIR",
            f"{geo_fields}/Longitude",
        ])

        missing = [name for name, p in [("SWIR_Cube", swir_path), ("VNIR_Cube", vnir_path), ("Latitude", lat_path), ("Longitude", lon_path)] if p is None]
        if missing:
            # show what exists to help you map names
            available_df  = list(h5f[data_fields].keys())
            available_geo = list(h5f[geo_fields].keys())
            raise KeyError(f"Missing {missing}. DataFields has: {available_df}. Geolocation has: {available_geo}.")

        print("Using geolocation:", lat_path, lon_path)

        # --- read arrays ([()] copies the whole dataset into memory) ---
        SWIRcube = h5f[swir_path][()]
        VNIRcube = h5f[vnir_path][()]
        lat      = h5f[lat_path][()]
        lon      = h5f[lon_path][()]

        print("Using swath:", swath)
        print("SWIR path:", swir_path)
        print("VNIR path:", vnir_path)
        print("LAT/LON paths:", lat_path, lon_path)

    #checks SWIR, VNIR and latitude/longitude array shapes
    print ("SWIRcube.shape",SWIRcube.shape)
    print ("VNIRcube.shape",VNIRcube.shape)
    print ("lat.shape",lat.shape)

    # the raster size is taken from the latitude grid
    if lat.ndim != 2:
        raise ValueError(f"Expected a 2D latitude grid, got shape {lat.shape}.")
    rows, cols = lat.shape
    for cube_name, cube in (("VNIR_Cube", VNIRcube), ("SWIR_Cube", SWIRcube)):
        if cube.ndim != 3 or cube.shape[0] != rows or cube.shape[2] != cols:
            raise ValueError(
                f"{cube_name} has shape {cube.shape}; expected (rows, bands, cols) "
                f"with rows={rows} and cols={cols}."
            )

    # cubes are indexed (row, band, col): move the band axis first and stack
    # all VNIR bands followed by all SWIR bands -> (bands, rows, cols)
    dataReshaped = np.concatenate(
        (np.transpose(VNIRcube, (1, 0, 2)), np.transpose(SWIRcube, (1, 0, 2))),
        axis=0,
    ).astype(np.uint16, copy=False)
    print ("reshaped data.shape",dataReshaped.shape)
    n_bands = dataReshaped.shape[0]

    #get minimum and maximum latitude and longitude
    xmin,ymin,xmax,ymax = [float(lon.min()),float(lat.min()),float(lon.max()),float(lat.max())]

    #get pixel spatial resolution
    xres = (xmax-xmin)/cols
    yres = (ymax-ymin)/rows

    #define coordinates: north-up grid spanning the lat/lon bounding box
    geotransform=(xmin,xres,0,ymax,0, -yres)#zeros (third and fifth parameters) are for rotation

    #define GeoTIFF structure and output filename (same base name, .tif extension)
    output_path = os.path.splitext(filename)[0] + ".tif"
    output_raster = gdal.GetDriverByName('GTiff').Create(output_path, cols, rows, n_bands, gdal.GDT_Float32)
    if output_raster is None:
        raise RuntimeError(f"GDAL could not create output file {output_path}.")

    #loop over all bands and write them to the GeoTIFF
    #GDAL band numbers start at 1 while the array index starts at 0
    outband = None
    for b in range(n_bands):
        print("converting band",b+1)
        outband = output_raster.GetRasterBand(b+1)
        outband.WriteArray(dataReshaped[b,:,:])

    #specify coordinates to WGS84
    output_raster.SetGeoTransform(geotransform)
    srs = osr.SpatialReference()
    srs.ImportFromEPSG(4326)
    output_raster.SetProjection(srs.ExportToWkt())

    #flush and close: dropping the references makes GDAL finalize the file
    output_raster.FlushCache()
    outband = None
    output_raster = None
    print("Conversion from he5 PRISMA file to GeoTIFF complete.")

In [12]:
###Put one or more he5 PRISMA files in a folder. Modify the folder path below and run this cell:

In [13]:
#enter folder path with he5 PRISMA files in it (edit this to match your machine)
folderPath= r"C:\Users\smclaugh1\OneDrive - Freeport-McMoRan Inc\projects_2026\PRISMA\PRS_L2D_STD_20241213102717_20241213102721_0001"

#collect only the he5 files: the GeoTIFFs are written into this same folder,
#so without the filter a second run would try to open the .tif outputs as HDF5
listImages=[]
for file in sorted(os.listdir(folderPath)):
    fullPath = os.path.join(folderPath, file)
    if os.path.isfile(fullPath) and file.lower().endswith(".he5"):
        listImages.append(fullPath)
print ("he5 image list: ", listImages)

#apply function PRISMA2GeoTIFF
for filename in listImages:
    print("Processing image", filename)
    PRISMA2GeoTIFF(filename)
print ("All files processed.")

he5 image list:  ['C:\\Users\\smclaugh1\\OneDrive - Freeport-McMoRan Inc\\projects_2026\\PRISMA\\PRS_L1_STD_OFFL_20210302034052_20210302034056_0001\\PRS_L1_STD_OFFL_20210302034052_20210302034056_0001.he5']
Processing image C:\Users\smclaugh1\OneDrive - Freeport-McMoRan Inc\projects_2026\PRISMA\PRS_L1_STD_OFFL_20210302034052_20210302034056_0001\PRS_L1_STD_OFFL_20210302034052_20210302034056_0001.he5
Group: HDFEOS
Group: HDFEOS/ADDITIONAL
Group: HDFEOS/ADDITIONAL/FILE_ATTRIBUTES
Group: HDFEOS/SWATHS
Group: HDFEOS/SWATHS/PRS_L1_HCO
Group: HDFEOS/SWATHS/PRS_L1_HCO/Data Fields
Dataset: HDFEOS/SWATHS/PRS_L1_HCO/Data Fields/Cloud_Mask
Dataset: HDFEOS/SWATHS/PRS_L1_HCO/Data Fields/FrameNumber
Dataset: HDFEOS/SWATHS/PRS_L1_HCO/Data Fields/LandCover_Mask
Dataset: HDFEOS/SWATHS/PRS_L1_HCO/Data Fields/SWIR_Cube
Dataset: HDFEOS/SWATHS/PRS_L1_HCO/Data Fields/SWIR_PIXEL_SAT_ERR_MATRIX
Dataset: HDFEOS/SWATHS/PRS_L1_HCO/Data Fields/SunGlint_Mask
Dataset: HDFEOS/SWATHS/PRS_L1_HCO/Data Fields/VNIR_Cube
Da



converting band 73
converting band 74
converting band 75
converting band 76
converting band 77
converting band 78
converting band 79
converting band 80
converting band 81
converting band 82
converting band 83
converting band 84
converting band 85
converting band 86
converting band 87
converting band 88
converting band 89
converting band 90
converting band 91
converting band 92
converting band 93
converting band 94
converting band 95
converting band 96
converting band 97
converting band 98
converting band 99
converting band 100
converting band 101
converting band 102
converting band 103
converting band 104
converting band 105
converting band 106
converting band 107
converting band 108
converting band 109
converting band 110
converting band 111
converting band 112
converting band 113
converting band 114
converting band 115
converting band 116
converting band 117
converting band 118
converting band 119
converting band 120
converting band 121
converting band 122
converting band 123
convert