In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import geopandas as gpd
import rasterio
import os
import re
from rasterio.mask import mask as rst_mask
from collections import defaultdict
from shapely.geometry import mapping
import rioxarray

In [2]:
def find_hourly_ffp_shapefiles(target_date, file_directory):
    """Return the hourly footprint shapefiles recorded for one date.

    Parameters
    ----------
    target_date : str
        Date written as ``YYYYMMDD``. It is rewritten to ``YYYY-MM-DD``
        because that is the form searched for in the file names.
    file_directory : str
        Directory whose entries are scanned (not recursive).

    Returns
    -------
    list of str
        Sorted file names (not full paths) of the ``.shp`` entries whose name
        contains ``_<YYYY-MM-DD>_<H>.`` with a one- or two-digit ``<H>``.
        The list is empty when nothing matches.
    """
    iso_date = f"{target_date[:4]}-{target_date[4:6]}-{target_date[6:]}"

    # Escape the date so that any regex metacharacter in the input is matched
    # literally instead of acting as a wildcard.
    pattern = re.compile(rf'_{re.escape(iso_date)}_\d{{1,2}}\.')

    # os.listdir gives no ordering guarantee; sort for reproducible results.
    return sorted(
        filename
        for filename in os.listdir(file_directory)
        if filename.endswith('.shp') and pattern.search(filename)
    )

In [3]:
# PlanetScope (PS) data folders, resolved relative to the current working directory.
_ps_data_root = os.path.join(os.getcwd(), 'Data')

# Gatesburg: one folder per year of imagery
_gatesburg_root = os.path.join(_ps_data_root, 'Gatesburg')
Gatesburg_2019_PS_dir = os.path.join(_gatesburg_root, '2019')
Gatesburg_2020_PS_dir = os.path.join(_gatesburg_root, '2020')
Gatesburg_2021_PS_dir = os.path.join(_gatesburg_root, '2021')
Gatesburg_2022_PS_dir = os.path.join(_gatesburg_root, '2022')
Gatesburg_2024_PS_dir = os.path.join(_gatesburg_root, '2024')

# Test site
US_HWB_2017_PS_dir = os.path.join(_ps_data_root, 'US-HWB', '2017')

In [4]:
# Hourly flux-footprint (FFP) shapefile folders, one per site, located in the
# '2_EC_footprint_area' folder that sits next to the current working directory.
_ffp_root = os.path.join(os.path.dirname(os.getcwd()), '2_EC_footprint_area')
UC1_hourly_ffp_dir = os.path.join(_ffp_root, 'US-UC1_footprints_hourly')
UC2_hourly_ffp_dir = os.path.join(_ffp_root, 'US-UC2_footprints_hourly')
HWB_hourly_ffp_dir = os.path.join(_ffp_root, 'US-HWB_footprints_hourly')

## Generating hourly FFP-clipped NDVI rasters

### UC1

In [14]:
# Clip every imputed PlanetScope NDVI raster of Gatesburg 2019 to the hourly
# US-UC1 footprint (FFP) polygons of the same date. One GeoTIFF per
# (date, hour) is written to the Hourly_FFP_NDVI_UC1 sub-folder.
Hourly_FFP_dir = UC1_hourly_ffp_dir
PS_files_dir = Gatesburg_2019_PS_dir
output_dir_name = 'Hourly_FFP_NDVI_UC1'

PS_ndvi_dir = os.path.join(PS_files_dir, 'NDVI', 'standardized_crs', 'NDVI_imputed')
output_directory = os.path.join(PS_ndvi_dir, output_dir_name)
os.makedirs(output_directory, exist_ok=True)

# Sorted so the processing order is the same on every run.
for ndvi_file in sorted(os.listdir(PS_ndvi_dir)):
    # The listing includes the output folder created above (and possibly
    # sidecar files); only GeoTIFF rasters are valid inputs.
    if not ndvi_file.lower().endswith(('.tif', '.tiff')):
        continue

    ndvi_file_path = os.path.join(PS_ndvi_dir, ndvi_file)
    ndvi_date = ndvi_file[:8]  # raster names start with the date as YYYYMMDD
    ndvi_date_shapefile = find_hourly_ffp_shapefiles(ndvi_date, Hourly_FFP_dir)
    if not ndvi_date_shapefile:
        continue

    for h_shpfile in ndvi_date_shapefile:
        # The hour is the token after the last underscore of the shapefile
        # stem; parsing it avoids relying on a fixed-width name prefix.
        ffp_hour = os.path.splitext(h_shpfile)[0].rsplit('_', 1)[-1]

        h_shpfile_path = os.path.join(Hourly_FFP_dir, h_shpfile)
        shapefile = gpd.read_file(h_shpfile_path)

        # Keep valid geometries only; nothing can be clipped if none remain.
        shapefile = shapefile[shapefile.is_valid]
        if shapefile.empty:
            print(f"Warning: The shapefile {h_shpfile} has no valid geometry.")
            continue

        try:
            with rasterio.open(ndvi_file_path) as src:
                # Footprint and raster must share a CRS before masking.
                if shapefile.crs != src.crs:
                    shapefile = shapefile.to_crs(src.crs)

                # rasterio expects GeoJSON-like geometry mappings.
                geometries = [mapping(geometry) for geometry in shapefile.geometry]

                # crop=True limits the output to the extent of the shapes.
                out_image, out_transform = rst_mask(src, geometries, crop=True)

                # Reuse the source metadata, adapted to the clipped size/transform.
                out_meta = src.meta.copy()
                out_meta.update({
                    "driver": "GTiff",
                    "height": out_image.shape[1],
                    "width": out_image.shape[2],
                    "transform": out_transform
                })

                output_raster_path = os.path.join(output_directory, ndvi_file.replace('.tif', f'_{ffp_hour}_HFFP.tif'))
                with rasterio.open(output_raster_path, "w", **out_meta) as dest:
                    dest.write(out_image)

        except ValueError as e:
            # A footprint lying outside the raster is only reported; any other
            # ValueError is re-raised unchanged.
            if 'Input shapes do not overlap raster' in str(e):
                print(f"Warning: The shapefile {h_shpfile} does not overlap the raster.")
            else:
                raise

In [6]:
# Clip every imputed PlanetScope NDVI raster of Gatesburg 2020 to the hourly
# US-UC1 footprint (FFP) polygons of the same date. One GeoTIFF per
# (date, hour) is written to the Hourly_FFP_NDVI_UC1 sub-folder.
Hourly_FFP_dir = UC1_hourly_ffp_dir
PS_files_dir = Gatesburg_2020_PS_dir
output_dir_name = 'Hourly_FFP_NDVI_UC1'

PS_ndvi_dir = os.path.join(PS_files_dir, 'NDVI', 'standardized_crs', 'NDVI_imputed')
output_directory = os.path.join(PS_ndvi_dir, output_dir_name)
os.makedirs(output_directory, exist_ok=True)

# Sorted so the processing order is the same on every run.
for ndvi_file in sorted(os.listdir(PS_ndvi_dir)):
    # The listing includes the output folder created above (and possibly
    # sidecar files); only GeoTIFF rasters are valid inputs.
    if not ndvi_file.lower().endswith(('.tif', '.tiff')):
        continue

    ndvi_file_path = os.path.join(PS_ndvi_dir, ndvi_file)
    ndvi_date = ndvi_file[:8]  # raster names start with the date as YYYYMMDD
    ndvi_date_shapefile = find_hourly_ffp_shapefiles(ndvi_date, Hourly_FFP_dir)
    if not ndvi_date_shapefile:
        continue

    for h_shpfile in ndvi_date_shapefile:
        # The hour is the token after the last underscore of the shapefile
        # stem; parsing it avoids relying on a fixed-width name prefix.
        ffp_hour = os.path.splitext(h_shpfile)[0].rsplit('_', 1)[-1]

        h_shpfile_path = os.path.join(Hourly_FFP_dir, h_shpfile)
        shapefile = gpd.read_file(h_shpfile_path)

        # Keep valid geometries only; nothing can be clipped if none remain.
        shapefile = shapefile[shapefile.is_valid]
        if shapefile.empty:
            print(f"Warning: The shapefile {h_shpfile} has no valid geometry.")
            continue

        try:
            with rasterio.open(ndvi_file_path) as src:
                # Footprint and raster must share a CRS before masking.
                if shapefile.crs != src.crs:
                    shapefile = shapefile.to_crs(src.crs)

                # rasterio expects GeoJSON-like geometry mappings.
                geometries = [mapping(geometry) for geometry in shapefile.geometry]

                # crop=True limits the output to the extent of the shapes.
                out_image, out_transform = rst_mask(src, geometries, crop=True)

                # Reuse the source metadata, adapted to the clipped size/transform.
                out_meta = src.meta.copy()
                out_meta.update({
                    "driver": "GTiff",
                    "height": out_image.shape[1],
                    "width": out_image.shape[2],
                    "transform": out_transform
                })

                output_raster_path = os.path.join(output_directory, ndvi_file.replace('.tif', f'_{ffp_hour}_HFFP.tif'))
                with rasterio.open(output_raster_path, "w", **out_meta) as dest:
                    dest.write(out_image)

        except ValueError as e:
            # A footprint lying outside the raster is only reported; any other
            # ValueError is re-raised unchanged.
            if 'Input shapes do not overlap raster' in str(e):
                print(f"Warning: The shapefile {h_shpfile} does not overlap the raster.")
            else:
                raise

In [7]:
# Clip every imputed PlanetScope NDVI raster of Gatesburg 2021 to the hourly
# US-UC1 footprint (FFP) polygons of the same date. One GeoTIFF per
# (date, hour) is written to the Hourly_FFP_NDVI_UC1 sub-folder.
Hourly_FFP_dir = UC1_hourly_ffp_dir
PS_files_dir = Gatesburg_2021_PS_dir
output_dir_name = 'Hourly_FFP_NDVI_UC1'

PS_ndvi_dir = os.path.join(PS_files_dir, 'NDVI', 'standardized_crs', 'NDVI_imputed')
output_directory = os.path.join(PS_ndvi_dir, output_dir_name)
os.makedirs(output_directory, exist_ok=True)

# Sorted so the processing order is the same on every run.
for ndvi_file in sorted(os.listdir(PS_ndvi_dir)):
    # The listing includes the output folder created above (and possibly
    # sidecar files); only GeoTIFF rasters are valid inputs.
    if not ndvi_file.lower().endswith(('.tif', '.tiff')):
        continue

    ndvi_file_path = os.path.join(PS_ndvi_dir, ndvi_file)
    ndvi_date = ndvi_file[:8]  # raster names start with the date as YYYYMMDD
    ndvi_date_shapefile = find_hourly_ffp_shapefiles(ndvi_date, Hourly_FFP_dir)
    if not ndvi_date_shapefile:
        continue

    for h_shpfile in ndvi_date_shapefile:
        # The hour is the token after the last underscore of the shapefile
        # stem; parsing it avoids relying on a fixed-width name prefix.
        ffp_hour = os.path.splitext(h_shpfile)[0].rsplit('_', 1)[-1]

        h_shpfile_path = os.path.join(Hourly_FFP_dir, h_shpfile)
        shapefile = gpd.read_file(h_shpfile_path)

        # Keep valid geometries only; nothing can be clipped if none remain.
        shapefile = shapefile[shapefile.is_valid]
        if shapefile.empty:
            print(f"Warning: The shapefile {h_shpfile} has no valid geometry.")
            continue

        try:
            with rasterio.open(ndvi_file_path) as src:
                # Footprint and raster must share a CRS before masking.
                if shapefile.crs != src.crs:
                    shapefile = shapefile.to_crs(src.crs)

                # rasterio expects GeoJSON-like geometry mappings.
                geometries = [mapping(geometry) for geometry in shapefile.geometry]

                # crop=True limits the output to the extent of the shapes.
                out_image, out_transform = rst_mask(src, geometries, crop=True)

                # Reuse the source metadata, adapted to the clipped size/transform.
                out_meta = src.meta.copy()
                out_meta.update({
                    "driver": "GTiff",
                    "height": out_image.shape[1],
                    "width": out_image.shape[2],
                    "transform": out_transform
                })

                output_raster_path = os.path.join(output_directory, ndvi_file.replace('.tif', f'_{ffp_hour}_HFFP.tif'))
                with rasterio.open(output_raster_path, "w", **out_meta) as dest:
                    dest.write(out_image)

        except ValueError as e:
            # A footprint lying outside the raster is only reported; any other
            # ValueError is re-raised unchanged.
            if 'Input shapes do not overlap raster' in str(e):
                print(f"Warning: The shapefile {h_shpfile} does not overlap the raster.")
            else:
                raise

In [8]:
# Clip every imputed PlanetScope NDVI raster of Gatesburg 2022 to the hourly
# US-UC1 footprint (FFP) polygons of the same date. One GeoTIFF per
# (date, hour) is written to the Hourly_FFP_NDVI_UC1 sub-folder.
Hourly_FFP_dir = UC1_hourly_ffp_dir
PS_files_dir = Gatesburg_2022_PS_dir
output_dir_name = 'Hourly_FFP_NDVI_UC1'

PS_ndvi_dir = os.path.join(PS_files_dir, 'NDVI', 'standardized_crs', 'NDVI_imputed')
output_directory = os.path.join(PS_ndvi_dir, output_dir_name)
os.makedirs(output_directory, exist_ok=True)

# Sorted so the processing order is the same on every run.
for ndvi_file in sorted(os.listdir(PS_ndvi_dir)):
    # The listing includes the output folder created above (and possibly
    # sidecar files); only GeoTIFF rasters are valid inputs.
    if not ndvi_file.lower().endswith(('.tif', '.tiff')):
        continue

    ndvi_file_path = os.path.join(PS_ndvi_dir, ndvi_file)
    ndvi_date = ndvi_file[:8]  # raster names start with the date as YYYYMMDD
    ndvi_date_shapefile = find_hourly_ffp_shapefiles(ndvi_date, Hourly_FFP_dir)
    if not ndvi_date_shapefile:
        continue

    for h_shpfile in ndvi_date_shapefile:
        # The hour is the token after the last underscore of the shapefile
        # stem; parsing it avoids relying on a fixed-width name prefix.
        ffp_hour = os.path.splitext(h_shpfile)[0].rsplit('_', 1)[-1]

        h_shpfile_path = os.path.join(Hourly_FFP_dir, h_shpfile)
        shapefile = gpd.read_file(h_shpfile_path)

        # Keep valid geometries only; nothing can be clipped if none remain.
        shapefile = shapefile[shapefile.is_valid]
        if shapefile.empty:
            print(f"Warning: The shapefile {h_shpfile} has no valid geometry.")
            continue

        try:
            with rasterio.open(ndvi_file_path) as src:
                # Footprint and raster must share a CRS before masking.
                if shapefile.crs != src.crs:
                    shapefile = shapefile.to_crs(src.crs)

                # rasterio expects GeoJSON-like geometry mappings.
                geometries = [mapping(geometry) for geometry in shapefile.geometry]

                # crop=True limits the output to the extent of the shapes.
                out_image, out_transform = rst_mask(src, geometries, crop=True)

                # Reuse the source metadata, adapted to the clipped size/transform.
                out_meta = src.meta.copy()
                out_meta.update({
                    "driver": "GTiff",
                    "height": out_image.shape[1],
                    "width": out_image.shape[2],
                    "transform": out_transform
                })

                output_raster_path = os.path.join(output_directory, ndvi_file.replace('.tif', f'_{ffp_hour}_HFFP.tif'))
                with rasterio.open(output_raster_path, "w", **out_meta) as dest:
                    dest.write(out_image)

        except ValueError as e:
            # A footprint lying outside the raster is only reported; any other
            # ValueError is re-raised unchanged.
            if 'Input shapes do not overlap raster' in str(e):
                print(f"Warning: The shapefile {h_shpfile} does not overlap the raster.")
            else:
                raise

In [5]:
# Clip every imputed PlanetScope NDVI raster of Gatesburg 2024 to the hourly
# US-UC1 footprint (FFP) polygons of the same date. One GeoTIFF per
# (date, hour) is written to the Hourly_FFP_NDVI_UC1 sub-folder.
Hourly_FFP_dir = UC1_hourly_ffp_dir
PS_files_dir = Gatesburg_2024_PS_dir
output_dir_name = 'Hourly_FFP_NDVI_UC1'

PS_ndvi_dir = os.path.join(PS_files_dir, 'NDVI', 'standardized_crs', 'NDVI_imputed')
output_directory = os.path.join(PS_ndvi_dir, output_dir_name)
os.makedirs(output_directory, exist_ok=True)

# Sorted so the processing order is the same on every run.
for ndvi_file in sorted(os.listdir(PS_ndvi_dir)):
    # The listing includes the output folder created above (and possibly
    # sidecar files); only GeoTIFF rasters are valid inputs.
    if not ndvi_file.lower().endswith(('.tif', '.tiff')):
        continue

    ndvi_file_path = os.path.join(PS_ndvi_dir, ndvi_file)
    ndvi_date = ndvi_file[:8]  # raster names start with the date as YYYYMMDD
    ndvi_date_shapefile = find_hourly_ffp_shapefiles(ndvi_date, Hourly_FFP_dir)
    if not ndvi_date_shapefile:
        continue

    for h_shpfile in ndvi_date_shapefile:
        # The hour is the token after the last underscore of the shapefile
        # stem; parsing it avoids relying on a fixed-width name prefix.
        ffp_hour = os.path.splitext(h_shpfile)[0].rsplit('_', 1)[-1]

        h_shpfile_path = os.path.join(Hourly_FFP_dir, h_shpfile)
        shapefile = gpd.read_file(h_shpfile_path)

        # Keep valid geometries only; nothing can be clipped if none remain.
        shapefile = shapefile[shapefile.is_valid]
        if shapefile.empty:
            print(f"Warning: The shapefile {h_shpfile} has no valid geometry.")
            continue

        try:
            with rasterio.open(ndvi_file_path) as src:
                # Footprint and raster must share a CRS before masking.
                if shapefile.crs != src.crs:
                    shapefile = shapefile.to_crs(src.crs)

                # rasterio expects GeoJSON-like geometry mappings.
                geometries = [mapping(geometry) for geometry in shapefile.geometry]

                # crop=True limits the output to the extent of the shapes.
                out_image, out_transform = rst_mask(src, geometries, crop=True)

                # Reuse the source metadata, adapted to the clipped size/transform.
                out_meta = src.meta.copy()
                out_meta.update({
                    "driver": "GTiff",
                    "height": out_image.shape[1],
                    "width": out_image.shape[2],
                    "transform": out_transform
                })

                output_raster_path = os.path.join(output_directory, ndvi_file.replace('.tif', f'_{ffp_hour}_HFFP.tif'))
                with rasterio.open(output_raster_path, "w", **out_meta) as dest:
                    dest.write(out_image)

        except ValueError as e:
            # A footprint lying outside the raster is only reported; any other
            # ValueError is re-raised unchanged.
            if 'Input shapes do not overlap raster' in str(e):
                print(f"Warning: The shapefile {h_shpfile} does not overlap the raster.")
            else:
                raise

### UC2

In [9]:
# Clip every imputed PlanetScope NDVI raster of Gatesburg 2019 to the hourly
# US-UC2 footprint (FFP) polygons of the same date. One GeoTIFF per
# (date, hour) is written to the Hourly_FFP_NDVI_UC2 sub-folder.
Hourly_FFP_dir = UC2_hourly_ffp_dir
PS_files_dir = Gatesburg_2019_PS_dir
output_dir_name = 'Hourly_FFP_NDVI_UC2'

PS_ndvi_dir = os.path.join(PS_files_dir, 'NDVI', 'standardized_crs', 'NDVI_imputed')
output_directory = os.path.join(PS_ndvi_dir, output_dir_name)
os.makedirs(output_directory, exist_ok=True)

# Sorted so the processing order is the same on every run.
for ndvi_file in sorted(os.listdir(PS_ndvi_dir)):
    # The listing includes the output folder created above (and possibly
    # sidecar files); only GeoTIFF rasters are valid inputs.
    if not ndvi_file.lower().endswith(('.tif', '.tiff')):
        continue

    ndvi_file_path = os.path.join(PS_ndvi_dir, ndvi_file)
    ndvi_date = ndvi_file[:8]  # raster names start with the date as YYYYMMDD
    ndvi_date_shapefile = find_hourly_ffp_shapefiles(ndvi_date, Hourly_FFP_dir)
    if not ndvi_date_shapefile:
        continue

    for h_shpfile in ndvi_date_shapefile:
        # The hour is the token after the last underscore of the shapefile
        # stem; parsing it avoids relying on a fixed-width name prefix.
        ffp_hour = os.path.splitext(h_shpfile)[0].rsplit('_', 1)[-1]

        h_shpfile_path = os.path.join(Hourly_FFP_dir, h_shpfile)
        shapefile = gpd.read_file(h_shpfile_path)

        # Keep valid geometries only; nothing can be clipped if none remain.
        shapefile = shapefile[shapefile.is_valid]
        if shapefile.empty:
            print(f"Warning: The shapefile {h_shpfile} has no valid geometry.")
            continue

        try:
            with rasterio.open(ndvi_file_path) as src:
                # Footprint and raster must share a CRS before masking.
                if shapefile.crs != src.crs:
                    shapefile = shapefile.to_crs(src.crs)

                # rasterio expects GeoJSON-like geometry mappings.
                geometries = [mapping(geometry) for geometry in shapefile.geometry]

                # crop=True limits the output to the extent of the shapes.
                out_image, out_transform = rst_mask(src, geometries, crop=True)

                # Reuse the source metadata, adapted to the clipped size/transform.
                out_meta = src.meta.copy()
                out_meta.update({
                    "driver": "GTiff",
                    "height": out_image.shape[1],
                    "width": out_image.shape[2],
                    "transform": out_transform
                })

                output_raster_path = os.path.join(output_directory, ndvi_file.replace('.tif', f'_{ffp_hour}_HFFP.tif'))
                with rasterio.open(output_raster_path, "w", **out_meta) as dest:
                    dest.write(out_image)

        except ValueError as e:
            # A footprint lying outside the raster is only reported; any other
            # ValueError is re-raised unchanged.
            if 'Input shapes do not overlap raster' in str(e):
                print(f"Warning: The shapefile {h_shpfile} does not overlap the raster.")
            else:
                raise

In [10]:
# Clip every imputed PlanetScope NDVI raster of Gatesburg 2020 to the hourly
# US-UC2 footprint (FFP) polygons of the same date. One GeoTIFF per
# (date, hour) is written to the Hourly_FFP_NDVI_UC2 sub-folder.
Hourly_FFP_dir = UC2_hourly_ffp_dir
PS_files_dir = Gatesburg_2020_PS_dir
output_dir_name = 'Hourly_FFP_NDVI_UC2'

PS_ndvi_dir = os.path.join(PS_files_dir, 'NDVI', 'standardized_crs', 'NDVI_imputed')
output_directory = os.path.join(PS_ndvi_dir, output_dir_name)
os.makedirs(output_directory, exist_ok=True)

# Sorted so the processing order is the same on every run.
for ndvi_file in sorted(os.listdir(PS_ndvi_dir)):
    # The listing includes the output folder created above (and possibly
    # sidecar files); only GeoTIFF rasters are valid inputs.
    if not ndvi_file.lower().endswith(('.tif', '.tiff')):
        continue

    ndvi_file_path = os.path.join(PS_ndvi_dir, ndvi_file)
    ndvi_date = ndvi_file[:8]  # raster names start with the date as YYYYMMDD
    ndvi_date_shapefile = find_hourly_ffp_shapefiles(ndvi_date, Hourly_FFP_dir)
    if not ndvi_date_shapefile:
        continue

    for h_shpfile in ndvi_date_shapefile:
        # The hour is the token after the last underscore of the shapefile
        # stem; parsing it avoids relying on a fixed-width name prefix.
        ffp_hour = os.path.splitext(h_shpfile)[0].rsplit('_', 1)[-1]

        h_shpfile_path = os.path.join(Hourly_FFP_dir, h_shpfile)
        shapefile = gpd.read_file(h_shpfile_path)

        # Keep valid geometries only; nothing can be clipped if none remain.
        shapefile = shapefile[shapefile.is_valid]
        if shapefile.empty:
            print(f"Warning: The shapefile {h_shpfile} has no valid geometry.")
            continue

        try:
            with rasterio.open(ndvi_file_path) as src:
                # Footprint and raster must share a CRS before masking.
                if shapefile.crs != src.crs:
                    shapefile = shapefile.to_crs(src.crs)

                # rasterio expects GeoJSON-like geometry mappings.
                geometries = [mapping(geometry) for geometry in shapefile.geometry]

                # crop=True limits the output to the extent of the shapes.
                out_image, out_transform = rst_mask(src, geometries, crop=True)

                # Reuse the source metadata, adapted to the clipped size/transform.
                out_meta = src.meta.copy()
                out_meta.update({
                    "driver": "GTiff",
                    "height": out_image.shape[1],
                    "width": out_image.shape[2],
                    "transform": out_transform
                })

                output_raster_path = os.path.join(output_directory, ndvi_file.replace('.tif', f'_{ffp_hour}_HFFP.tif'))
                with rasterio.open(output_raster_path, "w", **out_meta) as dest:
                    dest.write(out_image)

        except ValueError as e:
            # A footprint lying outside the raster is only reported; any other
            # ValueError is re-raised unchanged.
            if 'Input shapes do not overlap raster' in str(e):
                print(f"Warning: The shapefile {h_shpfile} does not overlap the raster.")
            else:
                raise

In [11]:
# Clip every imputed PlanetScope NDVI raster of Gatesburg 2021 to the hourly
# US-UC2 footprint (FFP) polygons of the same date. One GeoTIFF per
# (date, hour) is written to the Hourly_FFP_NDVI_UC2 sub-folder.
Hourly_FFP_dir = UC2_hourly_ffp_dir
PS_files_dir = Gatesburg_2021_PS_dir
output_dir_name = 'Hourly_FFP_NDVI_UC2'

PS_ndvi_dir = os.path.join(PS_files_dir, 'NDVI', 'standardized_crs', 'NDVI_imputed')
output_directory = os.path.join(PS_ndvi_dir, output_dir_name)
os.makedirs(output_directory, exist_ok=True)

# Sorted so the processing order is the same on every run.
for ndvi_file in sorted(os.listdir(PS_ndvi_dir)):
    # The listing includes the output folder created above (and possibly
    # sidecar files); only GeoTIFF rasters are valid inputs.
    if not ndvi_file.lower().endswith(('.tif', '.tiff')):
        continue

    ndvi_file_path = os.path.join(PS_ndvi_dir, ndvi_file)
    ndvi_date = ndvi_file[:8]  # raster names start with the date as YYYYMMDD
    ndvi_date_shapefile = find_hourly_ffp_shapefiles(ndvi_date, Hourly_FFP_dir)
    if not ndvi_date_shapefile:
        continue

    for h_shpfile in ndvi_date_shapefile:
        # The hour is the token after the last underscore of the shapefile
        # stem; parsing it avoids relying on a fixed-width name prefix.
        ffp_hour = os.path.splitext(h_shpfile)[0].rsplit('_', 1)[-1]

        h_shpfile_path = os.path.join(Hourly_FFP_dir, h_shpfile)
        shapefile = gpd.read_file(h_shpfile_path)

        # Keep valid geometries only; nothing can be clipped if none remain.
        shapefile = shapefile[shapefile.is_valid]
        if shapefile.empty:
            print(f"Warning: The shapefile {h_shpfile} has no valid geometry.")
            continue

        try:
            with rasterio.open(ndvi_file_path) as src:
                # Footprint and raster must share a CRS before masking.
                if shapefile.crs != src.crs:
                    shapefile = shapefile.to_crs(src.crs)

                # rasterio expects GeoJSON-like geometry mappings.
                geometries = [mapping(geometry) for geometry in shapefile.geometry]

                # crop=True limits the output to the extent of the shapes.
                out_image, out_transform = rst_mask(src, geometries, crop=True)

                # Reuse the source metadata, adapted to the clipped size/transform.
                out_meta = src.meta.copy()
                out_meta.update({
                    "driver": "GTiff",
                    "height": out_image.shape[1],
                    "width": out_image.shape[2],
                    "transform": out_transform
                })

                output_raster_path = os.path.join(output_directory, ndvi_file.replace('.tif', f'_{ffp_hour}_HFFP.tif'))
                with rasterio.open(output_raster_path, "w", **out_meta) as dest:
                    dest.write(out_image)

        except ValueError as e:
            # A footprint lying outside the raster is only reported; any other
            # ValueError is re-raised unchanged.
            if 'Input shapes do not overlap raster' in str(e):
                print(f"Warning: The shapefile {h_shpfile} does not overlap the raster.")
            else:
                raise

In [12]:
# Clip every imputed PlanetScope NDVI raster of Gatesburg 2022 to the hourly
# US-UC2 footprint (FFP) polygons of the same date. One GeoTIFF per
# (date, hour) is written to the Hourly_FFP_NDVI_UC2 sub-folder.
Hourly_FFP_dir = UC2_hourly_ffp_dir
PS_files_dir = Gatesburg_2022_PS_dir
output_dir_name = 'Hourly_FFP_NDVI_UC2'

PS_ndvi_dir = os.path.join(PS_files_dir, 'NDVI', 'standardized_crs', 'NDVI_imputed')
output_directory = os.path.join(PS_ndvi_dir, output_dir_name)
os.makedirs(output_directory, exist_ok=True)

# Sorted so the processing order is the same on every run.
for ndvi_file in sorted(os.listdir(PS_ndvi_dir)):
    # The listing includes the output folder created above (and possibly
    # sidecar files); only GeoTIFF rasters are valid inputs.
    if not ndvi_file.lower().endswith(('.tif', '.tiff')):
        continue

    ndvi_file_path = os.path.join(PS_ndvi_dir, ndvi_file)
    ndvi_date = ndvi_file[:8]  # raster names start with the date as YYYYMMDD
    ndvi_date_shapefile = find_hourly_ffp_shapefiles(ndvi_date, Hourly_FFP_dir)
    if not ndvi_date_shapefile:
        continue

    for h_shpfile in ndvi_date_shapefile:
        # The hour is the token after the last underscore of the shapefile
        # stem; parsing it avoids relying on a fixed-width name prefix.
        ffp_hour = os.path.splitext(h_shpfile)[0].rsplit('_', 1)[-1]

        h_shpfile_path = os.path.join(Hourly_FFP_dir, h_shpfile)
        shapefile = gpd.read_file(h_shpfile_path)

        # Keep valid geometries only; nothing can be clipped if none remain.
        shapefile = shapefile[shapefile.is_valid]
        if shapefile.empty:
            print(f"Warning: The shapefile {h_shpfile} has no valid geometry.")
            continue

        try:
            with rasterio.open(ndvi_file_path) as src:
                # Footprint and raster must share a CRS before masking.
                if shapefile.crs != src.crs:
                    shapefile = shapefile.to_crs(src.crs)

                # rasterio expects GeoJSON-like geometry mappings.
                geometries = [mapping(geometry) for geometry in shapefile.geometry]

                # crop=True limits the output to the extent of the shapes.
                out_image, out_transform = rst_mask(src, geometries, crop=True)

                # Reuse the source metadata, adapted to the clipped size/transform.
                out_meta = src.meta.copy()
                out_meta.update({
                    "driver": "GTiff",
                    "height": out_image.shape[1],
                    "width": out_image.shape[2],
                    "transform": out_transform
                })

                output_raster_path = os.path.join(output_directory, ndvi_file.replace('.tif', f'_{ffp_hour}_HFFP.tif'))
                with rasterio.open(output_raster_path, "w", **out_meta) as dest:
                    dest.write(out_image)

        except ValueError as e:
            # A footprint lying outside the raster is only reported; any other
            # ValueError is re-raised unchanged.
            if 'Input shapes do not overlap raster' in str(e):
                print(f"Warning: The shapefile {h_shpfile} does not overlap the raster.")
            else:
                raise

In [6]:
# Clip every imputed PlanetScope NDVI raster of Gatesburg 2024 to the hourly
# US-UC2 footprint (FFP) polygons of the same date. One GeoTIFF per
# (date, hour) is written to the Hourly_FFP_NDVI_UC2 sub-folder.
Hourly_FFP_dir = UC2_hourly_ffp_dir
PS_files_dir = Gatesburg_2024_PS_dir
output_dir_name = 'Hourly_FFP_NDVI_UC2'

PS_ndvi_dir = os.path.join(PS_files_dir, 'NDVI', 'standardized_crs', 'NDVI_imputed')
output_directory = os.path.join(PS_ndvi_dir, output_dir_name)
os.makedirs(output_directory, exist_ok=True)

# Sorted so the processing order is the same on every run.
for ndvi_file in sorted(os.listdir(PS_ndvi_dir)):
    # The listing includes the output folder created above (and possibly
    # sidecar files); only GeoTIFF rasters are valid inputs.
    if not ndvi_file.lower().endswith(('.tif', '.tiff')):
        continue

    ndvi_file_path = os.path.join(PS_ndvi_dir, ndvi_file)
    ndvi_date = ndvi_file[:8]  # raster names start with the date as YYYYMMDD
    ndvi_date_shapefile = find_hourly_ffp_shapefiles(ndvi_date, Hourly_FFP_dir)
    if not ndvi_date_shapefile:
        continue

    for h_shpfile in ndvi_date_shapefile:
        # The hour is the token after the last underscore of the shapefile
        # stem; parsing it avoids relying on a fixed-width name prefix.
        ffp_hour = os.path.splitext(h_shpfile)[0].rsplit('_', 1)[-1]

        h_shpfile_path = os.path.join(Hourly_FFP_dir, h_shpfile)
        shapefile = gpd.read_file(h_shpfile_path)

        # Keep valid geometries only; nothing can be clipped if none remain.
        shapefile = shapefile[shapefile.is_valid]
        if shapefile.empty:
            print(f"Warning: The shapefile {h_shpfile} has no valid geometry.")
            continue

        try:
            with rasterio.open(ndvi_file_path) as src:
                # Footprint and raster must share a CRS before masking.
                if shapefile.crs != src.crs:
                    shapefile = shapefile.to_crs(src.crs)

                # rasterio expects GeoJSON-like geometry mappings.
                geometries = [mapping(geometry) for geometry in shapefile.geometry]

                # crop=True limits the output to the extent of the shapes.
                out_image, out_transform = rst_mask(src, geometries, crop=True)

                # Reuse the source metadata, adapted to the clipped size/transform.
                out_meta = src.meta.copy()
                out_meta.update({
                    "driver": "GTiff",
                    "height": out_image.shape[1],
                    "width": out_image.shape[2],
                    "transform": out_transform
                })

                output_raster_path = os.path.join(output_directory, ndvi_file.replace('.tif', f'_{ffp_hour}_HFFP.tif'))
                with rasterio.open(output_raster_path, "w", **out_meta) as dest:
                    dest.write(out_image)

        except ValueError as e:
            # A footprint lying outside the raster is only reported; any other
            # ValueError is re-raised unchanged.
            if 'Input shapes do not overlap raster' in str(e):
                print(f"Warning: The shapefile {h_shpfile} does not overlap the raster.")
            else:
                raise

### HWB

In [43]:
# Clip every imputed PlanetScope NDVI raster of US-HWB 2017 to the hourly
# US-HWB footprint (FFP) polygons of the same date. One GeoTIFF per
# (date, hour) is written to the Hourly_FFP_NDVI_HWB sub-folder.
Hourly_FFP_dir = HWB_hourly_ffp_dir
PS_files_dir = US_HWB_2017_PS_dir
output_dir_name = 'Hourly_FFP_NDVI_HWB'

PS_ndvi_dir = os.path.join(PS_files_dir, 'NDVI', 'standardized_crs', 'NDVI_imputed')
output_directory = os.path.join(PS_ndvi_dir, output_dir_name)
os.makedirs(output_directory, exist_ok=True)

# Sorted so the processing order is the same on every run.
for ndvi_file in sorted(os.listdir(PS_ndvi_dir)):
    # The listing includes the output folder created above (and possibly
    # sidecar files); only GeoTIFF rasters are valid inputs.
    if not ndvi_file.lower().endswith(('.tif', '.tiff')):
        continue

    ndvi_file_path = os.path.join(PS_ndvi_dir, ndvi_file)
    ndvi_date = ndvi_file[:8]  # raster names start with the date as YYYYMMDD
    ndvi_date_shapefile = find_hourly_ffp_shapefiles(ndvi_date, Hourly_FFP_dir)
    if not ndvi_date_shapefile:
        continue

    for h_shpfile in ndvi_date_shapefile:
        # The hour is the token after the last underscore of the shapefile
        # stem; parsing it avoids relying on a fixed-width name prefix.
        ffp_hour = os.path.splitext(h_shpfile)[0].rsplit('_', 1)[-1]

        h_shpfile_path = os.path.join(Hourly_FFP_dir, h_shpfile)
        shapefile = gpd.read_file(h_shpfile_path)

        # Keep valid geometries only; nothing can be clipped if none remain.
        shapefile = shapefile[shapefile.is_valid]
        if shapefile.empty:
            print(f"Warning: The shapefile {h_shpfile} has no valid geometry.")
            continue

        try:
            with rasterio.open(ndvi_file_path) as src:
                # Footprint and raster must share a CRS before masking.
                if shapefile.crs != src.crs:
                    shapefile = shapefile.to_crs(src.crs)

                # rasterio expects GeoJSON-like geometry mappings.
                geometries = [mapping(geometry) for geometry in shapefile.geometry]

                # crop=True limits the output to the extent of the shapes.
                out_image, out_transform = rst_mask(src, geometries, crop=True)

                # Reuse the source metadata, adapted to the clipped size/transform.
                out_meta = src.meta.copy()
                out_meta.update({
                    "driver": "GTiff",
                    "height": out_image.shape[1],
                    "width": out_image.shape[2],
                    "transform": out_transform
                })

                output_raster_path = os.path.join(output_directory, ndvi_file.replace('.tif', f'_{ffp_hour}_HFFP.tif'))
                with rasterio.open(output_raster_path, "w", **out_meta) as dest:
                    dest.write(out_image)

        except ValueError as e:
            # A footprint lying outside the raster is only reported; any other
            # ValueError is re-raised unchanged.
            if 'Input shapes do not overlap raster' in str(e):
                print(f"Warning: The shapefile {h_shpfile} does not overlap the raster.")
            else:
                raise

CPLE_AppDefinedError: Deleting C:/Users/adadkhah/OneDrive - University of Vermont/UVM/Projects/ET/Hourly_predictions_version/3_PlanetScope_data/Data/US-HWB/2017/NDVI/standardized_crs/NDVI_imputed/Hourly_FFP_NDVI_HWB/20170604_PS_NDVI_imputed_14_HFFP.tif failed: Permission denied

In [41]:
# Debug check: display the value currently bound to `ndvi_file`.
ndvi_file

'20170927_PS_NDVI_imputed.tif'

In [42]:
# Debug check: display the value currently bound to `ffp_hour`.
ffp_hour

'7'

## Extracting FFP NDVI DataFrames (with date and hour)

In [7]:
def ffp_NDVI_raster_to_gdf_with_datetime(multiband_raster, Band_name='NDVI'):
    """Convert a footprint-clipped raster into a per-pixel GeoDataFrame.

    Parameters
    ----------
    multiband_raster : str
        Path of the raster to read. The first 8 characters of the file name
        are taken as the date; the hour is read from a trailing
        ``_<H>_HFFP.tif`` suffix (one- or two-digit ``<H>``).
    Band_name : str, default 'NDVI'
        Name given to the raster values and to the value column of the result.

    Returns
    -------
    geopandas.GeoDataFrame
        One row per raster cell with the columns ``Date``, ``hour``,
        ``latitude``, ``longitude``, ``<Band_name>`` and ``geometry``.
        Coordinates are reprojected to EPSG:4326; ``Date`` and ``hour`` are
        the strings parsed from the file name.
    """
    file_name = os.path.basename(multiband_raster)
    image_date = file_name[:8]

    # Parse the hour from the "_<H>_HFFP.tif" suffix; fall back to the
    # historical fixed-offset slice for names that do not follow that form.
    hour_match = re.search(r'_(\d{1,2})_HFFP\.tif$', file_name)
    image_hour = hour_match.group(1) if hour_match else file_name[25:-9]

    with rioxarray.open_rasterio(multiband_raster) as raster:
        # to_dataframe() needs a named array; the name becomes the value column.
        raster.name = Band_name
        df = raster.squeeze().to_dataframe().reset_index()

        # Build point geometries from the raster coordinates, then reproject.
        geometry = gpd.points_from_xy(df.x, df.y)
        gdf = gpd.GeoDataFrame(df, crs=raster.rio.crs, geometry=geometry).to_crs(epsg=4326)
        gdf['latitude'] = gdf.geometry.y
        gdf['longitude'] = gdf.geometry.x

        gdf = gdf.drop(columns=['x', 'y', 'spatial_ref', 'band'], errors='ignore')
        gdf['Date'] = image_date
        gdf['hour'] = image_hour

        # Select by Band_name so a non-default band name does not raise KeyError.
        return gdf[['Date', 'hour', 'latitude', 'longitude', Band_name, 'geometry']]

In [8]:
# PlanetScope (PS) data folders, resolved relative to the current working directory.
_ps_data_root = os.path.join(os.getcwd(), 'Data')

# Gatesburg: one folder per year of imagery
_gatesburg_root = os.path.join(_ps_data_root, 'Gatesburg')
Gatesburg_2019_PS_dir = os.path.join(_gatesburg_root, '2019')
Gatesburg_2020_PS_dir = os.path.join(_gatesburg_root, '2020')
Gatesburg_2021_PS_dir = os.path.join(_gatesburg_root, '2021')
Gatesburg_2022_PS_dir = os.path.join(_gatesburg_root, '2022')
Gatesburg_2024_PS_dir = os.path.join(_gatesburg_root, '2024')

# Test site
US_HWB_2017_PS_dir = os.path.join(_ps_data_root, 'US-HWB', '2017')

# Folders holding the hourly footprint-clipped NDVI rasters; each lives under
# <site/year>/NDVI/standardized_crs/NDVI_imputed.
_ndvi_imputed_parts = ('NDVI', 'standardized_crs', 'NDVI_imputed')

UC1_ffp_NDVI_2019_dir = os.path.join(Gatesburg_2019_PS_dir, *_ndvi_imputed_parts, 'Hourly_FFP_NDVI_UC1')
UC1_ffp_NDVI_2020_dir = os.path.join(Gatesburg_2020_PS_dir, *_ndvi_imputed_parts, 'Hourly_FFP_NDVI_UC1')
UC1_ffp_NDVI_2021_dir = os.path.join(Gatesburg_2021_PS_dir, *_ndvi_imputed_parts, 'Hourly_FFP_NDVI_UC1')
UC1_ffp_NDVI_2022_dir = os.path.join(Gatesburg_2022_PS_dir, *_ndvi_imputed_parts, 'Hourly_FFP_NDVI_UC1')
UC1_ffp_NDVI_2024_dir = os.path.join(Gatesburg_2024_PS_dir, *_ndvi_imputed_parts, 'Hourly_FFP_NDVI_UC1')

UC2_ffp_NDVI_2019_dir = os.path.join(Gatesburg_2019_PS_dir, *_ndvi_imputed_parts, 'Hourly_FFP_NDVI_UC2')
UC2_ffp_NDVI_2020_dir = os.path.join(Gatesburg_2020_PS_dir, *_ndvi_imputed_parts, 'Hourly_FFP_NDVI_UC2')
UC2_ffp_NDVI_2021_dir = os.path.join(Gatesburg_2021_PS_dir, *_ndvi_imputed_parts, 'Hourly_FFP_NDVI_UC2')
UC2_ffp_NDVI_2022_dir = os.path.join(Gatesburg_2022_PS_dir, *_ndvi_imputed_parts, 'Hourly_FFP_NDVI_UC2')
UC2_ffp_NDVI_2024_dir = os.path.join(Gatesburg_2024_PS_dir, *_ndvi_imputed_parts, 'Hourly_FFP_NDVI_UC2')

HWB_ffp_NDVI_2017_dir = os.path.join(US_HWB_2017_PS_dir, *_ndvi_imputed_parts, 'Hourly_FFP_NDVI_HWB')

In [44]:
# Export the hourly mean footprint NDVI of each site/year folder to a CSV file
# in the current working directory (PS_AVE_<folder name>_<year>.csv).
hourly_ffp_ndvi_dir_list = [UC1_ffp_NDVI_2019_dir, UC1_ffp_NDVI_2020_dir, UC1_ffp_NDVI_2021_dir, UC1_ffp_NDVI_2022_dir, 
                            UC2_ffp_NDVI_2019_dir, UC2_ffp_NDVI_2020_dir, UC2_ffp_NDVI_2021_dir, UC2_ffp_NDVI_2022_dir,
                            HWB_ffp_NDVI_2017_dir]

for Hourly_ffp_NDVI_raster_dir in hourly_ffp_ndvi_dir_list:

    # One per-pixel GeoDataFrame per clipped raster (sorted for a stable order).
    all_gdfs = []
    for filename in sorted(os.listdir(Hourly_ffp_NDVI_raster_dir)):
        if filename.endswith('.tif'):  # Assuming all rasters are in .tif format
            raster_path = os.path.join(Hourly_ffp_NDVI_raster_dir, filename)
            all_gdfs.append(ffp_NDVI_raster_to_gdf_with_datetime(raster_path))

    # pd.concat raises on an empty list, so a folder without rasters is skipped.
    if not all_gdfs:
        print(f"Warning: no .tif rasters found in {Hourly_ffp_NDVI_raster_dir}; skipped.")
        continue

    combined_gdf = gpd.GeoDataFrame(pd.concat(all_gdfs, ignore_index=True))
    # Exclude cells whose NDVI is exactly 0 from the mean
    # (presumably the fill value outside the footprint -- TODO confirm).
    filtered_combined_gdf = combined_gdf[combined_gdf['NDVI']!=0]

    path_basename = os.path.basename(Hourly_ffp_NDVI_raster_dir)
    # Take the LAST year-like token of the path: the year folder is the deepest
    # one, so a year-like parent folder cannot be picked up by mistake.
    year_matches = re.findall(r'\b(?:19|20)\d{2}\b', Hourly_ffp_NDVI_raster_dir)
    if not year_matches:
        raise ValueError(f"No year found in path: {Hourly_ffp_NDVI_raster_dir}")
    year = year_matches[-1]

    # Export only the mean NDVI per (Date, hour).
    filtered_combined_gdf.groupby(['Date', 'hour'])['NDVI'].mean().reset_index().to_csv(f'PS_AVE_{path_basename}_{year}.csv', index=False)

In [21]:
# Preview the mean NDVI per (Date, hour) for the frame currently held in
# `filtered_combined_gdf`.
(
    filtered_combined_gdf
    .groupby(['Date', 'hour'])['NDVI']
    .mean()
    .reset_index()
)

Unnamed: 0,Date,hour,NDVI
0,20190520,10_HFFP,0.580858
1,20190520,11_HFFP,0.560801
2,20190520,12_HFFP,0.544683
3,20190520,13_HFFP,0.565204
4,20190520,14_HFFP,0.539253
...,...,...,...
1467,20190911,16_HFFP,0.694257
1468,20190911,17_HFFP,0.693347
1469,20190911,7_HFFP,0.691865
1470,20190911,8_HFFP,0.691754


In [9]:
# Export the hourly mean footprint NDVI of the 2024 folders to a CSV file in
# the current working directory (PS_AVE_<folder name>_<year>.csv).
hourly_ffp_ndvi_dir_list = [UC1_ffp_NDVI_2024_dir, UC2_ffp_NDVI_2024_dir]

for Hourly_ffp_NDVI_raster_dir in hourly_ffp_ndvi_dir_list:

    # One per-pixel GeoDataFrame per clipped raster (sorted for a stable order).
    all_gdfs = []
    for filename in sorted(os.listdir(Hourly_ffp_NDVI_raster_dir)):
        if filename.endswith('.tif'):  # Assuming all rasters are in .tif format
            raster_path = os.path.join(Hourly_ffp_NDVI_raster_dir, filename)
            all_gdfs.append(ffp_NDVI_raster_to_gdf_with_datetime(raster_path))

    # pd.concat raises on an empty list, so a folder without rasters is skipped.
    if not all_gdfs:
        print(f"Warning: no .tif rasters found in {Hourly_ffp_NDVI_raster_dir}; skipped.")
        continue

    combined_gdf = gpd.GeoDataFrame(pd.concat(all_gdfs, ignore_index=True))
    # Exclude cells whose NDVI is exactly 0 from the mean
    # (presumably the fill value outside the footprint -- TODO confirm).
    filtered_combined_gdf = combined_gdf[combined_gdf['NDVI']!=0]

    path_basename = os.path.basename(Hourly_ffp_NDVI_raster_dir)
    # Take the LAST year-like token of the path: the year folder is the deepest
    # one, so a year-like parent folder cannot be picked up by mistake.
    year_matches = re.findall(r'\b(?:19|20)\d{2}\b', Hourly_ffp_NDVI_raster_dir)
    if not year_matches:
        raise ValueError(f"No year found in path: {Hourly_ffp_NDVI_raster_dir}")
    year = year_matches[-1]

    # Export only the mean NDVI per (Date, hour).
    filtered_combined_gdf.groupby(['Date', 'hour'])['NDVI'].mean().reset_index().to_csv(f'PS_AVE_{path_basename}_{year}.csv', index=False)