In [1]:
import numpy as np
from rasterio.io import MemoryFile
from collections import defaultdict
import rasterio
import os
from xml.dom import minidom
import re

In [2]:
def find_tif_files_with_date(directory):
    """
    Group the ``.tif`` files found directly in a directory by their date prefix.

    Only names that start with eight digits (YYYYMMDD) and end with ``.tif``
    are considered; sub-directories are not searched.

    :param directory: The directory whose entries are listed.
    :return: Dictionary mapping each 'YYYYMMDD' string to the list of matching
             file names (names only, without the directory part).
    """
    date_prefixed_tif = re.compile(r'^(\d{8}).*\.tif$')
    grouped = {}

    for entry in os.listdir(directory):
        hit = date_prefixed_tif.match(entry)
        if hit is None:
            # Not a date-prefixed .tif file
            continue
        # The first capture group is the YYYYMMDD prefix
        grouped.setdefault(hit.group(1), []).append(entry)

    return grouped


def group_files_by_date_time(directory):
    """
    Group the files below a directory by the first two '_'-separated tokens
    of their names (expected to be YYYYMMDD and HHMMSS).

    The directory is walked recursively, but only bare file names are stored,
    so files from sub-directories end up in the same lists as top-level files.
    Names without any '_' are ignored.

    :param directory: The directory to walk.
    :return: defaultdict(list) keyed by '<token0>_<token1>' whose values are
             the file names sharing that prefix.
    """
    grouped = defaultdict(list)

    for _root, _subdirs, names in os.walk(directory):
        for name in names:
            # Only the first two tokens matter, so stop splitting after them
            tokens = name.split('_', 2)
            if len(tokens) < 2:
                continue
            grouped['_'.join(tokens[:2])].append(name)

    return grouped
    

def find_meta_files_with_date(directory):
    """
    Group the clipped metadata XML files found directly in a directory by
    their date prefix.

    Only names that start with eight digits (YYYYMMDD) and end with
    ``_clip.xml`` are considered; sub-directories are not searched.

    :param directory: The directory whose entries are listed.
    :return: Dictionary mapping each 'YYYYMMDD' string to the list of matching
             file names (names only, without the directory part).
    """
    date_prefixed_meta = re.compile(r'^(\d{8}).*_clip\.xml$')
    grouped = {}

    for entry in os.listdir(directory):
        hit = date_prefixed_meta.match(entry)
        if hit is None:
            # Not a date-prefixed *_clip.xml file
            continue
        # The first capture group is the YYYYMMDD prefix
        grouped.setdefault(hit.group(1), []).append(entry)

    return grouped
    

def extract_PS_8band_coeff_from_xml(path_to_xml_file):
    """
    Read the per-band reflectance coefficients from a metadata XML file.

    Every ``ps:bandSpecificMetadata`` element whose ``ps:bandNumber`` is one of
    '1' to '8' contributes its ``ps:reflectanceCoefficient`` value; elements
    with any other band number are ignored.

    :param path_to_xml_file: Path of the XML file to parse.
    :return: Dictionary mapping the band number (int) to its coefficient (float).
    """
    accepted_band_numbers = {str(number) for number in range(1, 9)}
    document = minidom.parse(path_to_xml_file)

    coefficients_by_band = {}
    for band_node in document.getElementsByTagName("ps:bandSpecificMetadata"):
        band_number = band_node.getElementsByTagName("ps:bandNumber")[0].firstChild.data
        if band_number not in accepted_band_numbers:
            continue
        # The coefficient is only looked up for accepted bands
        coefficient_text = band_node.getElementsByTagName("ps:reflectanceCoefficient")[0].firstChild.data
        coefficients_by_band[int(band_number)] = float(coefficient_text)

    return coefficients_by_band

In [3]:
# 4 years of data for gatesburg
Gatesburg_2019_PS_dir = os.path.join(os.getcwd(), 'Data', 'Gatesburg', '2019')
Gatesburg_2020_PS_dir = os.path.join(os.getcwd(), 'Data', 'Gatesburg', '2020')
Gatesburg_2021_PS_dir = os.path.join(os.getcwd(), 'Data', 'Gatesburg', '2021')
Gatesburg_2022_PS_dir = os.path.join(os.getcwd(), 'Data', 'Gatesburg', '2022')
Gatesburg_2024_PS_dir = os.path.join(os.getcwd(), 'Data', 'Gatesburg', '2024')

# test sites
US_HWB_2017_PS_dir = os.path.join(os.getcwd(), 'Data', 'US-HWB')

In [15]:
PS_imagery_dir = Gatesburg_2019_PS_dir

# Step 1: group the images (and metadata files) that share the same date and time
PS_file_list_by_date = group_files_by_date_time(PS_imagery_dir)

# The NDVI rasters are written to an 'NDVI' sub-folder of the imagery directory
NDVI_output_dir = os.path.join(PS_imagery_dir, 'NDVI')
os.makedirs(NDVI_output_dir, exist_ok=True)

# File-name endings of the rasters that NDVI is computed from
raster_suffixes = (
    'SR_clip_masked.tif',
    'SR_8b_clip_masked.tif',
    'AnalyticMS_clip_masked.tif',
    'AnalyticMS_SR_clip_masked.tif',
)

# Step 2: scale the bands by their reflectance coefficients and write the NDVI
for key, date_time_PS_list in PS_file_list_by_date.items():
    # Reset for every date/time group so that a group with a missing file can
    # never silently reuse the paths found for the previous group
    SR_file_path = None
    meta_file_path = None

    # NOTE(review): the grouped names come without their folder, so they are
    # assumed to sit directly in PS_imagery_dir - confirm for nested folders
    for file_name in date_time_PS_list:
        if file_name.endswith(raster_suffixes):
            SR_file_path = os.path.join(PS_imagery_dir, file_name)
            # The first 15 characters are the 'YYYYMMDD_HHMMSS' stamp
            image_datetime = file_name[:15]
        elif file_name.endswith('metadata_clip.xml'):
            meta_file_path = os.path.join(PS_imagery_dir, file_name)

    if SR_file_path is None or meta_file_path is None:
        print(f"Skipping {key}: no matching raster/metadata pair found")
        continue

    # get the coeffs associated with the image bands
    PS_coeffs = extract_PS_8band_coeff_from_xml(meta_file_path)

    # open the raster file, read the bands and scale them by their coefficients
    with rasterio.open(SR_file_path) as PS_raster:
        band_count = PS_raster.count
        if band_count == 8:
            band_blue = PS_raster.read(2) * PS_coeffs[2]
            band_greenI = PS_raster.read(3) * PS_coeffs[3]
            band_green = PS_raster.read(4) * PS_coeffs[4]
            band_yellow = PS_raster.read(5) * PS_coeffs[5]
            band_red = PS_raster.read(6) * PS_coeffs[6]
            band_redE = PS_raster.read(7) * PS_coeffs[7]
            band_NIR = PS_raster.read(8) * PS_coeffs[8]
        elif band_count == 4:
            band_blue = PS_raster.read(1) * PS_coeffs[1]
            band_green = PS_raster.read(2) * PS_coeffs[2]
            band_red = PS_raster.read(3) * PS_coeffs[3]
            band_NIR = PS_raster.read(4) * PS_coeffs[4]
        else:
            # Without this guard the bands of the previous image would be reused
            print(f"Skipping {key}: unsupported band count {band_count}")
            continue

        kwargs = PS_raster.meta

    # Calculate NDVI; division warnings are silenced only for this expression
    # instead of changing NumPy's global error state
    with np.errstate(divide='ignore', invalid='ignore'):
        ndvi = (band_NIR.astype(float) - band_red.astype(float)) / (band_NIR + band_red)
    # Match the dtype declared for the output dataset below
    ndvi = ndvi.astype(np.float32)
    #EVI = 2.5 * (band_NIR.astype(float) - band_red.astype(float))/(band_NIR.astype(float) + 6*band_red.astype(float) - 7*band_blue.astype(float) + 1)
    #EVI = EVI.astype(np.float32)

    # NOTE(review): kwargs still carries the source raster's nodata value, and
    # pixels where NIR + red == 0 are written as NaN/inf - confirm this is intended
    kwargs.update(dtype=rasterio.float32, count=1)
    VI_output_path = os.path.join(NDVI_output_dir, (image_datetime+'_NDVI.tif'))
    with rasterio.open(VI_output_path, 'w', **kwargs) as dst:
        dst.write(ndvi, 1)

In [None]:
PS_imagery_dir = Gatesburg_2020_PS_dir

# Step 1: group the images (and metadata files) that share the same date and time
PS_file_list_by_date = group_files_by_date_time(PS_imagery_dir)

# The NDVI rasters are written to an 'NDVI' sub-folder of the imagery directory
NDVI_output_dir = os.path.join(PS_imagery_dir, 'NDVI')
os.makedirs(NDVI_output_dir, exist_ok=True)

# File-name endings of the rasters that NDVI is computed from
raster_suffixes = (
    'SR_clip_masked.tif',
    'SR_8b_clip_masked.tif',
    'AnalyticMS_clip_masked.tif',
    'AnalyticMS_SR_clip_masked.tif',
)

# Step 2: scale the bands by their reflectance coefficients and write the NDVI
for key, date_time_PS_list in PS_file_list_by_date.items():
    # Reset for every date/time group so that a group with a missing file can
    # never silently reuse the paths found for the previous group
    SR_file_path = None
    meta_file_path = None

    # NOTE(review): the grouped names come without their folder, so they are
    # assumed to sit directly in PS_imagery_dir - confirm for nested folders
    for file_name in date_time_PS_list:
        if file_name.endswith(raster_suffixes):
            SR_file_path = os.path.join(PS_imagery_dir, file_name)
            # The first 15 characters are the 'YYYYMMDD_HHMMSS' stamp
            image_datetime = file_name[:15]
        elif file_name.endswith('metadata_clip.xml'):
            meta_file_path = os.path.join(PS_imagery_dir, file_name)

    if SR_file_path is None or meta_file_path is None:
        print(f"Skipping {key}: no matching raster/metadata pair found")
        continue

    # get the coeffs associated with the image bands
    PS_coeffs = extract_PS_8band_coeff_from_xml(meta_file_path)

    # open the raster file, read the bands and scale them by their coefficients
    with rasterio.open(SR_file_path) as PS_raster:
        band_count = PS_raster.count
        if band_count == 8:
            band_blue = PS_raster.read(2) * PS_coeffs[2]
            band_greenI = PS_raster.read(3) * PS_coeffs[3]
            band_green = PS_raster.read(4) * PS_coeffs[4]
            band_yellow = PS_raster.read(5) * PS_coeffs[5]
            band_red = PS_raster.read(6) * PS_coeffs[6]
            band_redE = PS_raster.read(7) * PS_coeffs[7]
            band_NIR = PS_raster.read(8) * PS_coeffs[8]
        elif band_count == 4:
            band_blue = PS_raster.read(1) * PS_coeffs[1]
            band_green = PS_raster.read(2) * PS_coeffs[2]
            band_red = PS_raster.read(3) * PS_coeffs[3]
            band_NIR = PS_raster.read(4) * PS_coeffs[4]
        else:
            # Without this guard the bands of the previous image would be reused
            print(f"Skipping {key}: unsupported band count {band_count}")
            continue

        kwargs = PS_raster.meta

    # Calculate NDVI; division warnings are silenced only for this expression
    # instead of changing NumPy's global error state
    with np.errstate(divide='ignore', invalid='ignore'):
        ndvi = (band_NIR.astype(float) - band_red.astype(float)) / (band_NIR + band_red)
    # Match the dtype declared for the output dataset below
    ndvi = ndvi.astype(np.float32)
    #EVI = 2.5 * (band_NIR.astype(float) - band_red.astype(float))/(band_NIR.astype(float) + 6*band_red.astype(float) - 7*band_blue.astype(float) + 1)
    #EVI = EVI.astype(np.float32)

    # NOTE(review): kwargs still carries the source raster's nodata value, and
    # pixels where NIR + red == 0 are written as NaN/inf - confirm this is intended
    kwargs.update(dtype=rasterio.float32, count=1)
    VI_output_path = os.path.join(NDVI_output_dir, (image_datetime+'_NDVI.tif'))
    with rasterio.open(VI_output_path, 'w', **kwargs) as dst:
        dst.write(ndvi, 1)

In [None]:
PS_imagery_dir = Gatesburg_2021_PS_dir

# Step 1: group the images (and metadata files) that share the same date and time
PS_file_list_by_date = group_files_by_date_time(PS_imagery_dir)

# The NDVI rasters are written to an 'NDVI' sub-folder of the imagery directory
NDVI_output_dir = os.path.join(PS_imagery_dir, 'NDVI')
os.makedirs(NDVI_output_dir, exist_ok=True)

# File-name endings of the rasters that NDVI is computed from
raster_suffixes = (
    'SR_clip_masked.tif',
    'SR_8b_clip_masked.tif',
    'AnalyticMS_clip_masked.tif',
    'AnalyticMS_SR_clip_masked.tif',
)

# Step 2: scale the bands by their reflectance coefficients and write the NDVI
for key, date_time_PS_list in PS_file_list_by_date.items():
    # Reset for every date/time group so that a group with a missing file can
    # never silently reuse the paths found for the previous group
    SR_file_path = None
    meta_file_path = None

    # NOTE(review): the grouped names come without their folder, so they are
    # assumed to sit directly in PS_imagery_dir - confirm for nested folders
    for file_name in date_time_PS_list:
        if file_name.endswith(raster_suffixes):
            SR_file_path = os.path.join(PS_imagery_dir, file_name)
            # The first 15 characters are the 'YYYYMMDD_HHMMSS' stamp
            image_datetime = file_name[:15]
        elif file_name.endswith('metadata_clip.xml'):
            meta_file_path = os.path.join(PS_imagery_dir, file_name)

    if SR_file_path is None or meta_file_path is None:
        print(f"Skipping {key}: no matching raster/metadata pair found")
        continue

    # get the coeffs associated with the image bands
    PS_coeffs = extract_PS_8band_coeff_from_xml(meta_file_path)

    # open the raster file, read the bands and scale them by their coefficients
    with rasterio.open(SR_file_path) as PS_raster:
        band_count = PS_raster.count
        if band_count == 8:
            band_blue = PS_raster.read(2) * PS_coeffs[2]
            band_greenI = PS_raster.read(3) * PS_coeffs[3]
            band_green = PS_raster.read(4) * PS_coeffs[4]
            band_yellow = PS_raster.read(5) * PS_coeffs[5]
            band_red = PS_raster.read(6) * PS_coeffs[6]
            band_redE = PS_raster.read(7) * PS_coeffs[7]
            band_NIR = PS_raster.read(8) * PS_coeffs[8]
        elif band_count == 4:
            band_blue = PS_raster.read(1) * PS_coeffs[1]
            band_green = PS_raster.read(2) * PS_coeffs[2]
            band_red = PS_raster.read(3) * PS_coeffs[3]
            band_NIR = PS_raster.read(4) * PS_coeffs[4]
        else:
            # Without this guard the bands of the previous image would be reused
            print(f"Skipping {key}: unsupported band count {band_count}")
            continue

        kwargs = PS_raster.meta

    # Calculate NDVI; division warnings are silenced only for this expression
    # instead of changing NumPy's global error state
    with np.errstate(divide='ignore', invalid='ignore'):
        ndvi = (band_NIR.astype(float) - band_red.astype(float)) / (band_NIR + band_red)
    # Match the dtype declared for the output dataset below
    ndvi = ndvi.astype(np.float32)
    #EVI = 2.5 * (band_NIR.astype(float) - band_red.astype(float))/(band_NIR.astype(float) + 6*band_red.astype(float) - 7*band_blue.astype(float) + 1)
    #EVI = EVI.astype(np.float32)

    # NOTE(review): kwargs still carries the source raster's nodata value, and
    # pixels where NIR + red == 0 are written as NaN/inf - confirm this is intended
    kwargs.update(dtype=rasterio.float32, count=1)
    VI_output_path = os.path.join(NDVI_output_dir, (image_datetime+'_NDVI.tif'))
    with rasterio.open(VI_output_path, 'w', **kwargs) as dst:
        dst.write(ndvi, 1)

In [16]:
PS_imagery_dir = Gatesburg_2022_PS_dir

# Step 1: group the images (and metadata files) that share the same date and time
PS_file_list_by_date = group_files_by_date_time(PS_imagery_dir)

# The NDVI rasters are written to an 'NDVI' sub-folder of the imagery directory
NDVI_output_dir = os.path.join(PS_imagery_dir, 'NDVI')
os.makedirs(NDVI_output_dir, exist_ok=True)

# File-name endings of the rasters that NDVI is computed from
raster_suffixes = (
    'SR_clip_masked.tif',
    'SR_8b_clip_masked.tif',
    'AnalyticMS_clip_masked.tif',
    'AnalyticMS_SR_clip_masked.tif',
)

# Step 2: scale the bands by their reflectance coefficients and write the NDVI
for key, date_time_PS_list in PS_file_list_by_date.items():
    # Reset for every date/time group so that a group with a missing file can
    # never silently reuse the paths found for the previous group
    SR_file_path = None
    meta_file_path = None

    # NOTE(review): the grouped names come without their folder, so they are
    # assumed to sit directly in PS_imagery_dir - confirm for nested folders
    for file_name in date_time_PS_list:
        if file_name.endswith(raster_suffixes):
            SR_file_path = os.path.join(PS_imagery_dir, file_name)
            # The first 15 characters are the 'YYYYMMDD_HHMMSS' stamp
            image_datetime = file_name[:15]
        elif file_name.endswith('metadata_clip.xml'):
            meta_file_path = os.path.join(PS_imagery_dir, file_name)

    if SR_file_path is None or meta_file_path is None:
        print(f"Skipping {key}: no matching raster/metadata pair found")
        continue

    # get the coeffs associated with the image bands
    PS_coeffs = extract_PS_8band_coeff_from_xml(meta_file_path)

    # open the raster file, read the bands and scale them by their coefficients
    with rasterio.open(SR_file_path) as PS_raster:
        band_count = PS_raster.count
        if band_count == 8:
            band_blue = PS_raster.read(2) * PS_coeffs[2]
            band_greenI = PS_raster.read(3) * PS_coeffs[3]
            band_green = PS_raster.read(4) * PS_coeffs[4]
            band_yellow = PS_raster.read(5) * PS_coeffs[5]
            band_red = PS_raster.read(6) * PS_coeffs[6]
            band_redE = PS_raster.read(7) * PS_coeffs[7]
            band_NIR = PS_raster.read(8) * PS_coeffs[8]
        elif band_count == 4:
            band_blue = PS_raster.read(1) * PS_coeffs[1]
            band_green = PS_raster.read(2) * PS_coeffs[2]
            band_red = PS_raster.read(3) * PS_coeffs[3]
            band_NIR = PS_raster.read(4) * PS_coeffs[4]
        else:
            # Without this guard the bands of the previous image would be reused
            print(f"Skipping {key}: unsupported band count {band_count}")
            continue

        kwargs = PS_raster.meta

    # Calculate NDVI; division warnings are silenced only for this expression
    # instead of changing NumPy's global error state
    with np.errstate(divide='ignore', invalid='ignore'):
        ndvi = (band_NIR.astype(float) - band_red.astype(float)) / (band_NIR + band_red)
    # Match the dtype declared for the output dataset below
    ndvi = ndvi.astype(np.float32)
    #EVI = 2.5 * (band_NIR.astype(float) - band_red.astype(float))/(band_NIR.astype(float) + 6*band_red.astype(float) - 7*band_blue.astype(float) + 1)
    #EVI = EVI.astype(np.float32)

    # NOTE(review): kwargs still carries the source raster's nodata value, and
    # pixels where NIR + red == 0 are written as NaN/inf - confirm this is intended
    kwargs.update(dtype=rasterio.float32, count=1)
    VI_output_path = os.path.join(NDVI_output_dir, (image_datetime+'_NDVI.tif'))
    with rasterio.open(VI_output_path, 'w', **kwargs) as dst:
        dst.write(ndvi, 1)

In [4]:
PS_imagery_dir = Gatesburg_2024_PS_dir

# Step 1: group the images (and metadata files) that share the same date and time
PS_file_list_by_date = group_files_by_date_time(PS_imagery_dir)

# The NDVI rasters are written to an 'NDVI' sub-folder of the imagery directory
NDVI_output_dir = os.path.join(PS_imagery_dir, 'NDVI')
os.makedirs(NDVI_output_dir, exist_ok=True)

# File-name endings of the rasters that NDVI is computed from
raster_suffixes = (
    'SR_clip_masked.tif',
    'SR_8b_clip_masked.tif',
    'AnalyticMS_clip_masked.tif',
    'AnalyticMS_SR_clip_masked.tif',
)

# Step 2: scale the bands by their reflectance coefficients and write the NDVI
for key, date_time_PS_list in PS_file_list_by_date.items():
    # Reset for every date/time group so that a group with a missing file can
    # never silently reuse the paths found for the previous group
    SR_file_path = None
    meta_file_path = None

    # NOTE(review): the grouped names come without their folder, so they are
    # assumed to sit directly in PS_imagery_dir - confirm for nested folders
    for file_name in date_time_PS_list:
        if file_name.endswith(raster_suffixes):
            SR_file_path = os.path.join(PS_imagery_dir, file_name)
            # The first 15 characters are the 'YYYYMMDD_HHMMSS' stamp
            image_datetime = file_name[:15]
        elif file_name.endswith('metadata_clip.xml'):
            meta_file_path = os.path.join(PS_imagery_dir, file_name)

    if SR_file_path is None or meta_file_path is None:
        print(f"Skipping {key}: no matching raster/metadata pair found")
        continue

    # get the coeffs associated with the image bands
    PS_coeffs = extract_PS_8band_coeff_from_xml(meta_file_path)

    # open the raster file, read the bands and scale them by their coefficients
    with rasterio.open(SR_file_path) as PS_raster:
        band_count = PS_raster.count
        if band_count == 8:
            band_blue = PS_raster.read(2) * PS_coeffs[2]
            band_greenI = PS_raster.read(3) * PS_coeffs[3]
            band_green = PS_raster.read(4) * PS_coeffs[4]
            band_yellow = PS_raster.read(5) * PS_coeffs[5]
            band_red = PS_raster.read(6) * PS_coeffs[6]
            band_redE = PS_raster.read(7) * PS_coeffs[7]
            band_NIR = PS_raster.read(8) * PS_coeffs[8]
        elif band_count == 4:
            band_blue = PS_raster.read(1) * PS_coeffs[1]
            band_green = PS_raster.read(2) * PS_coeffs[2]
            band_red = PS_raster.read(3) * PS_coeffs[3]
            band_NIR = PS_raster.read(4) * PS_coeffs[4]
        else:
            # Without this guard the bands of the previous image would be reused
            print(f"Skipping {key}: unsupported band count {band_count}")
            continue

        kwargs = PS_raster.meta

    # Calculate NDVI; division warnings are silenced only for this expression
    # instead of changing NumPy's global error state
    with np.errstate(divide='ignore', invalid='ignore'):
        ndvi = (band_NIR.astype(float) - band_red.astype(float)) / (band_NIR + band_red)
    # Match the dtype declared for the output dataset below
    ndvi = ndvi.astype(np.float32)
    #EVI = 2.5 * (band_NIR.astype(float) - band_red.astype(float))/(band_NIR.astype(float) + 6*band_red.astype(float) - 7*band_blue.astype(float) + 1)
    #EVI = EVI.astype(np.float32)

    # NOTE(review): kwargs still carries the source raster's nodata value, and
    # pixels where NIR + red == 0 are written as NaN/inf - confirm this is intended
    kwargs.update(dtype=rasterio.float32, count=1)
    VI_output_path = os.path.join(NDVI_output_dir, (image_datetime+'_NDVI.tif'))
    with rasterio.open(VI_output_path, 'w', **kwargs) as dst:
        dst.write(ndvi, 1)

In [18]:
PS_imagery_dir = US_HWB_2017_PS_dir

# Step 1: group the images (and metadata files) that share the same date and time
PS_file_list_by_date = group_files_by_date_time(PS_imagery_dir)

# The NDVI rasters are written to an 'NDVI' sub-folder of the imagery directory
NDVI_output_dir = os.path.join(PS_imagery_dir, 'NDVI')
os.makedirs(NDVI_output_dir, exist_ok=True)

# File-name endings of the rasters that NDVI is computed from
raster_suffixes = (
    'SR_clip_masked.tif',
    'SR_8b_clip_masked.tif',
    'AnalyticMS_clip_masked.tif',
    'AnalyticMS_SR_clip_masked.tif',
)

# Step 2: scale the bands by their reflectance coefficients and write the NDVI
for key, date_time_PS_list in PS_file_list_by_date.items():
    # Reset for every date/time group so that a group with a missing file can
    # never silently reuse the paths found for the previous group
    SR_file_path = None
    meta_file_path = None

    # NOTE(review): the grouped names come without their folder, so they are
    # assumed to sit directly in PS_imagery_dir - confirm for nested folders
    for file_name in date_time_PS_list:
        if file_name.endswith(raster_suffixes):
            SR_file_path = os.path.join(PS_imagery_dir, file_name)
            # The first 15 characters are the 'YYYYMMDD_HHMMSS' stamp
            image_datetime = file_name[:15]
        elif file_name.endswith('metadata_clip.xml'):
            meta_file_path = os.path.join(PS_imagery_dir, file_name)

    if SR_file_path is None or meta_file_path is None:
        print(f"Skipping {key}: no matching raster/metadata pair found")
        continue

    # get the coeffs associated with the image bands
    PS_coeffs = extract_PS_8band_coeff_from_xml(meta_file_path)

    # open the raster file, read the bands and scale them by their coefficients
    with rasterio.open(SR_file_path) as PS_raster:
        band_count = PS_raster.count
        if band_count == 8:
            band_blue = PS_raster.read(2) * PS_coeffs[2]
            band_greenI = PS_raster.read(3) * PS_coeffs[3]
            band_green = PS_raster.read(4) * PS_coeffs[4]
            band_yellow = PS_raster.read(5) * PS_coeffs[5]
            band_red = PS_raster.read(6) * PS_coeffs[6]
            band_redE = PS_raster.read(7) * PS_coeffs[7]
            band_NIR = PS_raster.read(8) * PS_coeffs[8]
        elif band_count == 4:
            band_blue = PS_raster.read(1) * PS_coeffs[1]
            band_green = PS_raster.read(2) * PS_coeffs[2]
            band_red = PS_raster.read(3) * PS_coeffs[3]
            band_NIR = PS_raster.read(4) * PS_coeffs[4]
        else:
            # Without this guard the bands of the previous image would be reused
            print(f"Skipping {key}: unsupported band count {band_count}")
            continue

        kwargs = PS_raster.meta

    # Calculate NDVI; division warnings are silenced only for this expression
    # instead of changing NumPy's global error state
    with np.errstate(divide='ignore', invalid='ignore'):
        ndvi = (band_NIR.astype(float) - band_red.astype(float)) / (band_NIR + band_red)
    # Match the dtype declared for the output dataset below
    ndvi = ndvi.astype(np.float32)
    #EVI = 2.5 * (band_NIR.astype(float) - band_red.astype(float))/(band_NIR.astype(float) + 6*band_red.astype(float) - 7*band_blue.astype(float) + 1)
    #EVI = EVI.astype(np.float32)

    # NOTE(review): kwargs still carries the source raster's nodata value, and
    # pixels where NIR + red == 0 are written as NaN/inf - confirm this is intended
    kwargs.update(dtype=rasterio.float32, count=1)
    VI_output_path = os.path.join(NDVI_output_dir, (image_datetime+'_NDVI.tif'))
    with rasterio.open(VI_output_path, 'w', **kwargs) as dst:
        dst.write(ndvi, 1)