In [1]:
import os
import numpy as np
import shutil
import glob
import rioxarray
import polars as pl
import os
import numpy as np
import sys
sys.path.append(r"C:\Users\JoaoPereira\Desktop\pygeo\Geospatial-Ops")
import aux_functions as af
import importlib
importlib.reload(af)
from datetime import datetime


In [22]:
# Locations used by the whole notebook. Every Windows path is a raw string so
# that backslashes are never interpreted as escape sequences.
input_dir = r"E:\Spotlite_JPereira\LST_DB\2017-jan2024"
output_dir = r"E:\Spotlite_JPereira\EGIS\EROP\A24\Extensao\LST"
aoi_shapefile = r"E:\Spotlite_JPereira\EGIS\EROP\A24\Extensao\A24_buffer_5km_wgs84.geojson"
# Statistic tag: matched against the source GeoTIFF names and written into the
# names of the copied files.
folder_type = "MEDIAN"
# Directory scanned for the LST10_DC_* product folders.
directory = input_dir

In [4]:
# Normalise every LST10_DC_* product folder, unpack the archive it contains and
# copy the GeoTIFF of the requested statistic (folder_type) into output_dir.

# shutil.copy does not create a missing destination folder
os.makedirs(output_dir, exist_ok=True)

for subdir in os.listdir(directory):
    old_folder_path = os.path.join(directory, subdir)
    # only product *folders* are handled; a loose file sharing the prefix
    # would otherwise be renamed and then break os.listdir below
    if not (subdir.startswith('LST10_DC_') and os.path.isdir(old_folder_path)):
        continue

    # keep only the first 8 characters of the third name component
    # (presumably the YYYYMMDD part of a longer timestamp -- confirm)
    folder_date = subdir.split('_')[2][:8]
    new_folder_name = f'LST10_DC_{folder_date}_GLOBE_GEO_V2.0.1'
    new_folder_path = os.path.join(directory, new_folder_name)
    # rename only when the name actually changes, so the cell can be re-run
    # after an interrupted execution without touching finished folders
    if new_folder_path != old_folder_path:
        os.rename(old_folder_path, new_folder_path)

    # unpack the zipped product next to the archive itself
    for archive_name in os.listdir(new_folder_path):
        if archive_name.startswith('c_gls_LST10') and archive_name.endswith('.zip'):
            zip_file_path = os.path.join(new_folder_path, archive_name)
            shutil.unpack_archive(zip_file_path, extract_dir=new_folder_path)

    # the extracted data sits in a sub-folder whose name is exactly 8 digits
    for inner_folder in os.listdir(new_folder_path):
        inner_folder_path = os.path.join(new_folder_path, inner_folder)
        if not (os.path.isdir(inner_folder_path) and len(inner_folder) == 8 and inner_folder.isdigit()):
            continue

        # copy the matching .tiff to the output directory under a short name
        # that carries the inner folder's date and the statistic tag
        for tiff_name in os.listdir(inner_folder_path):
            if tiff_name.endswith(".tiff") and f"c_gls_LST10-DC-{folder_type}" in tiff_name:
                src_path = os.path.join(inner_folder_path, tiff_name)
                dst_filename = f"LST_10_{inner_folder}_{folder_type}.tiff"
                dst_path = os.path.join(output_dir, dst_filename)
                shutil.copy(src_path, dst_path)

KeyboardInterrupt: 

In [23]:
def lst_calc_daily_mean(input_dir, output_dir, start_date, end_date, clip=False, aoi_shapefile=None,
                        expected_bands=24, nodata_value=-8000):
    """Write one band-averaged GeoTIFF per ``*_MEDIAN.tiff`` raster in a date window.

    Parameters
    ----------
    input_dir : str
        Folder holding the ``*_MEDIAN.tiff`` rasters.
    output_dir : str
        Parent folder; results are always written to its ``cut`` sub-folder,
        whether or not clipping is requested.
    start_date, end_date : str
        Inclusive bounds of the window, formatted ``YYYY-MM-DD``.
    clip : bool, optional
        When True, each raster is first passed through
        ``af.reproject_clip_resample_tiff`` with ``aoi_shapefile`` and the
        resulting ``*_clipped.tiff`` is averaged instead of the original.
    aoi_shapefile : str, optional
        Area-of-interest file handed to the clipping helper. Required when
        ``clip`` is True.
    expected_bands : int, optional
        Rasters with a different band count are reported and skipped.
    nodata_value : int, optional
        Cells equal to this value are replaced with NaN before averaging.

    Returns
    -------
    str
        Path of the ``cut`` sub-folder containing the ``*_dailymean.tiff`` files.

    Raises
    ------
    ValueError
        If ``clip`` is True but no ``aoi_shapefile`` is given, or if a date
        string does not match ``YYYY-MM-DD``.
    """
    # fail before touching the file system instead of deep inside the helper
    if clip and aoi_shapefile is None:
        raise ValueError("aoi_shapefile must be provided when clip=True")

    # Convert start_date and end_date to datetime objects
    start_date = datetime.strptime(start_date, "%Y-%m-%d")
    end_date = datetime.strptime(end_date, "%Y-%m-%d")

    tiff_files = [
        f for f in os.listdir(input_dir)
        if f.endswith("_MEDIAN.tiff") and is_within_date_range(f, start_date, end_date)
    ]

    mean_subdirectory = os.path.join(output_dir, 'cut')
    os.makedirs(mean_subdirectory, exist_ok=True)

    try:
        for file_name in tiff_files:
            input_file_path = os.path.join(input_dir, file_name)

            if clip:
                # Perform clipping first
                clipped_file_path = os.path.join(mean_subdirectory, file_name.replace(".tiff", "_clipped.tiff"))
                af.reproject_clip_resample_tiff(
                    input_tiff=input_file_path,
                    output_tiff=clipped_file_path,
                    aoi_shapefile=aoi_shapefile,
                    clip=True
                )
                file_path_to_process = clipped_file_path
            else:
                file_path_to_process = input_file_path

            # Process the raster (clipped or original)
            with rioxarray.open_rasterio(file_path_to_process) as ds:
                ds = ds.rio.write_crs("epsg:4326", inplace=True)

                if ds.rio.count != expected_bands:
                    print(f"Unexpected number of bands in {file_name}. Expected {expected_bands}, found {ds.rio.count}.")
                    continue

                # mask the no-data marker so it does not bias the mean
                ds = ds.where(ds != nodata_value, np.nan)
                mean_data = ds.mean(dim='band')
                mean_data.rio.write_crs("epsg:4326", inplace=True)

                output_file_name = file_name.replace("_MEDIAN.tiff", "_dailymean.tiff")
                output_file_path = os.path.join(mean_subdirectory, output_file_name)
                mean_data.rio.to_raster(output_file_path)
    finally:
        # Delete all files ending with _MEDIAN_clipped.tiff, even when a raster
        # failed: leftovers would match the "*_clipped.tiff" pattern that the
        # CSV export step looks for first.
        for file in glob.glob(os.path.join(mean_subdirectory, "*_MEDIAN_clipped.tiff")):
            os.remove(file)

    return mean_subdirectory

In [24]:
def is_within_date_range(file_name, start_date, end_date):
    """Tell whether the date embedded in ``file_name`` lies in ``[start_date, end_date]``.

    The date is taken from the third ``_``-separated component of the name and
    must be formatted ``YYYYMMDD``. Both bounds are inclusive and are compared
    against the parsed ``datetime``.
    """
    name_parts = file_name.split('_')
    stamp = datetime.strptime(name_parts[2], "%Y%m%d")

    if not start_date <= stamp:
        return False
    return stamp <= end_date

In [30]:
# Processing window; both bounds are YYYY-MM-DD strings
start_date = "2021-01-01"
end_date = "2023-10-30"

# Text label of the window, e.g. for use in output file names
date_range_str = "_to_".join([start_date, end_date])

In [26]:
# Clip every raster in the window to the AOI and average its bands;
# `path` receives the folder returned by lst_calc_daily_mean.
path = lst_calc_daily_mean(
    input_dir=input_dir,
    output_dir=output_dir,
    start_date=start_date,
    end_date=end_date,
    clip=True,
    aoi_shapefile=aoi_shapefile,
)

In [27]:
def lst_to_df(file, path):
    """Load the first band of ``<path>/<file>.tiff`` as a long-format pandas DataFrame.

    ``file`` is the raster's base name without extension; its third
    ``_``-separated component is used as the date in the value column's name.
    The result has three columns: ``longitude``, ``latitude`` and ``LST_<date>``.
    """
    raster_path = os.path.join(path, f"{file}.tiff")
    first_band = rioxarray.open_rasterio(raster_path)[0]
    lst_data = first_band.rio.write_crs("epsg:4326", inplace=True)
    lst_data.name = f"LST_{file.split('_')[2]}"

    frame = lst_data.to_dataframe()
    # drop the coordinate bookkeeping columns, then expose x/y as ordinary columns
    frame = frame.drop(["band", "spatial_ref"], axis="columns")
    frame = frame.reset_index()
    frame = frame.reindex(columns=["x", "y", lst_data.name])
    return frame.rename({"x": "longitude", "y": "latitude"}, axis="columns")

In [31]:
def lst_to_csv(path, date_range=None):
    """Merge the rasters found in ``path`` into one wide CSV, one column per date.

    Rasters matching ``*_clipped.tiff`` are used when present; otherwise the
    ``*_dailymean.tiff`` files are used. Each raster is converted with
    ``lst_to_df`` and the frames are inner-joined on longitude/latitude, so
    only cells present in every raster are kept. Value columns are renamed
    from ``LST_<date>`` to ``<date>``, nulls are filled with 0, values are
    divided by 100 and cast to 64-bit integers.

    Parameters
    ----------
    path : str
        Folder holding the rasters; the CSV is written into the same folder.
    date_range : str, optional
        Label inserted into the output name ``A24_LST_<date_range>.csv``.
        When omitted, the notebook-level ``date_range_str`` is used.

    Returns
    -------
    None
        Prints a message and returns early if no raster matches.
    """
    def _basenames(pattern):
        # base names without extension; splitext keeps names that contain extra dots intact
        return [
            os.path.splitext(os.path.basename(tiff))[0]
            for tiff in sorted(glob.glob(os.path.join(path, pattern)))
        ]

    # Prefer clipped rasters, fall back to the daily-mean rasters
    filenames = _basenames("*_clipped.tiff") or _basenames("*_dailymean.tiff")

    if not filenames:
        print("No files found. Check the path and file pattern.")
        return

    # Resolve the label before the expensive merge so a missing global fails fast
    if date_range is None:
        date_range = date_range_str

    # Convert the first file's DataFrame from Pandas to Polars
    lst_merged = pl.DataFrame(lst_to_df(filenames[0], path))

    for file in filenames[1:]:
        # Convert subsequent DataFrames and join
        next_df = pl.DataFrame(lst_to_df(file, path))
        lst_merged = lst_merged.join(next_df, on=["longitude", "latitude"], how="inner")

    # Rename the columns to remove "LST_"
    new_column_names = {col: col.replace("LST_", "") for col in lst_merged.columns if col.startswith("LST_")}
    lst_merged = lst_merged.rename(new_column_names)

    # Everything after longitude/latitude is a value column. Fill nulls with 0,
    # divide by 100 and cast to integer, all columns in one pass.
    # NOTE(review): only nulls are filled; whether NaN cells arrive as nulls
    # depends on the pandas -> polars conversion -- confirm.
    value_columns = lst_merged.columns[2:]
    if value_columns:
        lst_merged = lst_merged.with_columns(
            (pl.col(value_columns).fill_null(0) / 100).cast(pl.Int64)
        )

    file_name = f"A24_LST_{date_range}.csv"

    # Save the CSV file
    lst_merged.write_csv(os.path.join(path, file_name))

In [32]:
lst_to_csv(path)