In [18]:
import os
import rasterio
import numpy as np
import geopandas as gpd
import numpy as np
from joblib import load
from rasterio.transform import from_origin
from rasterio.features import geometry_mask
from datetime import datetime, timedelta
from rasterio.sample import sample_gen
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap, BoundaryNorm
import contextily as ctx  # For OpenStreetMap basemaps

def extract_datetime_from_filename(filename):
    """
    Extract year, month, day, and hour from a raster filename containing DOY.

    Args:
        filename (str): Raster filename with DOY (e.g., 'ECO2LSTE.001_SDS_LST_doy2023156110712_aid0001.tif').

    Returns:
        dict: Dictionary with keys 'year', 'month', 'day', 'hour', 'minute', 'second'.
    """
    # Extract the DOY and timestamp information from the filename
    parts = filename.split('_doy')
    if len(parts) < 2:
        raise ValueError("Filename does not contain DOY information.")

    doy_part = parts[1].split('_')[0]
    year = int(doy_part[:4])
    doy = int(doy_part[4:7])
    hour = int(doy_part[7:9])
    minute = int(doy_part[9:11])
    second = int(doy_part[11:13])

    # Convert DOY to month and day
    date = datetime(year, 1, 1) + timedelta(days=doy - 1)
    month = date.month
    day = date.day

    return {
        'year': year,
        'month': month,
        'day': day,
        'hour': hour,
        'minute': minute,
        'second': second,
    }

# Helper function to extract data within bounds
def extract_raster_data(raster_file, min_lon, max_lon, min_lat, max_lat):
    with rasterio.open(raster_file) as src:
        transform = src.transform
        raster_data = src.read(1)  # Read the first band
        raster_data = np.where(raster_data == src.nodata, np.nan, raster_data)  # Replace nodata with NaN
        # Calculate row/col indices for the bounding box
        min_col = int((min_lon - transform[2]) / transform[0])
        max_col = int((max_lon - transform[2]) / transform[0])
        min_row = int((max_lat - transform[5]) / transform[4])
        max_row = int((min_lat - transform[5]) / transform[4])

        # Subset the raster data
        subset_data = raster_data[min_row:max_row, min_col:max_col]
        extent = (
            transform[2] + min_col * transform[0],  # min X (lon)
            transform[2] + max_col * transform[0],  # max X (lon)
            transform[5] + max_row * transform[4],  # min Y (lat)
            transform[5] + min_row * transform[4],  # max Y (lat)
        )
        return subset_data, extent

def get_ecostress_files(directory, keyword="LST_doy"):
    """Get the list of Ecostress LST files containing the specified keyword."""
    return [os.path.join(directory, file) for file in os.listdir(directory) if keyword in file]

def extract_datetime_from_filename(filename):
    """
    Extract year, month, day, and hour from a raster filename containing DOY.

    Args:
        filename (str): Raster filename with DOY (e.g., 'ECO2LSTE.001_SDS_LST_doy2023156110712_aid0001.tif').

    Returns:
        dict: Dictionary with keys 'year', 'month', 'day', 'hour', 'minute', 'second'.
    """
    # Extract the DOY and timestamp information from the filename
    parts = filename.split('_doy')
    if len(parts) < 2:
        raise ValueError("Filename does not contain DOY information.")

    doy_part = parts[1].split('_')[0]
    year = int(doy_part[:4])
    doy = int(doy_part[4:7])
    hour = int(doy_part[7:9])
    minute = int(doy_part[9:11])
    second = int(doy_part[11:13])

    # Convert DOY to month and day
    date = datetime(year, 1, 1) + timedelta(days=doy - 1)
    month = date.month
    day = date.day

    return {
        'year': year,
        'month': month,
        'day': day,
        'hour': hour,
        'minute': minute,
        'second': second,
    }

def get_msg_lst_file(datetime_info, directory="../downloads/MSG_2ND"):
    """Construct the MSG LST filename for a specific date and time."""
    return os.path.join(directory, f'LST_{datetime_info["year"]}-{datetime_info["month"]:02d}-{datetime_info["day"]:02d}.gpkg')

def filter_gdf_by_datetime(gdf, datetime_info):
    """Filter GeoDataFrame by year, month, day, and hour."""
    return gdf[
        (gdf['year'] == datetime_info["year"]) &
        (gdf['month'] == datetime_info["month"]) &
        (gdf['day'] == datetime_info["day"]) &
        (gdf['hour'] == datetime_info["hour"])
    ]


In [19]:

def create_raster_from_points(filtered_gdf, resolution):
    """Generate a raster from GeoDataFrame points and their LST values."""
    min_x, min_y, max_x, max_y = filtered_gdf.total_bounds
    cols = int((max_x - min_x) / resolution) + 1
    rows = int((max_y - min_y) / resolution) + 1

    raster = np.full((rows, cols), -9999, dtype=np.float32)  # nodata value
    transform = from_origin(min_x, max_y, resolution, resolution)

    for x, y, val in zip(filtered_gdf.geometry.x, filtered_gdf.geometry.y, filtered_gdf['temperature']):
        col = int((x - min_x) / resolution)
        row = int((max_y - y) / resolution)
        if 0 <= col < cols and 0 <= row < rows:
            raster[row, col] = val

    return raster, transform


In [21]:

def save_raster(output_path, raster, transform, crs, nodata=-9999):
    """Save raster to a GeoTIFF file."""
    with rasterio.open(
        output_path,
        'w',
        driver='GTiff',
        height=raster.shape[0],
        width=raster.shape[1],
        count=1,
        dtype=raster.dtype,
        crs=crs,
        transform=transform,
        nodata=nodata
    ) as dst:
        dst.write(raster, 1)


In [22]:
from scipy.spatial import cKDTree
import numpy as np

def update_predictions(lu_gdf, coarse_gdf, predictors, loaded_model):
    """Update predictions in the land-use GeoDataFrame."""
    coords = np.array([(point.x, point.y) for point in lu_gdf.geometry])
    coarse_coords = np.array([(point.x, point.y) for point in coarse_gdf.geometry])
    coarse_temps = coarse_gdf['temperature'].values

    # Build a spatial index for the coarse grid points
    tree = cKDTree(coarse_coords)
    _, indices = tree.query(coords, k=1)  # Find the nearest neighbor

    # Get the temperature from the closest coarse_gdf point
    lu_gdf['msg_lst_temperature'] = coarse_temps[indices]

    # Filter out rows with nodata temperature values
    lu_gdf_filtered = lu_gdf[lu_gdf['msg_lst_temperature'] != -9999].copy()
    lu_gdf_filtered['msg_lst_temperature'] += 273.15  # Convert to Kelvin
    lu_gdf_filtered['month'] = datetime_info["month"]
    lu_gdf_filtered['hour'] = datetime_info["hour"]

    # If all predictors are 0 then drop the row
    lu_gdf_filtered = lu_gdf_filtered[(lu_gdf_filtered[predictors[:-2]] != 0).any(axis=1)].copy()

    # Predict LST
    lu_gdf_filtered['temperature'] = loaded_model.predict(lu_gdf_filtered[predictors])

    lu_gdf.loc[lu_gdf_filtered.index, 'temperature'] = lu_gdf_filtered['temperature']

    # Convert to Celsius
    lu_gdf.loc[lu_gdf['temperature'] != -9999, 'temperature'] -= 273.15


In [23]:


# def update_predictions(lu_gdf, src, predictors, loaded_model):
#     """Update predictions in the land-use GeoDataFrame."""
#     coords = [(point.x, point.y) for point in lu_gdf.geometry]
#     lu_gdf['msg_lst_temperature'] = [
#         val[0] if val else -9999 for val in sample_gen(src, coords)
#     ]

#     lu_gdf_filtered = lu_gdf[lu_gdf['msg_lst_temperature'] != -9999].copy()
#     lu_gdf_filtered['msg_lst_temperature'] += 273.15
#     lu_gdf_filtered['month'] = datetime_info["month"]
#     lu_gdf_filtered['hour'] = datetime_info["hour"]

#     # If all predictors are 0 then drop the row
#     lu_gdf_filtered = lu_gdf_filtered[(lu_gdf_filtered[predictors[:-2]] != 0).any(axis=1)].copy()

#     # Predict LST
#     lu_gdf_filtered['temperature'] = loaded_model.predict(lu_gdf_filtered[predictors])

#     lu_gdf.loc[lu_gdf_filtered.index, 'temperature'] = lu_gdf_filtered['temperature']

#     # Convert to Celsius
#     lu_gdf.loc[lu_gdf['temperature'] != -9999, 'temperature'] -= 273.15


In [24]:
ecostress_dir = "../downloads/ECOSTRESS_LST"
ecostress_files = get_ecostress_files(ecostress_dir)


In [25]:
ecostress_files = ecostress_files[len(ecostress_files)-2:len(ecostress_files)-1]

In [26]:
ecostress_files

['../downloads/ECOSTRESS_LST/ECO2LSTE.001_SDS_LST_doy2024217103456_aid0001.tif']

In [27]:

# Step 1: Load the trained model
model_filename = "random_forest_model.pkl"
loaded_model = load(model_filename)

# Step 2: Load land use profiles for the geometry points in the LU profile file
lu_profile_file = "../lu_profiles/rome_2023_landuse_profile_35m.gpkg"
lu_gdf = gpd.read_file(lu_profile_file)

predictors = ['trees', 'water', 'crop', 'built_area', 'range_land', 'msg_lst_temperature', 'month', 'hour']
coarse_resolution = 0.05 # Approximately 5000 meters
fine_resolution = 0.0006309  # Approximately 70 meters

In [28]:

for ecostress_raster_file in ecostress_files:

    # Extract date and time info from filename
    datetime_info = extract_datetime_from_filename(os.path.basename(ecostress_raster_file))

    # Create MSG LST data file name for the extracted date and hour
    msg_lst_filename = get_msg_lst_file(datetime_info)

    # Checj if the file exists
    if not os.path.exists(msg_lst_filename):
        continue

    print(msg_lst_filename)

    # Read the file
    gdf = gpd.read_file(msg_lst_filename)

    print(gdf["hour"].unique())
    # Filter by the hour
    filtered_gdf = filter_gdf_by_datetime(gdf, datetime_info)

    print(filtered_gdf["hour"].unique())

    if filtered_gdf.empty:
        print("No data found for the specified date and time.")
        continue

    coarse_resolution = 0.05
    raster, transform = create_raster_from_points(filtered_gdf, coarse_resolution)
    output_raster = f'../lst_rasters/MSG-LST-{datetime_info["year"]}-{datetime_info["month"]:02d}-{datetime_info["day"]:02d}-{datetime_info["hour"]:02d}.tif'
    save_raster(output_raster, raster, transform, filtered_gdf.crs)

    update_predictions(lu_gdf, filtered_gdf, predictors, loaded_model)

    # with rasterio.open(output_raster) as src:
    #     update_predictions(lu_gdf, src, predictors, loaded_model)

    predicted_raster, predicted_transorm = create_raster_from_points(lu_gdf, fine_resolution)

    output_predicted_raster = f'../lst_rasters/MSG-LST-DOWNSCALED-{datetime_info["year"]}-{datetime_info["month"]:02d}-{datetime_info["day"]:02d}-{datetime_info["hour"]:02d}.tif'
    save_raster(output_predicted_raster, predicted_raster, predicted_transorm, lu_gdf.crs)

    print(f"Downscaled raster saved: {output_predicted_raster}")


../downloads/MSG_2ND/LST_2024-08-04.gpkg
[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23]
[10]


  avg_distance = filtered_gdf.geometry.apply(lambda point: point.distance(filtered_gdf.geometry.unary_union.centroid)).mean()


Downscaled raster saved: ../lst_rasters/MSG-LST-DOWNSCALED-2024-08-04-10.tif


In [29]:
# Define the extent (bounding box) to extract (in geographic coordinates)
min_lon, max_lon = 12.53514, 12.58
min_lat, max_lat = 41.84959, 41.90

for ecostress_raster_file in ecostress_files:

    datetime_info = extract_datetime_from_filename(os.path.basename(ecostress_raster_file))
    msg_lst_filename = get_msg_lst_file(datetime_info)

    # Load the coarse, fine, and Ecostress raster files
    coarse_raster_file = f'../lst_rasters/MSG-LST-{datetime_info["year"]}-{datetime_info["month"]:02d}-{datetime_info["day"]:02d}-{datetime_info["hour"]:02d}.tif'

    if not os.path.exists(coarse_raster_file):
        continue

    fine_raster_file = f'../lst_rasters/MSG-LST-DOWNSCALED-{datetime_info["year"]}-{datetime_info["month"]:02d}-{datetime_info["day"]:02d}-{datetime_info["hour"]:02d}.tif'
    if not os.path.exists(fine_raster_file):
        continue

    print(f'{coarse_raster_file}, {fine_raster_file}')

    # Extract the subsets for all rasters
    coarse_data, coarse_extent = extract_raster_data(coarse_raster_file, min_lon, max_lon, min_lat, max_lat)
    fine_data, fine_extent = extract_raster_data(fine_raster_file, min_lon, max_lon, min_lat, max_lat)
    ecostress_data, ecostress_extent = extract_raster_data(ecostress_raster_file, min_lon, max_lon, min_lat, max_lat)

    # Mask temperatures less than 5°C by setting them to NaN
    ecostress_data = np.where(
        (ecostress_data * 0.02 - 273.15) < 20,  # Check if temperature < 5°C
        np.nan,                                # Set to NaN if condition is met
        (ecostress_data * 0.02) - 273.15       # Otherwise, scale and convert to Celsius
    )

    # Mask temperatures greater than 50°C by setting them to NaN
    ecostress_data = np.where(ecostress_data > 50, np.nan, ecostress_data) # Set to NaN if condition is met

    try:

        if np.isnan(np.nanmin(coarse_data)) or np.isnan(np.nanmin(fine_data)):
            continue

        print(f'{np.nanmin(coarse_data)}, {np.nanmin(fine_data)}, {np.nanmin(ecostress_data)}')
        print(f'{np.nanmax(coarse_data)}, {np.nanmax(fine_data)}, {np.nanmax(ecostress_data)}')

        # Ensure both arrays have the same shape by slicing the larger one
        if fine_data.shape != ecostress_data.shape:
            # Find the minimum row and column sizes
            min_rows = min(fine_data.shape[0], ecostress_data.shape[0])
            min_cols = min(fine_data.shape[1], ecostress_data.shape[1])

            # Slice the arrays to the same size
            fine_data = fine_data[:min_rows, :min_cols]
            ecostress_data = ecostress_data[:min_rows, :min_cols]

        # Calculate temperature difference between fine and Ecostress
        temperature_diff = fine_data - ecostress_data

        # Set the difference to 0 if either fine_data or ecostress_data contains NaN
        temperature_diff = np.where(
            np.isnan(fine_data) | np.isnan(ecostress_data),  # Condition: Either value is NaN
            0,                                              # Set to 0
            temperature_diff                                # Otherwise, keep the difference
        )

        # Define temperature bins (1 degree Celsius each)
        temp_min = min(np.nanmin(coarse_data), np.nanmin(fine_data), np.nanmin(ecostress_data))
        temp_max = max(np.nanmax(coarse_data), np.nanmax(fine_data), np.nanmax(ecostress_data))
        bins = np.arange(np.floor(temp_min), np.ceil(temp_max) + 1, 1)  # 1-degree bins

        from matplotlib.colors import TwoSlopeNorm

        # Define temperature difference colormap (e.g., shades of blue to red)
        cmap_diff = plt.cm.coolwarm

        # Create a TwoSlopeNorm to center the color scale at 0
        norm_diff = TwoSlopeNorm(vmin=np.nanmin(temperature_diff), vmax=np.nanmax(temperature_diff), vcenter=0)

        # Create a standard colormap for the other plots (using 'viridis')
        cmap_standard = plt.cm.viridis
        norm_standard = BoundaryNorm(bins, cmap_standard.N)

        # Plot the maps
        fig, axes = plt.subplots(4, 1, figsize=(10, 16), sharex=True, sharey=True)

        # Plot the coarse raster
        ax = axes[0]
        im = ax.imshow(coarse_data, cmap=cmap_standard, norm=norm_standard, extent=coarse_extent, interpolation='nearest')
        ax.set_title("Coarse Resolution LST")
        ax.set_xlabel("Longitude")
        ax.set_ylabel("Latitude")
        plt.colorbar(im, ax=ax, label="Temperature (°C)", ticks=bins)

        # Plot the fine raster
        ax = axes[1]
        im = ax.imshow(fine_data, cmap=cmap_standard, norm=norm_standard, extent=fine_extent, interpolation='nearest')
        ax.set_title("Fine Resolution LST")
        ax.set_xlabel("Longitude")
        ax.set_ylabel("Latitude")
        plt.colorbar(im, ax=ax, label="Temperature (°C)", ticks=bins)

        # Plot the Ecostress raster
        ax = axes[2]
        im = ax.imshow(ecostress_data, cmap=cmap_standard, norm=norm_standard, extent=ecostress_extent, interpolation='nearest')
        ax.set_title("Ecostress LST")
        ax.set_xlabel("Longitude")
        ax.set_ylabel("Latitude")
        plt.colorbar(im, ax=ax, label="Temperature (°C)", ticks=bins)

        # Plot the difference (fine - ecostress) using cmap_diff (blue to red)
        ax = axes[3]
        im = ax.imshow(temperature_diff, cmap=cmap_diff, norm=norm_diff, extent=fine_extent, interpolation='nearest')
        ax.set_title("Difference (Fine - Ecostress) LST")
        ax.set_xlabel("Longitude")
        ax.set_ylabel("Latitude")
        plt.colorbar(im, ax=ax, label="Temperature Difference (°C)")

        plot_filename = f'../LST_MAP_COMPARISON/LST-VILLA-DI-SANTIS-{datetime_info["year"]}-{datetime_info["month"]:02d}-{datetime_info["day"]:02d}-{datetime_info["hour"]:02d}.png'

        # Final layout adjustments
        plt.tight_layout()

        # Save the plot
        plt.savefig(plot_filename)
        plt.show()
    except Exception as e:
        print(e)

print("Done")


../lst_rasters/MSG-LST-2024-08-04-10.tif, ../lst_rasters/MSG-LST-DOWNSCALED-2024-08-04-10.tif
zero-size array to reduction operation fmin which has no identity
Done
