# Test file: merge monthly GeoTIFF tiles and clip them to the AOI

In [1]:
import os
import pandas as pd
import json
import rasterio
from rasterio.merge import merge
from shapely.geometry import shape, mapping
from rasterio.warp import transform_geom
from rasterio.windows import from_bounds, transform as window_transform

In [2]:
# Example input directory that is scanned for .tif files in the next cell.
tif_dir = "E:/GoogleDrive/samarra_exports_l2_neu/"
# Leading part of the export file names (see the example name below).
# NOTE(review): prefix/postfix are not referenced in the cells visible here - confirm they are still needed.
prefix = "s2_ndvi_samarra_"
postfix = "_" # separator; the rest of a name looks like "01_2017-04-30-0000000000-0000000000.tif"
# Full example file name: "s2_ndvi_samarra_2017-04-01_2017-04-30-0000000000-0000000000.tif"

In [3]:
# Collect every .tif file name found in the export directory.
# os.listdir() returns entries in arbitrary order, so the names are sorted to
# keep the tile order (and therefore the later merge order) reproducible.
lst = sorted(name for name in os.listdir(tif_dir) if name.endswith(".tif"))

print(f"Found {len(lst)} tif files in directory: {tif_dir}")

Found 474 tif files in directory: E:/GoogleDrive/samarra_exports_l2_neu/


In [4]:
# Pattern for an ISO-style date (YYYY-MM-DD); str.extract keeps the first match
# in each file name.
date_pattern = r'(\d{4}-\d{2}-\d{2})'

# One row per tif file, with the extracted date stored next to the file name.
df = pd.DataFrame(lst, columns=["file_name"])
df["date"] = df["file_name"].str.extract(date_pattern)


In [5]:
# Load the AOI geometry: the first feature of the GeoJSON file.
aoi_path = r"F:\0-Projects\GEE-UNESCO\COMM514_A_12_202425\aoi.geojson"
# Read explicitly as UTF-8; without `encoding`, open() falls back to the
# platform's locale encoding, which can mis-decode non-ASCII content.
with open(aoi_path, "r", encoding="utf-8") as f:
    geojson = json.load(f)

# Only the parsed dict is needed from here on, so the file is already closed.
aoi_geom = shape(geojson["features"][0]["geometry"])

# # Get earliest date from dataframe
# date = df["date"].min()
# min_df = df[df["date"] == date]

# print(f"Files for date {date}:")
# print(min_df)

def combine_files(files_df, output_dir=r"E:\test_output", max_bands=12):
    """Mosaic the tiles listed in ``files_df``, clip them to the AOI and save a GeoTIFF.

    Reads the module-level ``tif_dir`` (directory containing the tiles) and
    ``aoi_geom`` (AOI geometry, treated as EPSG:4326 when it is reprojected
    to the raster CRS).

    Parameters
    ----------
    files_df : pandas.DataFrame
        Needs a ``file_name`` column (one name, or several comma-separated
        names, per row) and a ``date`` column. The smallest ``date`` value
        is used as the output file name.
    output_dir : str, optional
        Directory the clipped mosaic is written to; created if missing.
    max_bands : int, optional
        Only the first ``max_bands`` bands of the mosaic are kept.

    Returns
    -------
    str or None
        Path of the written GeoTIFF, or ``None`` when none of the listed
        files exist on disk.

    Raises
    ------
    ValueError
        If the AOI bounds do not overlap the merged mosaic.
    """
    # The output file is named after the earliest date in the group.
    date = files_df["date"].min()

    src_files = []
    try:
        # Collect raster sources
        print("Opening raster files:")
        for _, row in files_df.iterrows():
            for file_name in row["file_name"].split(","):
                full_path = os.path.join(tif_dir, file_name.strip())
                if not os.path.exists(full_path):
                    print(f"  Skipping missing file: {full_path}")
                    continue
                print(f"  {full_path}")
                src_files.append(rasterio.open(full_path))

        if not src_files:
            # Return instead of calling exit(): terminating the interpreter
            # from inside a helper would stop the whole notebook session.
            print("No valid .tif files found.")
            return None

        # Merge raster tiles
        mosaic, out_transform = merge(src_files)
        print(f"Merged {len(src_files)} files.")

        # Copy everything still needed from the first source so that all
        # datasets can be closed right after merging.
        first_src = src_files[0]
        meta = first_src.meta.copy()
        raster_crs = first_src.crs
        all_descriptions = first_src.descriptions
    finally:
        # Always release the file handles, even if opening or merging failed.
        for src in src_files:
            src.close()

    # Keep only the leading bands (slicing is a no-op when there are fewer).
    mosaic = mosaic[:max_bands]

    # Store band names for naming later
    band_names = all_descriptions[:mosaic.shape[0]]

    # Prepare metadata
    meta.update({
        "driver": "GTiff",
        "descriptions": band_names,
        "height": mosaic.shape[1],
        "width": mosaic.shape[2],
        "count": mosaic.shape[0],
        "dtype": mosaic.dtype,
        "crs": raster_crs,
        "transform": out_transform,
        "compress": "lzw",
        "tiled": True
    })

    # Reproject AOI to raster CRS
    aoi_geom_proj = transform_geom(
        src_crs="EPSG:4326",
        dst_crs=raster_crs,
        geom=mapping(aoi_geom)
    )
    aoi_bounds = shape(aoi_geom_proj).bounds  # minx, miny, maxx, maxy

    # Compute aligned pixel window
    window = from_bounds(*aoi_bounds, transform=out_transform)
    window = window.round_offsets().round_lengths()  # snap to pixel grid

    # Clamp the window to the mosaic extent. Without this, an AOI reaching
    # past the mosaic edge yields negative offsets, which numpy would treat
    # as wrap-around indices and silently return the wrong pixels.
    window = window.crop(mosaic.shape[1], mosaic.shape[2])
    if window.width <= 0 or window.height <= 0:
        raise ValueError("AOI does not overlap the merged mosaic.")

    # Slice mosaic using aligned window
    row_start = int(window.row_off)
    row_stop = int(window.row_off + window.height)
    col_start = int(window.col_off)
    col_stop = int(window.col_off + window.width)
    clipped = mosaic[:, row_start:row_stop, col_start:col_stop]

    # Compute new transform for clipped region
    clipped_transform = window_transform(window, out_transform)

    # Update metadata for clipped output
    clipped_meta = meta.copy()
    clipped_meta.update({
        "height": clipped.shape[1],
        "width": clipped.shape[2],
        "transform": clipped_transform
    })

    # Save final clipped file
    os.makedirs(output_dir, exist_ok=True)
    final_output = os.path.join(output_dir, f"{date}.tif")
    with rasterio.open(final_output, "w", **clipped_meta) as dst:
        dst.write(clipped)

        # Band descriptions have to be set explicitly on the written dataset.
        for i, name in enumerate(band_names, start=1):
            dst.set_band_description(i, name)

    print(f"Saved clipped mosaic: {final_output}")
    return final_output

In [6]:
# # print all the layer descriptions
# print("Layer Descriptions:")
# for i, desc in enumerate(clipped_meta["descriptions"]):
#     print(f"Band {i+1}: {desc}")
    

In [7]:
# split df into sets of months so that each month can be calculated separately

def split_df_by_month(df):
    """Split a file listing into one DataFrame per calendar month.

    The input frame is left untouched; the month is computed on a copy.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a ``date`` column that ``pd.to_datetime`` can parse.

    Returns
    -------
    dict
        Maps each monthly ``pandas.Period`` to the rows of that month, with
        the index reset and an added ``month`` column.
    """
    # assign() returns a new frame, so the caller's df gains no extra columns.
    with_month = df.assign(month=pd.to_datetime(df['date']).dt.to_period('M'))

    return {
        month: group.reset_index(drop=True)
        for month, group in with_month.groupby('month')
    }

# Build the per-month file listings, then mosaic and clip each month in turn.
month_dfs = split_df_by_month(df)
for month in month_dfs:
    files_df = month_dfs[month]
    print(f"Month: {month}")
    combine_files(files_df)

Month: 2019-01
Opening raster files:
  E:/GoogleDrive/samarra_exports_l2_neu/s2_ndvi_samarra_2019-01-01_2019-01-31-0000000000-0000000000.tif
  E:/GoogleDrive/samarra_exports_l2_neu/s2_ndvi_samarra_2019-01-01_2019-01-31-0000000000-0000004608.tif
  E:/GoogleDrive/samarra_exports_l2_neu/s2_ndvi_samarra_2019-01-01_2019-01-31-0000000000-0000009216.tif
  E:/GoogleDrive/samarra_exports_l2_neu/s2_ndvi_samarra_2019-01-01_2019-01-31-0000004608-0000000000.tif
  E:/GoogleDrive/samarra_exports_l2_neu/s2_ndvi_samarra_2019-01-01_2019-01-31-0000004608-0000004608.tif
  E:/GoogleDrive/samarra_exports_l2_neu/s2_ndvi_samarra_2019-01-01_2019-01-31-0000004608-0000009216.tif
Merged 6 files.
Saved clipped mosaic: E:\test_output\2019-01-01.tif
Month: 2019-02
Opening raster files:
  E:/GoogleDrive/samarra_exports_l2_neu/s2_ndvi_samarra_2019-02-01_2019-02-28-0000000000-0000000000.tif
  E:/GoogleDrive/samarra_exports_l2_neu/s2_ndvi_samarra_2019-02-01_2019-02-28-0000000000-0000004608.tif
  E:/GoogleDrive/samarra_e