### **LIBRARIES USED**

In [1]:
import os
from pathlib import Path
import shutil
import pandas as pd
import rasterio
import geopandas as gpd
import ee
import geemap
import gc

In [2]:
ee.Authenticate()
ee.Initialize(project="ee-bytesizedagric")
print(ee.String('Hello from the Earth Engine servers!').getInfo())

Hello from the Earth Engine servers!


### **LOADING DATASETS**

In [3]:
# dataset
data_path = Path("./sentinel-2/")

train = gpd.read_file("data/train.geojson")
test = gpd.read_file("data/test.geojson")

In [4]:
train.head()

Unnamed: 0,ID,year,crop,class,geometry
0,ID_Mrbi2k,2024,Rubber,3,"POLYGON ((-7.47662 4.79479, -7.47672 4.7947, -..."
1,ID_ORmFYt,2024,Rubber,3,"MULTIPOLYGON (((-7.11499 4.76768, -7.11471 4.7..."
2,ID_2j2bjN,2024,Rubber,3,"MULTIPOLYGON (((-7.09768 4.90044, -7.09732 4.9..."
3,ID_GsdWS5,2024,Rubber,3,"MULTIPOLYGON (((-7.11829 4.76918, -7.11714 4.7..."
4,ID_zEYtR5,2024,Rubber,3,"POLYGON ((-7.51713 4.66244, -7.51711 4.66249, ..."


In [5]:
test.head()

Unnamed: 0,ID,year,geometry
0,ID_UrUGR0,2024,"POLYGON ((-7.56113 4.68498, -7.56119 4.68497, ..."
1,ID_3ZmbBi,2024,"POLYGON ((-7.53978 4.68285, -7.53978 4.68298, ..."
2,ID_tPmH4c,2024,"MULTIPOLYGON (((-7.22294 4.69077, -7.22282 4.6..."
3,ID_rUfFQH,2024,"MULTIPOLYGON (((-7.22599 4.58477, -7.22582 4.5..."
4,ID_RrthDZ,2024,"MULTIPOLYGON (((-7.09795 4.76096, -7.09771 4.7..."


In [6]:
test.shape

(282, 3)

In [7]:
train.crs

<Geographic 2D CRS: EPSG:4326>
Name: WGS 84
Axis Info [ellipsoidal]:
- Lat[north]: Geodetic latitude (degree)
- Lon[east]: Geodetic longitude (degree)
Area of Use:
- name: World.
- bounds: (-180.0, -90.0, 180.0, 90.0)
Datum: World Geodetic System 1984 ensemble
- Ellipsoid: WGS 84
- Prime Meridian: Greenwich

In [8]:
def find_matching_tiffs(df, data_path):
    """
    Find and collect TIFF file paths from a directory that match IDs in a GeoDataFrame.

    Parameters:
    - df (DataFrame): A DataFrame containing an 'ID' column.
    - data_path (Path or str): Directory path containing TIFF files.

    Returns:
    - List[Path]: List of matching TIFF file paths.
    """
    data_path = Path(data_path)
    matching_tiffs = []

    for idx, row in df.iterrows():
        field_id = row['ID']
        for tif_file in os.listdir(data_path):
            if field_id in tif_file:
                tif_path = data_path / tif_file
                matching_tiffs.append(tif_path)

    return matching_tiffs

In [9]:
# Get the paths of train/test images
train_tiffs = find_matching_tiffs(train, data_path)
test_tiffs = find_matching_tiffs(test, data_path)

In [10]:
train_tiffs[:5]

[]

### **DATAFRAME WITH THE TIF DATA**

In [11]:
def get_tiff_path(idx, month_idx, year, tif_paths):
    month_str = f"{month_idx:02d}"  # ensures 01, 02, ..., 12
    identifier = f"{idx}_{year}_{month_str}"

    for tif_path in tif_paths:
        if identifier in tif_path.name:
            return tif_path

    return None  # Only return None if no match found

In [12]:
YEAR = 2024
MONTHS = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']

def build_tiff_dataframe(gdf, tif_paths, year=2024, is_train=True, month_limit=12):
    """
    Constructs a DataFrame mapping each ID and month to its corresponding TIFF path.

    Parameters:
    - gdf: GeoDataFrame with 'ID' (and optionally 'crop', 'class' if is_train)
    - tif_paths: list or path collection of TIFF files
    - year: year to use for matching filenames
    - is_train: flag to indicate if it's for training (includes 'crop' and 'class')
    - month_range: limit the data to generating the dataframe for a specific range of months

    Returns:
    - A pandas DataFrame with columns: ID, year, month, tifPath, (optional: crop, class)
    """

    data = []
    for month_idx in range(1, month_limit+1):
        month = MONTHS[month_idx - 1]
        for idx in gdf['ID'].values:
            tif_path = get_tiff_path(idx, month_idx, year, tif_paths)
            entry = {
                "ID": f"{idx}_{month}",
                "year": year,
                "month": month,
                "tifPath": str(tif_path) if tif_path else None,
            }
            if is_train:
                entry["crop"] = gdf.loc[gdf['ID'] == idx, 'crop'].values[0]
                entry["class"] = gdf.loc[gdf['ID'] == idx, 'class'].values[0]
            data.append(entry)

    return pd.DataFrame(data)


def build_tiff_dataframe_single_month(gdf, tif_paths, year=2024, is_train=True, month_idx=1):
    """
    Constructs a DataFrame mapping each ID and month to its corresponding TIFF path.

    Parameters:
    - gdf: GeoDataFrame with 'ID' (and optionally 'crop', 'class' if is_train)
    - tif_paths: list or path collection of TIFF files
    - year: year to use for matching filenames
    - is_train: flag to indicate if it's for training (includes 'crop' and 'class')
    - month_range: limit the data to generating the dataframe for a specific range of months

    Returns:
    - A pandas DataFrame with columns: ID, year, month, tifPath, (optional: crop, class)
    """

    data = []
    month = MONTHS[month_idx - 1]
    for idx in gdf['ID'].values:
      tif_path = get_tiff_path(idx, month_idx, year, tif_paths)
      entry = {
          "ID": f"{idx}_{month}",
          "year": year,
          "month": month,
          "tifPath": str(tif_path) if tif_path else None,
          }
      if is_train:
        entry["crop"] = gdf.loc[gdf['ID'] == idx, 'crop'].values[0]
        entry["class"] = gdf.loc[gdf['ID'] == idx, 'class'].values[0]
      data.append(entry)

    return pd.DataFrame(data)

In [13]:
## Build a toy train dataframe for a single month (January)
df_train = build_tiff_dataframe_single_month(train, 'sentinel-2', 
                                             year=YEAR, is_train=True, month_idx=1)

AttributeError: 'str' object has no attribute 'name'