# Test Asset Manifest

### 1. Imports

In [8]:
from pathlib import Path
import pandas as pd

from thess_geo_analytics.pipelines.BuildAssetsManifestPipeline import (
    BuildAssetsManifestPipeline,
    BuildAssetsManifestParams,
)
from thess_geo_analytics.utils.RepoPaths import RepoPaths

pd.set_option("display.max_columns", None)
pd.set_option("display.width", 120)


### 2. Check scenes_selected.csv

In [9]:
scenes_path = RepoPaths.table("scenes_selected.csv")
scenes_path



WindowsPath('C:/Users/alexr/OneDrive/Desktop/thess-geo-analytics/outputs/tables/scenes_selected.csv')

In [10]:
df_scenes = pd.read_csv(scenes_path, parse_dates=["datetime"])
df_scenes.head()


Unnamed: 0,anchor_date,acq_datetime,id,datetime,cloud_cover,platform,constellation,collection
0,2025-03-02,2025-03-05 09:09:11.025000+00:00,S2C_MSIL2A_20250305T090911_N0511_R050_T34TFK_2...,2025-03-05 09:09:11.025000+00:00,0.019039,sentinel-2c,sentinel-2,sentinel-2-l2a
1,2025-04-01,2025-04-09 09:05:59.024000+00:00,S2B_MSIL2A_20250409T090559_N0511_R050_T34TFK_2...,2025-04-09 09:05:59.024000+00:00,1.676979,sentinel-2b,sentinel-2,sentinel-2-l2a
2,2025-05-02,2025-04-29 09:20:41.024000+00:00,S2A_MSIL2A_20250429T092041_N0511_R093_T34TFL_2...,2025-04-29 09:20:41.024000+00:00,0.308957,sentinel-2a,sentinel-2,sentinel-2-l2a
3,2025-06-01,2025-06-08 09:05:49.024000+00:00,S2B_MSIL2A_20250608T090549_N0511_R050_T34TFK_2...,2025-06-08 09:05:49.024000+00:00,0.004256,sentinel-2b,sentinel-2,sentinel-2-l2a
4,2025-07-01,2025-07-08 09:05:59.024000+00:00,S2B_MSIL2A_20250708T090559_N0511_R050_T35TKE_2...,2025-07-08 09:05:59.024000+00:00,0.002644,sentinel-2b,sentinel-2,sentinel-2-l2a


In [11]:
print("Rows:", len(df_scenes))
print("Date range:", df_scenes["datetime"].min(), "→", df_scenes["datetime"].max())
print("Unique scenes:", df_scenes["id"].nunique())


Rows: 11
Date range: 2025-03-05 09:09:11.025000+00:00 → 2025-12-28 09:23:19.024000+00:00
Unique scenes: 11


### 3. Buidl Assets Manifests (No DOwnload)

##### All Scenes

In [12]:
pipe = BuildAssetsManifestPipeline()

out_path = pipe.run(
    BuildAssetsManifestParams(
        max_scenes=None,        # None = all scenes
        download_n=0,           # 0 = do not download
        download_missing=False,
        validate_rasterio=False,
        out_name="assets_manifest_selected.csv",
    )
)

out_path



[OK] Assets manifest exported => C:\Users\alexr\OneDrive\Desktop\thess-geo-analytics\outputs\tables\assets_manifest_selected.csv
[OK] Scenes in manifest: 11


WindowsPath('C:/Users/alexr/OneDrive/Desktop/thess-geo-analytics/outputs/tables/assets_manifest_selected.csv')

In [13]:
out_path = pipe.run(
    BuildAssetsManifestParams(
        max_scenes=50,
        sort_mode="cloud_then_time",
        download_n=0,
        download_missing=False,
        validate_rasterio=False,
        out_name="assets_manifest_top50.csv",
    )
)

out_path


[OK] Assets manifest exported => C:\Users\alexr\OneDrive\Desktop\thess-geo-analytics\outputs\tables\assets_manifest_top50.csv
[OK] Scenes in manifest: 11


WindowsPath('C:/Users/alexr/OneDrive/Desktop/thess-geo-analytics/outputs/tables/assets_manifest_top50.csv')

##### Exmaple : Date range filtering

In [16]:
out_path = pipe.run(
    BuildAssetsManifestParams(
        date_start="2022-01-01",
        date_end="2025-12-31",
        max_scenes=None,
        download_n=0,
        download_missing=False,
        validate_rasterio=False,
        out_name="assets_manifest_2022.csv",
    )
)

out_path


[OK] Assets manifest exported => C:\Users\alexr\OneDrive\Desktop\thess-geo-analytics\outputs\tables\assets_manifest_2022.csv
[OK] Scenes in manifest: 11


WindowsPath('C:/Users/alexr/OneDrive/Desktop/thess-geo-analytics/outputs/tables/assets_manifest_2022.csv')

### 4. Inspect Manifest

In [17]:
manifest_path = RepoPaths.table("assets_manifest_selected.csv")
df_manifest = pd.read_csv(manifest_path)
df_manifest.head()


Unnamed: 0,scene_id,datetime,cloud_cover,href_b04,href_b08,href_scl,local_b04,local_b08,local_scl
0,S2C_MSIL2A_20250305T090911_N0511_R050_T34TFK_2...,2025-03-05T09:09:11.025000+00:00,0.019039,https://zipper.dataspace.copernicus.eu/odata/v...,https://zipper.dataspace.copernicus.eu/odata/v...,https://zipper.dataspace.copernicus.eu/odata/v...,C:\Users\alexr\OneDrive\Desktop\thess-geo-anal...,C:\Users\alexr\OneDrive\Desktop\thess-geo-anal...,C:\Users\alexr\OneDrive\Desktop\thess-geo-anal...
1,S2B_MSIL2A_20250409T090559_N0511_R050_T34TFK_2...,2025-04-09T09:05:59.024000+00:00,1.676979,https://zipper.dataspace.copernicus.eu/odata/v...,https://zipper.dataspace.copernicus.eu/odata/v...,https://zipper.dataspace.copernicus.eu/odata/v...,C:\Users\alexr\OneDrive\Desktop\thess-geo-anal...,C:\Users\alexr\OneDrive\Desktop\thess-geo-anal...,C:\Users\alexr\OneDrive\Desktop\thess-geo-anal...
2,S2A_MSIL2A_20250429T092041_N0511_R093_T34TFL_2...,2025-04-29T09:20:41.024000+00:00,0.308957,https://zipper.dataspace.copernicus.eu/odata/v...,https://zipper.dataspace.copernicus.eu/odata/v...,https://zipper.dataspace.copernicus.eu/odata/v...,C:\Users\alexr\OneDrive\Desktop\thess-geo-anal...,C:\Users\alexr\OneDrive\Desktop\thess-geo-anal...,C:\Users\alexr\OneDrive\Desktop\thess-geo-anal...
3,S2B_MSIL2A_20250608T090549_N0511_R050_T34TFK_2...,2025-06-08T09:05:49.024000+00:00,0.004256,https://zipper.dataspace.copernicus.eu/odata/v...,https://zipper.dataspace.copernicus.eu/odata/v...,https://zipper.dataspace.copernicus.eu/odata/v...,C:\Users\alexr\OneDrive\Desktop\thess-geo-anal...,C:\Users\alexr\OneDrive\Desktop\thess-geo-anal...,C:\Users\alexr\OneDrive\Desktop\thess-geo-anal...
4,S2B_MSIL2A_20250708T090559_N0511_R050_T35TKE_2...,2025-07-08T09:05:59.024000+00:00,0.002644,https://zipper.dataspace.copernicus.eu/odata/v...,https://zipper.dataspace.copernicus.eu/odata/v...,https://zipper.dataspace.copernicus.eu/odata/v...,C:\Users\alexr\OneDrive\Desktop\thess-geo-anal...,C:\Users\alexr\OneDrive\Desktop\thess-geo-anal...,C:\Users\alexr\OneDrive\Desktop\thess-geo-anal...


In [18]:
print("Rows:", len(df_manifest))
print("Missing href rows:",
      df_manifest[["href_b04","href_b08","href_scl"]].isna().any(axis=1).sum())


Rows: 11
Missing href rows: 0


### 5. Controlled Download

In [None]:
out_path = pipe.run(
    BuildAssetsManifestParams(
        max_scenes=None,
        download_missing=True,
        validate_rasterio=True,
        out_name="assets_manifest_test_download.csv",
    )
)



[OK] Assets manifest exported => C:\Users\alexr\OneDrive\Desktop\thess-geo-analytics\outputs\tables\assets_manifest_test_download.csv
[OK] Scenes in manifest: 11


TypeError: int() argument must be a string, a bytes-like object or a real number, not 'NoneType'