In [2]:
from scripts.download_s2_pc_by_tile import run as run_download
from scripts.mean_indices import run as run_indices
from scripts.train_and_predict_builtup import run as run_builtup


# Download Sentinel-2 images

In [3]:
# Run the downloader for the area of interest.
# NOTE(review): the log saved with this cell prints "cloud <10.0%" although
# cloud_max=1.0 is passed here — confirm the unit/scale of cloud_max, or
# whether the saved output comes from an earlier run.
run_download(
    outdir="data/sentinel",  # relative to project root
    aoi_path="data/aoi/CMDA.shp",
    year=2025,
    cloud_max=1.0,
    bands10=["B02", "B03", "B04", "B08", "TCI"],
    bands20=["B11", "SCL"],
    max_workers=6,
    retry_count=2,
    timeout=90,
)


2025-12-08 11:20:08,111 INFO: Searching sentinel-2-l2a for year 2025 (cloud <10.0%)
2025-12-08 11:20:09,330 INFO: Found 28 items
2025-12-08 11:20:09,331 INFO: Tiles detected: ['T44PLV', 'T44PMV']
2025-12-08 11:20:09,332 INFO: Wrote tile list: data/sentinel/tile_list.txt
2025-12-08 11:20:09,332 INFO: Total download tasks: 196

KeyboardInterrupt



# Calculating mean indices [NDVI, NDBI, BSI, NDWI]

In [4]:
# Compute the requested index outputs, tile by tile, under data/sentinel.
# NOTE(review): `outputs` lists only mean_* products while compute_median=True
# is kept "because you want median_ndwi" — confirm the median products are
# still written when they are not named in `outputs`.
run_indices(
    root="data/sentinel",
    # tiles=["T44PLV"],  # uncomment to process only this tile
    overwrite=False,  # tiles whose outputs already exist are skipped (see log)
    use_scl=True,
    threads=4,
    outputs=["mean_ndvi", "mean_ndbi", "mean_bsi", "mean_ndwi"],
    compute_median=True,  # keep medians because median_ndwi is wanted
)

2025-12-08 14:58:15,688 INFO: Found tiles: 2
2025-12-08 14:58:15,689 INFO: Processing tile T44PLV
2025-12-08 14:58:15,691 INFO: All requested outputs already exist for tile T44PLV — skipping.
2025-12-08 14:58:15,692 INFO: Processing tile T44PMV
2025-12-08 14:58:15,694 INFO: All requested outputs already exist for tile T44PMV — skipping.


# Model and predictions

In [4]:
# Train the built-up model and run predictions for both tiles.
#
# The feature set and the tree count are named once and reused to build the
# artefact paths, so the file names cannot drift from the settings that
# produced them when either value is changed.
FEATURE_SET = "mean"  # or "median" or "mean_median"
N_TREES = 200         # <- used in filename suffix _200.tif
RUN_TAG = f"{FEATURE_SET}_{N_TREES}"  # "mean_200" with the values above

clf, summary_df = run_builtup(
    root="data/sentinel",
    train_vector="data/training/CMDA_overall.shp",
    class_col="class",
    tiles=["T44PLV", "T44PMV"],
    feature_set=FEATURE_SET,
    max_samples_per_poly=100,
    n_trees=N_TREES,
    random_state=42,  # fixed seed so repeated runs are comparable
    prob_threshold=0.7,
    out_model=f"output/model/builtup_rf_{RUN_TAG}.joblib",
    train_summary=f"output/model/training_summary_{RUN_TAG}.csv",
    overwrite_predictions=False,
)

2025-12-08 20:18:12,191 INFO: Loading training vector: /Volumes/backup_a/project/sentinel2_builtup_pipeline-main/data/training/CMDA_overall.shp
2025-12-08 20:18:12,632 INFO: Using 2 tiles for training/prediction
2025-12-08 20:18:17,765 INFO: Built training matrix: (10026, 4)
2025-12-08 20:18:17,766 INFO: Training samples: (10026, 4)  Class distribution: [6387 3639]
2025-12-08 20:18:18,201 INFO: Classification report (hold-out):
              precision    recall  f1-score   support

           0       0.97      0.95      0.96      1278
           1       0.92      0.95      0.93       728

    accuracy                           0.95      2006
   macro avg       0.94      0.95      0.95      2006
weighted avg       0.95      0.95      0.95      2006

2025-12-08 20:18:19,636 INFO: 3-fold CV F1: [0.94147326 0.9491353  0.93264781] (mean=0.9410854581152536)
2025-12-08 20:18:19,782 INFO: Saved model to /Volumes/backup_a/project/sentinel2_builtup_pipeline-main/output/model/builtup_rf_mean_200.