In [1]:
import os
import sys
from pathlib import Path

# -------------------------------------------------
# Find repo root: the nearest directory, starting at the current working
# directory and walking up through its parents, that contains "src/imgofup".
# -------------------------------------------------
p = Path.cwd().resolve()

# First match wins; None when neither the cwd nor any ancestor qualifies.
REPO_ROOT = next(
    (
        candidate
        for candidate in (p, *p.parents)
        if (candidate / "src" / "imgofup").is_dir()
    ),
    None,
)

if REPO_ROOT is None:
    raise RuntimeError("Could not find repo root (no 'src/imgofup' found).")

SRC_DIR = REPO_ROOT / "src"

# Make src/ importable (NOT the repo root). Inserted at the front of sys.path
# only when absent, so re-running this cell does not add duplicates.
if str(SRC_DIR) not in sys.path:
    sys.path.insert(0, str(SRC_DIR))

# Tell config where project root is.
# NOTE(review): the consumer of PROJ_ROOT is not visible in this notebook —
# presumably the imgofup config module reads it; confirm.
os.environ["PROJ_ROOT"] = str(REPO_ROOT)

# Status lines: the emoji were previously mis-encoded (UTF-8 bytes decoded as
# cp1252) and rendered as garbage; they are restored to the intended glyphs.
print("📦 Repo root:", REPO_ROOT)
print("🔧 PROJ_ROOT:", os.environ["PROJ_ROOT"])
print("✅ src/ added to sys.path:", SRC_DIR)


📦 Repo root: /Users/amirdonyadide/Documents/GitHub/IMGOFUP
🔧 PROJ_ROOT: /Users/amirdonyadide/Documents/GitHub/IMGOFUP
✅ src/ added to sys.path: /Users/amirdonyadide/Documents/GitHub/IMGOFUP/src


In [2]:
from pathlib import Path

from imgofup.config.constants import (
    # core
    MAP_ID_WIDTH,
    FIXED_OPERATOR_CLASSES,

    # userstudy knobs (you added these to constants.py)
    USERSTUDY_PBF_PATH_DEFAULT,
    USERSTUDY_BBOX_DEFAULT,
    USERSTUDY_TARGET_CRS_DEFAULT,
    USERSTUDY_TILE_SIZE_M_DEFAULT,
    USERSTUDY_TOP_K_TILES_DEFAULT,
    USERSTUDY_SEED_DEFAULT,
    USERSTUDY_SHOW_ROADS_DEFAULT,

    USERSTUDY_SAMPLES_DIR_DEFAULT,
    USERSTUDY_METADATA_DIR_DEFAULT,
    USERSTUDY_META_CSV_NAME_DEFAULT,
    USERSTUDY_META_XLSX_NAME_DEFAULT,
)

# Notebook-level settings, each coerced from its project default to a plain
# Python type so later cells can rely on the type.

# ---- Input file and study region ----
PBF_PATH = Path(USERSTUDY_PBF_PATH_DEFAULT).expanduser()
BBOX = list(USERSTUDY_BBOX_DEFAULT)  # [min_lon, min_lat, max_lon, max_lat]
TARGET_CRS = str(USERSTUDY_TARGET_CRS_DEFAULT)

# ---- Tiling ----
TILE_SIZE_M = float(USERSTUDY_TILE_SIZE_M_DEFAULT)
TOP_K = int(USERSTUDY_TOP_K_TILES_DEFAULT)

# ---- Rendering / roads ----
SHOW_ROADS = bool(USERSTUDY_SHOW_ROADS_DEFAULT)

# ---- Output locations ----
SAMPLES_DIR = Path(USERSTUDY_SAMPLES_DIR_DEFAULT).expanduser()
METADATA_DIR = Path(USERSTUDY_METADATA_DIR_DEFAULT).expanduser()
META_CSV = METADATA_DIR / USERSTUDY_META_CSV_NAME_DEFAULT
META_XLSX = METADATA_DIR / USERSTUDY_META_XLSX_NAME_DEFAULT

# ---- Misc ----
SEED = int(USERSTUDY_SEED_DEFAULT)

# Echo the effective configuration, including whether the PBF input is
# actually present on disk, so a misconfigured path is visible immediately.
print(f"PBF_PATH   : {PBF_PATH}")
print(f"exists     : {PBF_PATH.is_file()}")
print(f"BBOX       : {BBOX}")
print(f"TARGET_CRS : {TARGET_CRS}")
print(f"TILE_SIZE_M: {TILE_SIZE_M}")
print(f"TOP_K      : {TOP_K}")
print(f"SHOW_ROADS : {SHOW_ROADS}")
print(f"SAMPLES_DIR: {SAMPLES_DIR}")
print(f"METADATA_DIR: {METADATA_DIR}")
print(f"META_CSV   : {META_CSV}")
print(f"META_XLSX  : {META_XLSX}")
print(f"SEED       : {SEED}")
print(f"Operators  : {list(FIXED_OPERATOR_CLASSES)}")


PBF_PATH   : ../data/input/koeln-regbez-250927.osm.pbf
exists     : True
BBOX       : [7.0, 50.65, 7.2, 50.82]
TARGET_CRS : EPSG:25832
TILE_SIZE_M: 400.0
TOP_K      : 824
SHOW_ROADS : False
SAMPLES_DIR: ../data/input/samples/pairs_new
METADATA_DIR: ../data/input/samples/metadata_new
META_CSV   : ../data/input/samples/metadata_new/meta.csv
META_XLSX  : ../data/input/samples/metadata_new/meta.xlsx
SEED       : 42
Operators  : ['simplify', 'select', 'aggregate', 'displace']


In [3]:
import geopandas as gpd
from pyrosm import OSM

# Load building footprints (and, optionally, the road network) from the PBF
# extract, limited to BBOX, and reproject them to TARGET_CRS.
print("Loading OSM from:", PBF_PATH)

osm = OSM(str(PBF_PATH), bounding_box=BBOX)

buildings = osm.get_buildings()
if buildings is None or len(buildings) == 0:
    raise RuntimeError("No buildings found in the bounding box. Adjust BBOX or use a different PBF.")

# Ensure metric CRS
buildings = buildings.to_crs(TARGET_CRS)

# Roads are only loaded when they will be shown; otherwise `roads` stays None.
roads = None
if SHOW_ROADS:
    roads = osm.get_network(network_type="all")  # or "driving" for fewer lines
    # Reproject only a non-empty result; a None/empty result is left untouched.
    if roads is not None and len(roads) > 0:
        roads = roads.to_crs(TARGET_CRS)

# Status lines: the check mark was previously mis-encoded (UTF-8 bytes decoded
# as cp1252); it is restored to the intended glyph.
print("✅ Buildings:", len(buildings), "| CRS:", buildings.crs)
print("✅ Roads    :", (0 if roads is None else len(roads)), "| CRS:", (None if roads is None else roads.crs))

# Keep only geometry (lighter, faster); for roads also keep "highway" if present.
buildings = buildings[["geometry"]].copy()
if roads is not None:
    keep = ["geometry"] + (["highway"] if "highway" in roads.columns else [])
    roads = roads[keep].copy()


Loading OSM from: ../data/input/koeln-regbez-250927.osm.pbf


  return lib.buffer(
  return lib.buffer(


✅ Buildings: 188628 | CRS: EPSG:25832
✅ Roads    : 0 | CRS: None


In [4]:
from imgofup.userstudy.sample_generation import (
    UserStudySamplePaths,
    generate_userstudy_samples,
)

# Build the output-path bundle from the configured directories and call its
# .ensure() method.
# NOTE(review): .ensure() presumably creates the directories and returns the
# paths object — its implementation is not visible here; confirm.
out = UserStudySamplePaths(
    samples_dir=SAMPLES_DIR,
    metadata_dir=METADATA_DIR,
).ensure()

# Generate the user-study samples from the loaded buildings (and optional roads).
res = generate_userstudy_samples(
    buildings=buildings,
    roads=roads,                 # can be None
    tile_size_m=TILE_SIZE_M,
    out=out,
    top_k_tiles=TOP_K,
    operators=FIXED_OPERATOR_CLASSES,
    seed=SEED,
    render_png=True,             # set False if you only want geojson + meta
    show_roads=SHOW_ROADS,
    simplify_for_render=True,    # keeps PNGs lighter
)

# Status line: the check mark was previously mis-encoded (UTF-8 bytes decoded
# as cp1252); it is restored to the intended glyph.
print("\n✅ Done")
print(res)
print("\nArtifacts:")
print(" - Samples dir :", res.samples_dir)
print(" - Meta CSV    :", res.meta_csv)
print(" - Meta XLSX   :", res.meta_xlsx)


  return lib.simplify_preserve_topology(geometry, tolerance, **kwargs)
  return lib.buffer(
  return lib.unary_union(collections, **kwargs)
  return lib.buffer(
  return lib.buffer(
  return lib.unary_union(collections, **kwargs)
  return lib.buffer(
  return lib.buffer(
  return lib.unary_union(collections, **kwargs)
  return lib.buffer(
  return lib.buffer(
  return lib.unary_union(collections, **kwargs)
  return lib.buffer(
  return lib.buffer(
  return lib.unary_union(collections, **kwargs)
  return lib.buffer(
  return lib.buffer(
  return lib.unary_union(collections, **kwargs)
  return lib.buffer(
  return lib.buffer(
  return lib.unary_union(collections, **kwargs)
  return lib.buffer(
  return lib.buffer(
  return lib.unary_union(collections, **kwargs)
  return lib.buffer(
  return lib.buffer(
  return lib.unary_union(collections, **kwargs)
  return lib.buffer(
  return lib.buffer(
  return lib.unary_union(collections, **kwargs)
  return lib.buffer(
  return lib.buffer(
  return


✅ Done
SampleGenResult(n_tiles_total=824, n_tiles_selected=824, samples_dir='../data/input/samples/pairs_new', meta_csv='../data/input/samples/metadata_new/meta.csv', meta_xlsx='../data/input/samples/metadata_new/meta.xlsx')

Artifacts:
 - Samples dir : ../data/input/samples/pairs_new
 - Meta CSV    : ../data/input/samples/metadata_new/meta.csv
 - Meta XLSX   : ../data/input/samples/metadata_new/meta.xlsx


In [5]:
import pandas as pd

# Sanity-check the generated metadata table: row count, a preview, and the
# distribution of samples across operators and intensity levels.
meta_csv = Path(res.meta_csv)
if not meta_csv.is_file():
    raise FileNotFoundError(f"Missing meta.csv at {meta_csv}")

df = pd.read_csv(meta_csv)
# The check mark and the multiplication sign below were previously mis-encoded
# (UTF-8 bytes decoded as cp1252); they are restored to the intended glyphs.
print("✅ meta.csv rows:", len(df))
display(df.head(10))

print("\nCounts by operator:")
display(df["operator"].value_counts())

print("\nOperator × intensity:")
display(pd.crosstab(df["operator"], df["intensity"]))

# Falls back to a single-zero Series when the column is absent, so the count
# is reported as 0 instead of raising.
print("\nEmpty targets:", int(df.get("is_target_empty", pd.Series([0])).sum()))


✅ meta.csv rows: 824


Unnamed: 0,sample_id,tile_id,operator,intensity,param_value,param_unit,n_input_polys,n_target_polys,ratio,is_target_empty,input_geojson,target_geojson
0,80,80,aggregate,high,3.002,m,190,37,0.19,False,../data/input/samples/pairs_new/0080/0080_inpu...,../data/input/samples/pairs_new/0080/0080_gene...
1,160,160,aggregate,high,6.725,m,73,19,0.26,False,../data/input/samples/pairs_new/0160/0160_inpu...,../data/input/samples/pairs_new/0160/0160_gene...
2,177,177,aggregate,high,3.314,m,270,68,0.25,False,../data/input/samples/pairs_new/0177/0177_inpu...,../data/input/samples/pairs_new/0177/0177_gene...
3,178,178,aggregate,high,3.952,m,91,20,0.22,False,../data/input/samples/pairs_new/0178/0178_inpu...,../data/input/samples/pairs_new/0178/0178_gene...
4,208,208,aggregate,high,2.746,m,554,96,0.17,False,../data/input/samples/pairs_new/0208/0208_inpu...,../data/input/samples/pairs_new/0208/0208_gene...
5,209,209,aggregate,high,2.521,m,502,118,0.24,False,../data/input/samples/pairs_new/0209/0209_inpu...,../data/input/samples/pairs_new/0209/0209_gene...
6,211,211,aggregate,high,3.543,m,143,40,0.28,False,../data/input/samples/pairs_new/0211/0211_inpu...,../data/input/samples/pairs_new/0211/0211_gene...
7,222,222,aggregate,high,4.741,m,89,29,0.33,False,../data/input/samples/pairs_new/0222/0222_inpu...,../data/input/samples/pairs_new/0222/0222_gene...
8,363,363,aggregate,high,2.515,m,403,97,0.24,False,../data/input/samples/pairs_new/0363/0363_inpu...,../data/input/samples/pairs_new/0363/0363_gene...
9,409,409,aggregate,high,3.06,m,191,51,0.27,False,../data/input/samples/pairs_new/0409/0409_inpu...,../data/input/samples/pairs_new/0409/0409_gene...



Counts by operator:


operator
aggregate    206
displace     206
select       206
simplify     206
Name: count, dtype: int64


Operator × intensity:


intensity,high,low,medium
operator,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
aggregate,68,69,69
displace,68,69,69
select,68,69,69
simplify,68,69,69



Empty targets: 0


In [6]:
from pathlib import Path

# Peek into the first sample folder (in sorted order) and list its files.
samples_root = Path(res.samples_dir)

# Gather every sub-directory, order them, and keep at most the first one.
subfolders = [entry for entry in samples_root.iterdir() if entry.is_dir()]
subfolders.sort()
some = subfolders[:1]
if len(some) == 0:
    raise RuntimeError(f"No sample subfolders found under {samples_root}")

sample_dir = some[0]
print("Sample folder:", sample_dir)
print("Files:")
folder_entries = sorted(sample_dir.glob("*"))
for f in folder_entries:
    print(" -", f.name)


Sample folder: ../data/input/samples/pairs_new/0001
Files:
 - 0001_generalized.geojson
 - 0001_input.geojson
 - generalized_0001.png
 - input_0001.png
