**ORGANIZERS ONLY NOTEBOOK**

- Dataset paper (supplementary material): https://openaccess.thecvf.com/content/ICCV2021/supplemental/Garnot_Panoptic_Segmentation_of_ICCV_2021_supplemental.pdf
- Dataset: https://github.com/VSainteuf/pastis-benchmark/tree/main

In [None]:
from pathlib import Path

import geopandas as gpd
import numpy as np
import pandas as pd

In [None]:
# Location of the PASTIS dataset and of its metadata file.
PATH_TO_PASTIS = "./PASTIS/"
METADATA = Path(PATH_TO_PASTIS, "metadata.geojson")

# Read the metadata with geopandas; the bare name on the last line makes
# the notebook render the table as the cell output.
mtd = gpd.read_file(METADATA)
mtd

# Split

In [None]:
# Value of the "Fold" metadata column that is held out as the test set.
TEST_FOLD_ID = 3

# Root folders: the source dataset and the two generated splits.
FOLDER_DATA = Path("PASTIS")
FOLDER_TRAIN = Path("TRAIN")
FOLDER_TEST = Path("TEST")

# S2 image folders, in (source, train, test) order.
S2_SOURCE, S2_TRAIN, S2_TEST = (
    root / "DATA_S2" for root in (FOLDER_DATA, FOLDER_TRAIN, FOLDER_TEST)
)

# Annotation folders, in the same (source, train, test) order.
ANNOT_SOURCE, ANNOT_TRAIN, ANNOT_TEST = (
    root / "ANNOTATIONS" for root in (FOLDER_DATA, FOLDER_TRAIN, FOLDER_TEST)
)

In [None]:
# Split: the rows whose "Fold" equals TEST_FOLD_ID form the test set, all
# other rows form the training set.
# `.copy()` gives two frames that are independent of `mtd`, so the column
# assignments below never write into a slice of the original frame
# (avoids pandas' SettingWithCopyWarning / chained-assignment pitfalls).
is_test = mtd["Fold"] == TEST_FOLD_ID
train_mtd = mtd[~is_test].copy()
test_mtd = mtd[is_test].copy()

# Change indexing for images: every patch gets a new sequential "ID".
# A different offset per split avoids duplicates between train and test
# and makes the split readable from the ID itself.
TRAIN_ID_OFFSET = 10000
TEST_ID_OFFSET = 20000
train_mtd["ID"] = range(TRAIN_ID_OFFSET, TRAIN_ID_OFFSET + len(train_mtd))
test_mtd["ID"] = range(TEST_ID_OFFSET, TEST_ID_OFFSET + len(test_mtd))

# Convert TILE to categorical to reduce information about the dataset
# mapping = {
#     "t30uxv": 0,
#     "t31tfj": 1,
#     "t31tfm": 2,
#     "t32ulu": 3,
# }
# train_mtd["region"] = train_mtd["TILE"].map(mapping)
# test_mtd["region"] = test_mtd["TILE"].map(mapping)

# Old (ID_PATCH) and new (ID) names, in matching row order; they are the
# inputs of the file-copy helpers.
train_old_names = train_mtd["ID_PATCH"].to_list()
test_old_names = test_mtd["ID_PATCH"].to_list()
train_new_names = train_mtd["ID"].to_list()
test_new_names = test_mtd["ID"].to_list()

display(train_mtd)
display(test_mtd)

- Copy images and labels

In [None]:
import shutil


def _copy_renamed(
    prefix: str,
    indices_old: list[int],
    indices_new: list[int],
    source_folder: Path,
    dest_folder: Path,
) -> None:
    """Copy ``{prefix}_{old}.npy`` files to ``{prefix}_{new}.npy`` names.

    Args:
        prefix: File name prefix shared by source and destination files.
        indices_old: Identifiers used in the source file names.
        indices_new: Identifiers used in the destination file names,
            paired position by position with ``indices_old``.
        source_folder: Folder holding the source files.
        dest_folder: Folder receiving the copies; created if missing.

    Raises:
        ValueError: If the two index lists do not have the same length.
    """
    # `zip` would silently stop at the shorter list and drop files, so a
    # length mismatch is rejected before anything is written.
    if len(indices_old) != len(indices_new):
        raise ValueError(
            f"indices_old and indices_new must have the same length "
            f"(got {len(indices_old)} and {len(indices_new)})"
        )

    dest_folder.mkdir(parents=True, exist_ok=True)

    for i_old, i_new in zip(indices_old, indices_new):
        source = source_folder / f"{prefix}_{i_old}.npy"
        destination = dest_folder / f"{prefix}_{i_new}.npy"
        shutil.copy(source, destination)


def copy_imgs(
    indices_old: list[int],
    indices_new: list[int],
    source_folder: Path,
    dest_folder: Path,
) -> None:
    """Copy image files ``S2_{old}.npy`` to ``S2_{new}.npy``.

    Args:
        indices_old: Identifiers used in the source file names.
        indices_new: Identifiers used in the destination file names,
            paired position by position with ``indices_old``.
        source_folder: Folder holding the ``S2_*.npy`` source files.
        dest_folder: Folder receiving the copies; created if missing.

    Raises:
        ValueError: If the two index lists do not have the same length.
    """
    _copy_renamed("S2", indices_old, indices_new, source_folder, dest_folder)


def copy_labels(
    indices_old: list[int],
    indices_new: list[int],
    source_folder: Path,
    dest_folder: Path,
) -> None:
    """Copy label files ``TARGET_{old}.npy`` to ``TARGET_{new}.npy``.

    Args:
        indices_old: Identifiers used in the source file names.
        indices_new: Identifiers used in the destination file names,
            paired position by position with ``indices_old``.
        source_folder: Folder holding the ``TARGET_*.npy`` source files.
        dest_folder: Folder receiving the copies; created if missing.

    Raises:
        ValueError: If the two index lists do not have the same length.
    """
    _copy_renamed("TARGET", indices_old, indices_new, source_folder, dest_folder)

In [None]:
# NOTE(review): the copy calls below are commented out, presumably because
# the TRAIN/TEST folders had already been generated; confirm. Later cells
# read the test labels from ANNOT_TEST, so these calls must have been run
# at least once (uncomment to (re)create the folders).
# # Copy train
# copy_imgs(train_old_names, train_new_names, S2_SOURCE, S2_TRAIN)
# copy_labels(train_old_names, train_new_names, ANNOT_SOURCE, ANNOT_TRAIN)

# # Copy test
# copy_imgs(test_old_names, test_new_names, S2_SOURCE, S2_TEST)
# copy_labels(test_old_names, test_new_names, ANNOT_SOURCE, ANNOT_TEST)

In [None]:
# Columns exported with each split; every other metadata column
# (e.g. "Fold", "ID_PATCH") is dropped from the written files.
COLS_TO_KEEP = ["ID", "TILE", "N_Parcel", "Parcel_Cover", "dates-S2", "geometry"]

train_mtd = train_mtd[COLS_TO_KEEP]
test_mtd = test_mtd[COLS_TO_KEEP]

# The only code that creates the split folders is the (commented-out)
# copy cell, so make sure they exist before writing into them.
for split_folder, split_mtd in ((FOLDER_TRAIN, train_mtd), (FOLDER_TEST, test_mtd)):
    split_folder.mkdir(parents=True, exist_ok=True)
    split_mtd.to_file(split_folder / "metadata.geojson", driver="GeoJSON")

display(train_mtd)
display(test_mtd)

# Solution

In [None]:
# Only pick up the label files written by `copy_labels`; a bare "*" would
# also match stray files (hidden files, checkpoints, ...) that would break
# `np.load` or shift the masks relative to the test IDs.
# Sorting the paths lexicographically matches numeric ID order because all
# test IDs have the same number of digits (20000 + i).
test_labels = sorted(ANNOT_TEST.glob("TARGET_*.npy"))
test_labels

In [None]:
# Load every test label file and keep only its first entry along the
# leading axis (NOTE(review): presumably the semantic class map; confirm),
# then stack the results into a single array.
first_entries = [np.load(label_path)[0] for label_path in test_labels]
solution = np.array(first_entries)
solution.shape

In [None]:
# Number of pixels of each class value present in the test ground truth.
unique_true, counts_true = np.unique(solution, return_counts=True)

# Total pixel count, computed once and reused for every percentage.
total_pixels = sum(counts_true)

print("True Class Counts (%):")
for class_id, class_count in zip(unique_true, counts_true):
    share = round(class_count / total_pixels * 100, 1)
    print(class_id, class_count, f"({share}%)")
print("---")
print(f"Total: {total_pixels}")

In [None]:
from baseline.submission_tools import masks_to_str

# Encode the ground-truth masks with the baseline's submission helper.
solution_masks = masks_to_str(solution)

N_TEST = len(solution)

# Fixed seed: the Public/Private assignment of each test sample must be
# reproducible from one run of the notebook to the next.
np.random.seed(1234)
random_values = np.random.choice([0, 1], size=N_TEST)
usages = ["Public" if value != 1 else "Private" for value in random_values]

# A submission built from the ground truth, and the solution file that
# additionally carries the "Usage" column.
test_ids = test_mtd["ID"]
df_submission = pd.DataFrame.from_dict({"ID": test_ids, "MASKS": solution_masks})
df_solution = pd.DataFrame.from_dict(
    {"ID": test_ids, "Usage": usages, "MASKS": solution_masks}
)

# index=False keeps the DataFrame index out of the CSV files, so they
# contain only the columns defined above.
df_submission.to_csv("submission.csv", index=False)
df_solution.to_csv("solution.csv", index=False)