In [None]:
from pathlib import Path
import os
from tqdm import tqdm
from huggingface_hub import login
from datasets import load_dataset, Dataset
from pathlib import Path
import numpy as np
from PIL import Image
#from datasets import load_dataset, concatenate_datasets

# Authenticate against the Hugging Face Hub without storing the secret in the
# notebook: the token is read from the HF_TOKEN environment variable. When the
# variable is unset, `hf_token` is None and `login` falls back to
# huggingface_hub's interactive token prompt.
hf_token = os.environ.get("HF_TOKEN")
login(hf_token)

In [None]:
# Coordinates of the dataset: it is loaded from the Hub as
# "<USERNAME>/<HF_DATASET_NAME>", and HF_DATASET_NAME is reused as the base
# name of the zip archive. Replace both placeholders before running.
HF_DATASET_NAME = "<HF_DATASET_NAME>"  # dataset repository name
USERNAME = "<HF_USERNAME>"  # account that owns the dataset repository

In [3]:
# labelIDs listed in trainID order: the position of each entry (0-18) is the
# trainID that maps to it.
_LABEL_IDS_BY_TRAIN_ID = (
    7, 8, 11, 12, 13, 17, 19, 20, 21, 22,
    23, 24, 25, 26, 27, 28, 31, 32, 33,
)

# TrainID -> labelID lookup (one-to-one, exact). TrainID 255 (ignore) is
# appended last and mapped to labelID 0.
trainID_to_labelID = dict(enumerate(_LABEL_IDS_BY_TRAIN_ID))
trainID_to_labelID[255] = 0

# Inverse lookup, labelID -> trainID, kept for completeness (used in training script).
labelID_to_trainID = dict(zip(trainID_to_labelID.values(), trainID_to_labelID.keys()))

def trainid_to_labelid(image_array, mapping=None, unmapped_label=0):
    """Convert a trainID mask into a labelID mask.

    Args:
        image_array: Array-like of trainIDs (any shape).
        mapping: Dict ``{trainID: labelID}``. Defaults to the module-level
            ``trainID_to_labelID`` table.
        unmapped_label: labelID written for every pixel whose trainID is not a
            key of ``mapping``. Defaults to 0, the same labelID the default
            table assigns to the ignore trainID (255), so unknown pixels end up
            as "ignore" in labelID space instead of the out-of-table value 255.

    Returns:
        ``np.uint8`` array with the same shape as ``image_array``.
    """
    if mapping is None:
        mapping = trainID_to_labelID

    # Accept PIL images / nested lists as well as ndarrays.
    train_ids = np.asarray(image_array)

    # Start from the fallback label so anything not listed in `mapping`
    # stays a valid "ignore" labelID.
    label_ids = np.full(train_ids.shape, unmapped_label, dtype=np.uint8)

    for train_id, label_id in mapping.items():
        label_ids[train_ids == train_id] = label_id

    return label_ids

def save_hf_dataset_as_cityscapes(dataset, base_dir, ds_type, city_name):
    """Write a dataset to disk using a Cityscapes-style folder layout.

    Images are saved under ``<base_dir>/leftImg8bit/<ds_type>/<city_name>/`` and
    label masks under ``<base_dir>/gtFine/<ds_type>/<city_name>/``; both
    directories are created when missing. Files are named after the zero-padded
    position of the example in ``dataset``.

    Args:
        dataset: Iterable of examples. Each example is indexed with ``"image"``
            (an object exposing ``save(path)``) and ``"label"`` (converted with
            ``np.array`` and treated as a trainID mask).
        base_dir: Root directory of the exported tree.
        ds_type: Split name; must be ``"train"`` or ``"val"``.
        city_name: Name of the innermost (per-city) folder.
    """
    assert ds_type in ["train", "val"]

    root = Path(base_dir)
    image_dir = root / "leftImg8bit" / ds_type / city_name
    label_dir = root / "gtFine" / ds_type / city_name
    for folder in (image_dir, label_dir):
        folder.mkdir(parents=True, exist_ok=True)

    for index, sample in enumerate(tqdm(dataset)):
        image_file = os.path.join(image_dir, f"{index:06d}_leftImg8bit.png")
        label_file = os.path.join(label_dir, f"{index:06d}_gtFine_labelIds.png")

        # Convert the mask first so a failing conversion leaves no orphan image.
        label_ids = trainid_to_labelid(np.array(sample["label"]))

        sample["image"].save(image_file)
        Image.fromarray(label_ids).save(label_file)

In [None]:
# Load the dataset from the repository "<USERNAME>/<HF_DATASET_NAME>".
ds_cityscapes = load_dataset(
    path=f"{USERNAME}/{HF_DATASET_NAME}",
)

In [None]:
# Export the "val" split into ./val_ds/cityscapes using the Cityscapes layout.
val_ds = ds_cityscapes["val"]
save_hf_dataset_as_cityscapes(
    dataset=val_ds,
    base_dir="val_ds/cityscapes",
    ds_type="val",
    city_name="original",
)

In [None]:
# Export the "train" split into ./train_ds/cityscapes using the Cityscapes layout.
train_ds = ds_cityscapes["train"]
save_hf_dataset_as_cityscapes(
    dataset=train_ds,
    base_dir="train_ds/cityscapes",
    ds_type="train",
    city_name="original",
)

In [None]:
import shutil

# Zip the exported training tree into "<HF_DATASET_NAME>.zip".
# NOTE(review): the validation archive cell writes to the same base name, so
# running both cells leaves only the archive produced last - confirm which
# split is meant to be uploaded.
shutil.make_archive(base_name=HF_DATASET_NAME, format="zip", root_dir="./train_ds")

In [None]:
import shutil

# Zip the exported validation tree into "<HF_DATASET_NAME>.zip".
# NOTE(review): the training archive cell writes to the same base name, so this
# call replaces a previously created training archive - confirm which split is
# meant to be uploaded.
shutil.make_archive(base_name=HF_DATASET_NAME, format="zip", root_dir="./val_ds")

In [None]:
# Upload to Kaggle

import kagglehub
from kagglehub.config import get_kaggle_credentials

# Credentials are handed to kagglehub through this process's environment.
# The former `!export ...` lines were dropped: each `!` command runs in its own
# throwaway subshell, so an export there never reaches the kernel.
# Replace the placeholders locally; do not commit real credentials.
os.environ["KAGGLE_USERNAME"] = "<KAGGLE_USERNAME>"
os.environ["KAGGLE_KEY"] = "<KAGGLE_KEY>"

# NOTE(review): the handle is built from USERNAME, which is also used as the
# Hugging Face account name. If the Kaggle account differs, take the owner from
# get_kaggle_credentials() instead - confirm.
kaggle_handle = f"{USERNAME}/{HF_DATASET_NAME}"  # Kaggle dataset path: <username>/<slug>
archive_path = f"{HF_DATASET_NAME}.zip"  # Local zip archive created by make_archive

dataset_ref = kagglehub.dataset_upload(kaggle_handle, archive_path)

# dataset_upload may not return a value (TODO confirm for the installed
# kagglehub version); fall back to the handle so the message stays informative.
print("Uploaded dataset ref:", dataset_ref or kaggle_handle)
