In [11]:
from __future__ import annotations
import shutil
from pathlib import Path
import kagglehub

# Kaggle dataset handle passed to kagglehub when downloading.
DATASET_ID = "gentrexha/kosovo-news-articles-dataset"

# Parent of the current working directory, so the value depends on where the
# process is started.
# NOTE(review): presumably this code runs from a direct subdirectory of the
# project (e.g. a notebooks folder) — confirm before running it elsewhere.
PROJECT_ROOT = Path.cwd().parents[0]
# Directory that receives the local copy of the dataset.
DIR = PROJECT_ROOT / "data"
# File name under which the dataset CSV is stored inside DIR.
FILENAME = "kosovo_news.csv"

def ensure_dir() -> None:
    """Create the local data directory ``DIR`` if it is missing.

    Missing parent directories are created as well, and an already
    existing directory is left untouched.
    """
    DIR.mkdir(exist_ok=True, parents=True)


def find_first_csv(dataset_path: Path) -> Path:
    """Return the first CSV file found anywhere below *dataset_path*.

    The directory tree is searched recursively for entries matching
    ``*.csv``; among the regular files found, the one whose path sorts
    first is returned, so the result is deterministic.

    Args:
        dataset_path: Root directory of the downloaded dataset.

    Returns:
        Path of the first CSV file in sorted path order.

    Raises:
        FileNotFoundError: If no CSV file exists below *dataset_path*.
    """
    # rglob matches directories as well; keep regular files only so that a
    # folder named like "something.csv" is never handed to the file copy.
    csv_files = (
        entry for entry in dataset_path.rglob("*.csv") if entry.is_file()
    )
    # min() picks the same element as sorted(...)[0] without a full sort.
    first_csv = min(csv_files, default=None)
    if first_csv is None:
        raise FileNotFoundError(
            f"No CSV file found in dataset path: {dataset_path}"
        )
    return first_csv


def main() -> None:
    """Download the Kaggle dataset and store its CSV in the local data directory.

    If the target file ``DIR / FILENAME`` already exists, nothing is
    downloaded or copied. Otherwise the dataset identified by
    ``DATASET_ID`` is fetched through kagglehub, the first CSV file inside
    it is located, and that file is copied to the target location.

    Raises:
        FileNotFoundError: If the downloaded dataset contains no CSV file.
    """
    target_file = DIR / FILENAME

    # Check before contacting Kaggle: when the local copy is already present
    # there is no reason to hit the network, and the function keeps working
    # offline or without Kaggle credentials.
    if target_file.exists():
        print("Dataset already exists locally:", target_file)
        return

    ensure_dir()

    print(f"Downloading dataset: {DATASET_ID}")
    # dataset_download returns the local location of the fetched dataset.
    dataset_path = Path(kagglehub.dataset_download(DATASET_ID))
    print("KaggleHub cache path:", dataset_path)

    print("Searching for CSV file...")
    csv_file = find_first_csv(dataset_path)
    print("Found:", csv_file)

    print("Saving original dataset to:", target_file)
    # copy2 also attempts to preserve file metadata such as timestamps.
    shutil.copy2(csv_file, target_file)

    print("\nDone")
    print("Local dataset location:", target_file)


# Run the download only when executed as a script (or notebook cell), not
# when this code is imported as a module.
if __name__ == "__main__":
    main()

Downloading dataset: gentrexha/kosovo-news-articles-dataset
KaggleHub cache path: /Users/bleronaidrizi/.cache/kagglehub/datasets/gentrexha/kosovo-news-articles-dataset/versions/4
Searching for CSV file...
Found: /Users/bleronaidrizi/.cache/kagglehub/datasets/gentrexha/kosovo-news-articles-dataset/versions/4/Kosovo-News-Articles-V2/Kosovo-News-Articles.csv
Saving original dataset to: /Users/bleronaidrizi/Sources/Master_Tema_e_Diplomes/Punimi/Sarcasm-Detection-Albanian-News-Dataset/data/kosovo_news.csv

Done
Local dataset location: /Users/bleronaidrizi/Sources/Master_Tema_e_Diplomes/Punimi/Sarcasm-Detection-Albanian-News-Dataset/data/kosovo_news.csv
