In [None]:
import json
import math
import re
import unicodedata
from collections import defaultdict
from pathlib import Path


def preprocess_tourist_data(
    input_path="sr_data.json",
    output_path="sr_data_grouped.json",
    min_month="2020M01",
):
    """Group flat tourist-statistics rows by month and region and save as JSON.

    The input file must contain a JSON array of row objects. Every row is
    read through the keys ``"MESEC"`` (month label), ``"STATISTIČNA REGIJA"``
    (region name), ``"DRŽAVA"`` (country name), ``"MERITVE"`` and ``"DATA"``.

    The output file contains a JSON array with one object per month, in order
    of first appearance in the input::

        {
          "month": <month label>,
          "regions": {
            <accent-stripped lowercase region name>: {
              "countries": [
                {"name": <cleaned country name>,
                 "data": {"meritve": <MERITVE as-is>, "data": <number>}},
                ...
              ],
              "display_name": <region name, capitalized>
            }
          }
        }

    Args:
        input_path: Path (``str`` or ``Path``) of the JSON file to read.
        output_path: Path (``str`` or ``Path``) of the JSON file to write.
        min_month: Rows whose month label compares lower than this value
            (plain string comparison) are dropped. Pass ``None`` to keep
            every month.

    Raises:
        KeyError: If a row lacks one of the keys listed above.
        OSError: If the input cannot be opened or the output cannot be written.
        json.JSONDecodeError: If the input file is not valid JSON.
    """
    input_path = Path(input_path)
    output_path = Path(output_path)

    # Compiled once; matches a leading numeric prefix such as "1 " or "1.2 ".
    leading_number = re.compile(r"^\d+(\.\d+)?\s*")

    def normalize_value(v):
        """Return *v* as a finite float, or 0 when it is missing or unusable."""
        if v in (None, "", "-", "\"-\""):
            return 0
        try:
            number = float(v)
        except (ValueError, TypeError, OverflowError):
            return 0
        # Strings like "nan" or "inf" parse successfully, but json.dump would
        # write them as NaN/Infinity, which is not valid JSON. Treat them the
        # same way as any other unusable value.
        return number if math.isfinite(number) else 0

    def normalize_country(name):
        """Drop a leading numeric prefix and capitalize the remaining name."""
        cleaned = leading_number.sub("", name).strip()
        return cleaned.capitalize()

    def normalize_name(s):
        """Return *s* lowercased with combining accents removed ("" if not str)."""
        if not isinstance(s, str):
            return ""
        # NFD splits accented letters into base letter + combining mark (Mn),
        # so filtering out Mn characters removes the accents.
        decomposed = unicodedata.normalize("NFD", s)
        stripped = "".join(
            ch for ch in decomposed if unicodedata.category(ch) != "Mn"
        )
        return stripped.strip().lower()

    with input_path.open(encoding="utf-8") as f:
        data = json.load(f)

    # month -> region -> rows; dicts keep insertion order, so months and
    # regions come out in the order they first appear in the input.
    grouped = defaultdict(lambda: defaultdict(list))
    for row in data:
        month = row["MESEC"]
        region = row["STATISTIČNA REGIJA"]
        # Skip rows before the cutoff up front instead of grouping them first
        # and discarding the whole month afterwards.
        if min_month is not None and month < min_month:
            continue
        grouped[month][region].append(row)

    result = []
    for month, regions in grouped.items():
        month_entry = {"month": month, "regions": {}}

        for region_name, rows in regions.items():
            countries = [
                {
                    "name": normalize_country(r["DRŽAVA"]),
                    "data": {
                        "meritve": r["MERITVE"],
                        "data": normalize_value(r["DATA"]),
                    },
                }
                for r in rows
            ]

            # NOTE: region names that differ only by accents/case map to the
            # same key; the one seen later replaces the earlier entry.
            month_entry["regions"][normalize_name(region_name)] = {
                "countries": countries,
                "display_name": region_name.capitalize(),
            }

        result.append(month_entry)

    with output_path.open("w", encoding="utf-8") as f:
        json.dump(result, f, ensure_ascii=False, indent=2)

    print(f"Saved grouped data to {output_path}")


# Run the preprocessing only when this file is executed directly as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    preprocess_tourist_data()


Saved grouped data to sr_data_grouped.json
