In [None]:
from cartiflette.utils import import_yaml_config
from cartiflette.config import FS
from cartiflette.s3 import upload_s3_raw
from cartiflette.download.download import _download_sources
from cartiflette.utils import create_path_bucket, official_epsg_codes
from cartiflette.config import PATH_WITHIN_BUCKET
from cartiflette.utils import hash_file

# Notebook-wide parameters: which dataset to fetch and where it is stored.

# Local scratch directory for downloaded / intermediate files.
intermediate_dir = "temp"

# Object exposed by cartiflette.config; used further down for `fs.download`.
fs = FS

# Dataset identification.
provider = "IGN"
dataset_family = "ADMINEXPRESS"
source = "EXPRESS-COG-CARTO-TERRITOIRE"
year = 2022
territory = "metropole"
borders = "COMMUNE"
vectorfile_format = "shp"

# Storage location. The prefix inside the bucket is assigned exactly once,
# from the package configuration, so there is a single source of truth.
bucket = "projet-cartiflette"
path_within_bucket = PATH_WITHIN_BUCKET

In [None]:
# Working directory and intermediate vector format used by later cells.
local_dir = intermediate_dir
format_intermediate = "geojson"

# Read the configuration returned by `import_yaml_config` and collect the
# territory keys declared under IGN / ADMINEXPRESS / EXPRESS-COG-TERRITOIRE.
# NOTE(review): this key differs from `source`
# ("EXPRESS-COG-CARTO-TERRITOIRE") -- confirm which entry is intended.
yaml = import_yaml_config()
adminexpress_config = yaml["IGN"]["ADMINEXPRESS"]
list_territories = adminexpress_config["EXPRESS-COG-TERRITOIRE"]["territory"].keys()

In [None]:
    from collections import OrderedDict 
    # Normalise each download parameter into a list, keyed by the plural
    # argument name:
    #   * a single str / int      -> one-element list
    #   * any other falsy value   -> [None]
    #   * a list / tuple / set    -> converted to a list
    # A truthy value of any other type leaves its key unset.
    kwargs = OrderedDict()
    raw_params = (
        ("sources", source),
        ("territories", territory),
        ("years", year),
        ("providers", provider),
        ("dataset_families", dataset_family),
    )
    collection_types = (list, tuple, set)
    for name, value in raw_params:
        if isinstance(value, (str, int)):
            kwargs[name] = [value]
        elif not value:
            kwargs[name] = [None]
        elif isinstance(value, collection_types):
            kwargs[name] = list(value)
    kwargs

In [None]:
from cartiflette.download.dataset import Dataset

# Build the Dataset object for the configured parameters. Arguments are
# passed positionally, in this order: dataset family, source, year, provider,
# territory, bucket, prefix within the bucket.
dataset_args = (
    dataset_family,
    source,
    year,
    provider,
    territory,
    bucket,
    path_within_bucket,
)
datafile = Dataset(*dataset_args)
datafile

In [None]:
from cartiflette.download.scraper import MasterScraper

# NOTE(review): `hash` shadows the built-in function of the same name for the
# rest of the session; consider renaming once all consumers are known.
hash = datafile.md5
url = datafile.get_path_from_provider()

# Download and unpack the dataset; the scraper is used as a context manager.
with MasterScraper() as scraper:
    result = scraper.download_unpack(datafile)

In [None]:
# Run the download for the configured dataset with upload enabled.
download_params = {
    "providers": provider,
    "dataset_families": dataset_family,
    "sources": source,
    "territories": territory,
    "years": year,
    "path_within_bucket": path_within_bucket,
    "upload": True,
}
x = _download_sources(**download_params)

## Mapshaper splits

In [None]:
    from cartiflette.utils import DICT_CORRESP_ADMINEXPRESS
    from cartiflette.mapshaper import mapshaperize_split

    # Description of the combined, preprocessed France-wide GeoJSON; the
    # bucket path is derived from it by `create_path_bucket`.
    raw_combined_spec = {
        "bucket": bucket,
        "path_within_bucket": path_within_bucket,
        "year": year,
        "borders": "france",
        "crs": 4326,
        "filter_by": "preprocessed",
        "value": "before_cog",
        "vectorfile_format": "geojson",
        "provider": "IGN",
        "dataset_family": "ADMINEXPRESS",
        "source": "EXPRESS-COG-CARTO-TERRITOIRE",
        "territory": "france",
        "filename": "raw.geojson",
        "simplification": 0,
    }
    path_raw_s3_combined = create_path_bucket(raw_combined_spec)

    # Fetch that file to the local path the mapshaper cells read from.
    fs.download(path_raw_s3_combined, "temp/preprocessed_combined/COMMUNE.geojson")

In [None]:
    from cartiflette.pipeline.prepare_cog_metadata import prepare_cog_metadata
    # Prepare the COG metadata and save it, without the LIBGEO column, as
    # a CSV file in the local working directory.
    # (presumably a pandas DataFrame, given `.drop(columns=...)` / `.to_csv` -- TODO confirm)
    localpath = "temp"
    tagc_metadata = prepare_cog_metadata(path_within_bucket, local_dir=localpath)
    tagc_without_libgeo = tagc_metadata.drop(columns=["LIBGEO"])
    tagc_without_libgeo.to_csv(f"{localpath}/tagc.csv")


In [None]:
    import subprocess
    import os
    from cartiflette.mapshaper.mapshaper_wrangling import mapshaper_enrich, mapshaper_split
    from cartiflette.utils import DICT_CORRESP_ADMINEXPRESS

    # Manual walk-through of the enrich -> (dissolve) -> split pipeline.
    #
    # Parameters for this run. Note that `local_dir`, `territory`, `provider`
    # and `source` reassign names that earlier cells also set.
    simplification = 50
    local_dir = "temp/preprocessed_combined"
    territory = "test"
    niveau_agreg = "UNITE_URBAINE"
    format_output = "geojson"
    niveau_polygons = "COMMUNE"
    crs = 2154
    provider = "IGN"
    source = "ADMIN-EXPRESS-COG-CARTO"
    dict_corresp = DICT_CORRESP_ADMINEXPRESS
    # Treat a missing simplification level as "no simplification".
    simplification_percent = simplification if simplification is not None else 0

    # City level borders, file location
    config_file_city = {}
    directory_city = config_file_city.get("location", local_dir)
    initial_filename_city = "COMMUNE"
    extension_initial_city = "geojson"

    # `{simplification=}` renders as "simplification=50", so the last path
    # component carries both the parameter name and its value.
    output_path = (
        f"{local_dir}/{territory}/{niveau_agreg}/{format_output}/{simplification=}"
    )

    os.makedirs(output_path, exist_ok=True)

    # Only pass a -simplify option to mapshaper when a non-zero level is set.
    if simplification_percent != 0:
        option_simplify = f"-simplify {simplification_percent}% "
    else:
        option_simplify = ""

    # Intermediate file shared by the enrich, dissolve and split steps.
    temp_filename = "temp.geojson"

    # STEP 1: ENRICH WITH COG
    mapshaper_enrich(
        local_dir=directory_city,
        filename_initial=initial_filename_city,
        extension_initial=extension_initial_city,
        dict_corresp=dict_corresp,
        output_path=temp_filename,
    )

    if niveau_polygons != initial_filename_city:
        # Fields carried through the dissolve: the polygon-level and
        # aggregation-level variables, plus their labels when the
        # correspondence table defines them.
        csv_list_vars = (
            f"{dict_corresp[niveau_polygons]}," f"{dict_corresp[niveau_agreg]}"
        )
        libelle_niveau_polygons = dict_corresp.get("LIBELLE_" + niveau_polygons, "")
        if libelle_niveau_polygons != "":
            libelle_niveau_polygons = f",{libelle_niveau_polygons}"
        libelle_niveau_agreg = dict_corresp.get("LIBELLE_" + niveau_agreg, "")
        # Each label gets its own leading comma only when it is present, so
        # the field list never has a missing or trailing separator.
        if libelle_niveau_agreg != "":
            libelle_niveau_agreg = f",{libelle_niveau_agreg}"
        csv_list_vars = (
            f"{csv_list_vars}{libelle_niveau_polygons}{libelle_niveau_agreg}"
        )

        # STEP 1B: DISSOLVE IF NEEDED
        # The intermediate file is overwritten in place (`force`).
        cmd_dissolve = (
            f"mapshaper {temp_filename} "
            f"name='' -proj EPSG:4326 "
            f"-dissolve {dict_corresp[niveau_polygons]} "
            f"calc='POPULATION=sum(POPULATION)' "
            f"copy-fields={csv_list_vars} "
            f"-o {temp_filename} force"
        )
        subprocess.run(cmd_dissolve, shell=True, check=True)

    # IF WE DESIRE TO BRING "DROM" CLOSER TO FRANCE
    # NOTE(review): `mapshaper_bring_closer` is not imported in the cells
    # visible here; this branch is not taken for niveau_agreg="UNITE_URBAINE"
    # but would raise NameError otherwise -- confirm the import location.
    if niveau_agreg.upper() == "FRANCE_ENTIERE_DROM_RAPPROCHES":
        niveau_filter_drom = "DEPARTEMENT"
        if niveau_polygons != "COMMUNE":
            niveau_filter_drom = niveau_polygons
        input_path = mapshaper_bring_closer(
            temp_filename, level_agreg=niveau_filter_drom
        )
    else:
        input_path = temp_filename

    print(input_path)

    # STEP 2: SPLIT AND SIMPLIFY
    mapshaper_split(
        input_file=input_path,
        layer_name="",
        split_variable=dict_corresp[niveau_agreg],
        output_path=output_path,
        format_output=format_output,
        crs=crs,
        option_simplify=option_simplify,
        source_identifier=f"{provider}:{source}",
    )


In [None]:
# Call `mapshaper_bring_closer` once per aggregation level, printing the level
# name first so progress is visible.
# NOTE(review): `mapshaper_bring_closer` is not imported in the cells visible
# here, and the other call in this notebook also passes an input filename as
# first argument -- confirm the import and the intended signature.
levels_to_bring_closer = (
    "DEPARTEMENT",
    "REGION",
    "BASSIN_VIE",
    "UNITE_URBAINE",
    "ZONE_EMPLOI",
    "AIRE_ATTRACTION_VILLES",
)
for level_agreg in levels_to_bring_closer:
    print(level_agreg)
    mapshaper_bring_closer(level_agreg=level_agreg)