In [1]:
from cartiflette.utils import import_yaml_config
from cartiflette.config import FS
from cartiflette.s3 import upload_s3_raw
from cartiflette.download.download import _download_sources
from cartiflette.utils import create_path_bucket, official_epsg_codes
from cartiflette.config import PATH_WITHIN_BUCKET
from cartiflette.utils import hash_file

# --- Notebook configuration -------------------------------------------------
# Local scratch directory used by the cells below.
intermediate_dir = "temp"

# Bucket and prefix on the remote filesystem. The project-level
# PATH_WITHIN_BUCKET is the value actually used; an earlier "test-clean"
# assignment was always overwritten by it and has been removed.
bucket = "projet-cartiflette"
path_within_bucket = PATH_WITHIN_BUCKET
fs = FS

# Dataset selection.
provider = "IGN"
dataset_family = "ADMINEXPRESS"
source = "EXPRESS-COG-CARTO-TERRITOIRE"
year = 2022
territory = "metropole"
borders = "COMMUNE"
vectorfile_format = "shp"

In [2]:
# Intermediate file format and working directory for the cells below.
format_intermediate = "geojson"
local_dir = intermediate_dir

# Project configuration as returned by import_yaml_config().
# NOTE(review): the name `yaml` would shadow the PyYAML module if that module
# were imported in this notebook.
yaml = import_yaml_config()

# Keys of the "territory" entry for IGN / ADMINEXPRESS / EXPRESS-COG-TERRITOIRE.
# NOTE(review): this key differs from `source` ("EXPRESS-COG-CARTO-TERRITOIRE")
# used elsewhere in the notebook -- confirm which one is intended.
list_territories = (
    yaml["IGN"]["ADMINEXPRESS"]["EXPRESS-COG-TERRITOIRE"]["territory"].keys()
)

In [3]:
    from collections import OrderedDict

    # Pair each plural keyword name with the single value chosen in the
    # configuration cell.
    items = list(
        zip(
            ("sources", "territories", "years", "providers", "dataset_families"),
            (source, territory, year, provider, dataset_family),
        )
    )

    # Normalise every value to a list:
    #   - a str or int becomes a one-element list,
    #   - any other falsy value becomes [None],
    #   - a list / tuple / set is copied into a list.
    # Any other truthy type is not added to kwargs at all.
    kwargs = OrderedDict()
    for key, val in items:
        if isinstance(val, (str, int)):
            kwargs[key] = [val]
        elif not val:
            kwargs[key] = [None]
        elif isinstance(val, (list, tuple, set)):
            kwargs[key] = list(val)
    kwargs

OrderedDict([('sources', ['EXPRESS-COG-CARTO-TERRITOIRE']),
             ('territories', ['metropole']),
             ('years', [2022]),
             ('providers', ['IGN']),
             ('dataset_families', ['ADMINEXPRESS'])])

In [4]:
from cartiflette.download.dataset import Dataset

# Build the Dataset handle for the configured selection. Arguments are passed
# positionally, in the same order as before: family, source, year, provider,
# territory, bucket, path within bucket.
datafile = Dataset(
    dataset_family,
    source,
    year,
    provider,
    territory,
    bucket,
    path_within_bucket,
)

# Display the object's repr as the cell output.
datafile

<Dataset IGN ADMINEXPRESS EXPRESS-COG-CARTO-TERRITOIRE metropole 2022>

In [None]:
from cartiflette.download.scraper import MasterScraper

# Read the dataset's md5 attribute and its provider path.
# NOTE(review): `hash` shadows the Python builtin of the same name for the rest
# of the kernel session, and neither `hash` nor `url` is used again in this
# cell -- consider renaming or removing them.
hash = datafile.md5
url = datafile.get_path_from_provider()
# MasterScraper is used as a context manager; whatever download_unpack returns
# for this dataset is kept in `result`.
with MasterScraper() as s:
    result = s.download_unpack(datafile)

In [None]:
# Call the download helper for the single provider / dataset family / source /
# territory / year selected in the configuration cell, with upload=True.
# Presumably this pushes the downloaded files to the bucket under
# path_within_bucket -- TODO confirm against _download_sources.
# NOTE(review): _download_sources is an underscore-prefixed (private) helper.
x = _download_sources(
    upload=True,
    providers=provider,
    dataset_families=dataset_family,
    sources=source,
    territories=territory,
    years=year,
    path_within_bucket=path_within_bucket
)

In [5]:
    from cartiflette.utils import DICT_CORRESP_ADMINEXPRESS
    from cartiflette.mapshaper import mapshaperize_split

    # Remote path of the combined, preprocessed France-wide GeoJSON, built
    # from the same description dict as before (same keys, same order).
    path_raw_s3_combined = create_path_bucket(
        dict(
            bucket=bucket,
            path_within_bucket=path_within_bucket,
            year=year,
            borders="france",
            crs=4326,
            filter_by="preprocessed",
            value="before_cog",
            vectorfile_format="geojson",
            provider="IGN",
            dataset_family="ADMINEXPRESS",
            source="EXPRESS-COG-CARTO-TERRITOIRE",
            territory="france",
            filename="raw.geojson",
            simplification=0,
        )
    )

    # Fetch that file to the local working tree; the call's return value is
    # the cell output.
    fs.download(
        path_raw_s3_combined,
        "temp/preprocessed_combined/COMMUNE.geojson",
    )

[None]

In [6]:
    from cartiflette.pipeline.prepare_cog_metadata import prepare_cog_metadata

    # Build the COG metadata table and export it (minus LIBGEO) as the CSV
    # that the mapshaper join reads.
    localpath = "temp"
    tagc_metadata = prepare_cog_metadata(path_within_bucket, local_dir=localpath)

    # index=False: without it the frame's unnamed index is written as an extra
    # first column, which mapshaper then joins onto every feature as an
    # empty-named numeric field.
    tagc_metadata.drop(columns=["LIBGEO"]).to_csv(
        f"{localpath}/tagc.csv", index=False
    )


                                    

In [8]:
    import subprocess
    from cartiflette.utils import DICT_CORRESP_ADMINEXPRESS

    # Input layer, metadata table and output file for the enrichment step.
    local_dir = "temp/preprocessed_combined"
    filename_initial = "COMMUNE"
    extension_initial = "geojson"
    metadata_file = "temp/tagc.csv"
    output_path = "toto.geojson"

    # Mapshaper command for the enrichment process. Each list element is one
    # option group; the groups are joined with single spaces into the shell
    # command line.
    cmd_step1 = " ".join(
        [
            f"mapshaper {local_dir}/{filename_initial}.{extension_initial}",
            "name='' -proj EPSG:4326",
            f"-join {metadata_file}",
            "keys=INSEE_COM,CODGEO field-types=INSEE_COM:str,CODGEO:str",
            "-filter-fields INSEE_CAN,INSEE_ARR,SIREN_EPCI,INSEE_DEP,INSEE_REG,NOM_M invert",
            "-rename-fields INSEE_DEP=DEP,INSEE_REG=REG",
            f"-each \"{DICT_CORRESP_ADMINEXPRESS['FRANCE_ENTIERE']}='France'\"",
            f"-o {output_path}",
        ]
    )

    # Run the command through the shell; check=True raises if mapshaper exits
    # with a non-zero status. The CompletedProcess is the cell output.
    subprocess.run(cmd_step1, shell=True, check=True)

[proj] Source and destination CRS are the same
[join] Auto-detected number fields: , REG, ZE2020, TUU2017, TDUU2017, TAAV2017, TDAAV2017, CATEAAV2020
[join] Joined data from 34,955 source records to 34,955 target records
[o] Wrote toto.geojson


CompletedProcess(args='mapshaper temp/preprocessed_combined/COMMUNE.geojson name=\'\' -proj EPSG:4326 -join temp/tagc.csv keys=INSEE_COM,CODGEO field-types=INSEE_COM:str,CODGEO:str -filter-fields INSEE_CAN,INSEE_ARR,SIREN_EPCI,INSEE_DEP,INSEE_REG,NOM_M invert -rename-fields INSEE_DEP=DEP,INSEE_REG=REG -each "PAYS=\'France\'" -o toto.geojson', returncode=0)

In [None]:
    simplification_percent = simplification if simplification is not None else 0

    # City level borders, file location
    directory_city = config_file_city.get("location", local_dir)
    initial_filename_city = config_file_city.get("filename", "COMMUNE")
    extension_initial_city = config_file_city.get("extension", "shp")

    output_path = (
        f"{local_dir}/{territory}/{niveau_agreg}/{format_output}/{simplification=}"
    )

    os.makedirs(output_path, exist_ok=True)

    if simplification_percent != 0:
        option_simplify = f"-simplify {simplification_percent}% "
    else:
        option_simplify = ""

    temp_filename = "temp.geojson"

    # STEP 1: ENRICHISSEMENT AVEC COG
    mapshaper_enrich(
        local_dir=directory_city,
        filename_initial=initial_filename_city,
        extension_initial=extension_initial_city,
        dict_corresp=dict_corresp,
        output_path=temp_filename,
    )

    if niveau_polygons != initial_filename_city:
        csv_list_vars = (
            f"{dict_corresp[niveau_polygons]}," f"{dict_corresp[niveau_agreg]}"
