In [None]:
import os

# Work from the "dist" directory. os.curdir is only the constant "." and says
# nothing about where the process is, so the real working directory is read
# with os.getcwd(). This keeps the cell safe to re-run: once inside "dist" it
# no longer tries to enter a nested "dist/dist".
if os.path.basename(os.getcwd()) != "dist":
    os.chdir("./dist")

# Show the directory every relative path below is resolved against.
print(os.getcwd())

In [None]:
# VALIDATION #

from tableschema import Table, CastError
from pprint import pprint

# Open decp.csv together with the DECP table schema, which is fetched over
# HTTP from the "main" branch of the ColinMaudry/decp-table-schema repository.
# NOTE(review): the schema URL is not pinned to a tag or commit, so validation
# results may change when the upstream schema changes.
table = Table(
    "decp.csv",
    schema="https://raw.githubusercontent.com/ColinMaudry/decp-table-schema/main/schema.json",
)

# Filled by exc_handler: one tuple per problem reported while reading the table.
errors = []


def exc_handler(exc, row_number=None, row_data=None, error_data=None):
    """Store the details of a reported exception in the module-level ``errors`` list.

    Args:
        exc: The reported exception; its ``errors`` attribute is recorded.
        row_number: Number of the offending row, rendered as ``"row <n>"``.
        row_data: Accepted to match the handler signature; not recorded.
        error_data: Recorded as-is as the last element of the entry.
    """
    entry = (exc.errors, f"row {row_number}", error_data)
    errors.append(entry)


# Read the table, passing exc_handler so reported problems are collected in
# `errors`; the rows returned by read() are not kept.
table.read(exc_handler=exc_handler)
# Last expression of the cell: the number of collected errors.
len(errors)

In [None]:
# DATA PACKAGE #

from frictionless import Package, Resource, Pipeline, steps

def _as_string(*field_names):
    """Return one ``field_update`` step per name, each setting the type to string."""
    return [
        steps.field_update(name=field_name, descriptor={"type": "string"})
        for field_name in field_names
    ]


# One entry per CSV file to describe in the data package:
#   "csv"   - path of the file,
#   "steps" - transform steps applied after type inference; they force the
#             listed fields to the "string" type.
# NOTE(review): presumably these fields would otherwise be inferred as a
# non-string type - confirm.
outputs = [
    {
        "csv": "decp.csv",
        "steps": _as_string("acheteur.id", "acheteur.nom", "titulaire.id"),
    },
    {
        "csv": "decp-sans-titulaires.csv",
        "steps": _as_string("acheteur.id", "acheteur.nom"),
    },
    {
        "csv": "decp-titulaires.csv",
        # NOTE(review): "acheteur.nom" used to be listed twice for this file;
        # the redundant step was dropped. The second occurrence may have been
        # meant for another field (e.g. "titulaire.id", as for decp.csv) -
        # confirm against the file's columns.
        "steps": _as_string("acheteur.id", "acheteur.nom", "departement"),
    },
]

# Build one transformed resource per entry of `outputs`.
resources = []

for output in outputs:
    resource = Resource(path=output["csv"])
    # This method detects the characteristics of the CSV and tries to guess
    # the data types.
    resource.infer()

    # Apply the per-file steps on top of the inferred description.
    pipeline = Pipeline(steps=output["steps"])
    resource = resource.transform(pipeline)
    resources.append(resource)

# Gather the resources into a single data package and write its descriptor
# to datapackage.json.
decp_package = Package(
    name="decp",
    title="DECP tabulaire",
    description="Données essentielles de la commande publique (FR) au format tabulaire.",
    resources=resources,
    # The other official properties (homepage, version, etc.) can be given here too.
)
decp_package.to_json("datapackage.json")

In [None]:
# *** SQLITE ***#

from datapackage_to_datasette import datapackage_to_datasette

# Name of the SQLite database generated from the data package.
sqlite_db = "decp.sqlite"

# Remove the database left by a previous run, if any, before regenerating it.
if os.path.exists(sqlite_db):
    os.remove(sqlite_db)

# Build the database from datapackage.json, using datasette_metadata.json as
# the metadata file name.
datapackage_to_datasette(
    dbname=sqlite_db,
    data_package="datapackage.json",
    metadata_filename="datasette_metadata.json",
    write_mode="replace",
)

In [None]:
# *** PUBLICATION SUR DATA.GOUV.FR ***#

import requests
import json

# API key read from the environment; None when DATAGOUVFR_API_KEY is not set.
api_key = os.environ.get("DATAGOUVFR_API_KEY")
# Base URL of the data.gouv.fr API (version 1).
api = "https://www.data.gouv.fr/api/1"
# Identifier of the dataset whose resources are updated below.
dataset_id = "608c055b35eb4e6ee20eb325"


def update_resource(api, dataset_id, resource_id, file_path, api_key):
    """Upload a local file to the upload endpoint of a dataset resource.

    Args:
        api: Base URL of the API, without trailing slash.
        dataset_id: Identifier of the dataset that owns the resource.
        resource_id: Identifier of the resource to update.
        file_path: Path of the local file to send.
        api_key: Value sent in the ``X-API-KEY`` header.

    Returns:
        The response body decoded from JSON. The HTTP status is not checked,
        so an error payload returned by the API is handed back as-is.

    Raises:
        OSError: If ``file_path`` cannot be opened.
    """
    url = f"{api}/datasets/{dataset_id}/resources/{resource_id}/upload/"
    headers = {"X-API-KEY": api_key}
    # The context manager closes the file handle as soon as the request is
    # over, even if the request raises, so repeated uploads do not leak handles.
    with open(file_path, "rb") as file_handle:
        response = requests.post(url, files={"file": file_handle}, headers=headers)
    return response.json()


# Files to publish, each paired with the identifier of the resource it replaces.
uploads = [
    {"file": file_name, "resource_id": resource_id}
    for file_name, resource_id in (
        ("decp.csv", "8587fe77-fb31-4155-8753-f6a3c5e0f5c9"),
        ("decp.parquet", "11cea8e8-df3e-4ed1-932b-781e2635e432"),
        ("decp-titulaires.csv", "25fcd9e6-ce5a-41a7-b6c0-f140abb2a060"),
        ("decp-titulaires.parquet", "ed8cbf31-2b86-4afc-9696-3c0d7eae5c64"),
        ("decp-sans-titulaires.csv", "834c14dd-037c-4825-958d-0a841c4777ae"),
        ("decp-sans-titulaires.parquet", "df28fa7d-2d36-439b-943a-351bde02f01d"),
        ("decp.sqlite", "c6b08d03-7aa4-4132-b5b2-fd76633feecc"),
        ("datapackage.json", "65194f6f-e273-4067-8075-56f072d56baf"),
    )
]

# Send every file in turn and print the API's JSON answer for each of them.
for upload in uploads:
    print(f"Mise à jour de {upload['file']}...")
    api_answer = update_resource(
        api, dataset_id, upload["resource_id"], upload["file"], api_key
    )
    print(json.dumps(api_answer, indent=4))