In [None]:
import os

In [None]:
# DATA PACKAGE #

from frictionless import Package, Resource, Pipeline, steps

# Resource describing the main CSV export.
decp_resource: Resource = Resource(path="decp.csv")

# infer() detects the characteristics of the CSV and tries to guess the datatypes.
decp_resource.infer()

# Fields whose inferred type is overridden with "string".
decp_string_fields = ("acheteur.id", "acheteur.nom", "titulaire.id")

decp_pipeline = Pipeline(
    steps=[
        steps.field_update(name=field_name, descriptor={"type": "string"})
        for field_name in decp_string_fields
    ]
)
decp_resource = decp_resource.transform(decp_pipeline)

# Resource describing the CSV variant without contractor ("titulaire") columns.
decp_sans_titulaire_resource = Resource(path="decp-sans-titulaires.csv")
decp_sans_titulaire_resource.infer()

# First override the inferred type of the buyer fields with "string" ...
sans_titulaire_steps = [
    steps.field_update(name=field_name, descriptor={"type": "string"})
    for field_name in ("acheteur.id", "acheteur.nom")
]
# ... then drop every "titulaire.*" field from the schema, in this order.
sans_titulaire_steps.extend(
    steps.field_remove(name=field_name)
    for field_name in (
        "titulaire.id",
        "titulaire.denominationSociale",
        "titulaire.typeIdentifiant",
    )
)

decp_sans_titulaire_resource = decp_sans_titulaire_resource.transform(
    Pipeline(steps=sans_titulaire_steps)
)

# Properties of the data package descriptor.
# It's possible to provide all the official properties like homepage, version, etc.
# NOTE(review): only decp_resource is packaged; decp_sans_titulaire_resource is
# built earlier in this cell but not listed here — confirm this is intentional.
package_properties = {
    "name": "decp",
    "title": "DECP CSV",
    "description": "Données essentielles de la commande publique (FR) au format CSV.",
    "resources": [decp_resource],
}
package = Package(**package_properties)

# Write the descriptor to datapackage.json (path relative to the working directory).
package.to_json("datapackage.json")

In [None]:
# *** SQLITE ***#

from datapackage_to_datasette import datapackage_to_datasette

# File name of the SQLite database generated by this cell.
sqlite_path = "decp.sqlite"

# Delete the database left by a previous run, if any, before regenerating it.
if os.path.exists(sqlite_path):
    os.remove(sqlite_path)

# Build the SQLite database (and the Datasette metadata file) from the data
# package descriptor written by the previous cell.
datapackage_to_datasette(
    dbname=sqlite_path,
    data_package="datapackage.json",
    metadata_filename="datasette_metadata.json",
    write_mode="replace",
)

# NOTE(review): hard-coded absolute path. Relative file names used after this
# point resolve against this directory — confirm why the working directory has
# to be reset here (presumably it is changed during the conversion above).
os.chdir("/home/git/decp-airflow")

In [None]:
# *** PUBLICATION SUR DATA.GOUV.FR ***#

import requests
import json

# The API key is read from the environment rather than stored in the notebook;
# it is None when DATAGOUVFR_API_KEY is not set.
api_key = os.environ.get("DATAGOUVFR_API_KEY")

# Root URL of the data.gouv.fr API and identifier of the target dataset.
api = "https://www.data.gouv.fr/api/1"
dataset_id = "608c055b35eb4e6ee20eb325"

# Identifiers of the dataset resources, one per uploaded file.
resource_id_decp = "8587fe77-fb31-4155-8753-f6a3c5e0f5c9"
# resource_id_sans_titulaires="834c14dd-037c-4825-958d-0a841c4777ae"
resource_id_datapackage = "65194f6f-e273-4067-8075-56f072d56baf"
resource_id_sqlite = "c6b08d03-7aa4-4132-b5b2-fd76633feecc"


def update_resource(api, dataset_id, resource_id, file_path, api_key):
    """Upload a local file to the upload endpoint of an existing dataset resource.

    Sends a multipart POST request to
    ``{api}/datasets/{dataset_id}/resources/{resource_id}/upload/`` with the
    file attached under the ``file`` form field and the key passed in the
    ``X-API-KEY`` header.

    Args:
        api: Base URL of the API, without a trailing slash.
        dataset_id: Identifier of the dataset owning the resource.
        resource_id: Identifier of the resource to update.
        file_path: Path of the local file to upload.
        api_key: Value sent in the ``X-API-KEY`` header.

    Returns:
        The response body decoded from JSON.

    Raises:
        OSError: If ``file_path`` cannot be opened for reading.

    Note:
        The HTTP status code is not checked; whatever JSON body the server
        returns (including an error payload) is handed back to the caller.
    """
    url = f"{api}/datasets/{dataset_id}/resources/{resource_id}/upload/"
    headers = {"X-API-KEY": api_key}
    # The context manager closes the file handle even when the request raises;
    # previously the handle was opened inline and never closed explicitly.
    with open(file_path, "rb") as upload_file:
        response = requests.post(url, files={"file": upload_file}, headers=headers)
    return response.json()


# One entry per file to publish: (progress message, resource identifier, local file).
# The entries are processed in order; the disabled entry is kept for reference.
pending_uploads = [
    ("Mise à jour de decp.csv...", resource_id_decp, "decp.csv"),
    # ("\nMise à jour de decp-sans-titulaires.csv...", resource_id_sans_titulaires, "decp-sans-titulaires.csv"),
    ("\nMise à jour de datapackage.json...", resource_id_datapackage, "datapackage.json"),
    ("\nMise à jour de decp.sqlite...", resource_id_sqlite, "decp.sqlite"),
]

for upload_message, upload_resource_id, upload_path in pending_uploads:
    # Announce the upload, perform it, then pretty-print the API's JSON answer.
    print(upload_message)
    upload_result = update_resource(
        api, dataset_id, upload_resource_id, upload_path, api_key
    )
    print(json.dumps(upload_result, indent=4))