In [None]:
import requests
import json
import tqdm
import os
from dotenv import load_dotenv

# Populate the environment via python-dotenv before reading the credentials.
load_dotenv()

CLIENT_ID = os.environ.get("CLIENT_ID", None)
assert CLIENT_ID, "CLIENT_ID is not set in the environment"

CLIENT_SECRET = os.environ.get("CLIENT_SECRET", None)
assert CLIENT_SECRET, "CLIENT_SECRET is not set in the environment"

# Exchange the client credentials for an access token
# (grant_type "client_credentials" on the PISTE OAuth endpoint).
res = requests.post(
    "https://oauth.piste.gouv.fr/api/oauth/token",
    data={
        "grant_type": "client_credentials",
        "client_id": CLIENT_ID,
        "client_secret": CLIENT_SECRET,
        "scope": "openid",
    },
)
# Surface an HTTP error (e.g. rejected credentials) as such, instead of a
# KeyError on "access_token" when parsing the error body below.
res.raise_for_status()
token = json.loads(res.content)["access_token"]

In [None]:
# HTTP headers passed to every API request in this notebook: the bearer token
# obtained above, plus JSON for both the request and the response bodies.
headers = {
    "Authorization": f"Bearer {token}",
    "accept": "application/json",
    "Content-Type": "application/json",
}

# Directory of the generated git repository: the rendered markdown file is
# written there and the git commands are run with it as working directory.
OUTPUT_REPO_PATH = "../legifrance"

In [None]:
# Base URL shared by all API calls in this notebook.
URL_BASE = "https://api.piste.gouv.fr/dila/legifrance/lf-engine-app"

# Ask for the list of codes in state "VIGUEUR".
# NOTE(review): only page 1 with pageSize 100 is requested — presumably the
# list is truncated if the API knows more than 100 codes; confirm.
res = requests.post(
    URL_BASE + "/list/code",
    json.dumps(
        {
            "pageSize": 100,
            "pageNumber": 1,
            # Values noted by the author: "VIGUEUR", "ABROGE", "VIGEUR_DIIF"
            # (sic — possibly "VIGUEUR_DIFF"; TODO confirm against the API docs).
            "states": ["VIGUEUR"],
        }
    ),
    headers=headers,
)
# Fail on an HTTP error here rather than on a missing "results" key later.
res.raise_for_status()

l = json.loads(res.content)

In [None]:
# Print "<index>: <titre>" for every returned code whose "etat" is "VIGUEUR".
for i, c in enumerate(l["results"]):
    if c["etat"] == "VIGUEUR":
        print(f"{i}: {c['titre']}")

In [None]:
# Id of the text to export; sent as "textId" (with nature "CODE") in the
# tableMatieres request that follows.
code = "LEGITEXT000044595989"

In [None]:
# Fetch the table of contents ("tableMatieres") of the selected text as of the
# hard-coded date.
res = requests.post(
    URL_BASE + "/consult/legi/tableMatieres",
    json.dumps({"textId": code, "nature": "CODE", "date": "2023-10-10"}),
    headers=headers,
)
# Stop on an HTTP error so that an error body is never saved to code.json or
# walked as if it were a table of contents.
res.raise_for_status()

In [None]:
# Save a pretty-printed (indent=4) copy of the last response body to code.json.
with open("code.json", "w") as dump_file:
    json.dump(json.loads(res.content), dump_file, indent=4)

In [None]:
# Parsed table of contents; this is the tree walked by yield_article_ids and
# rendered by print_tm.
tm = json.loads(res.content)

In [None]:
def yield_article_ids(tm):
    """Yield the ``cid`` of every article reachable from the node ``tm``.

    A node whose ``"etat"`` is ``"ABROGE"`` yields nothing (its articles and
    sub-sections are skipped). Otherwise the node's own articles come first,
    in order and without those in state ``"ABROGE"``, followed by the
    articles of each entry of ``"sections"``, visited recursively.
    """
    if tm["etat"] != "ABROGE":
        for entry in tm["articles"]:
            if entry["etat"] == "ABROGE":
                continue
            yield entry["cid"]

        for child in tm["sections"]:
            yield from yield_article_ids(child)


# Materialise the article cids found in the table of contents; the bare
# `len(...)` expression is the value displayed by the cell.
articles = list(yield_article_ids(tm))
len(articles)

In [None]:
# Keep only the first 50 article ids, which bounds how many articles are
# fetched and committed by the following cells.
# NOTE(review): looks like a development-time limit — confirm before a full export.
articles = articles[:50]
len(articles)

In [None]:
# Root of the on-disk cache; fetched articles are stored as
# "<CACHE_DIR>/articles/<cid>.json".
CACHE_DIR = "./cache"
# Derive the directory from CACHE_DIR (instead of repeating the path in a
# shell `mkdir -p`) so the two cannot drift apart and the cell is portable.
os.makedirs(f"{CACHE_DIR}/articles", exist_ok=True)

def get_article_by_cid(cid: str):
    """Return the decoded JSON payload for article ``cid``, using a disk cache.

    The payload is read from ``{CACHE_DIR}/articles/{cid}.json`` when that
    file can be opened and parsed. Otherwise it is requested from the
    ``/consult/getArticleByCid`` endpoint, written to that cache file
    (indent=4) and returned.

    Args:
        cid: Article identifier, sent as ``"cid"`` in the request body.

    Returns:
        The decoded JSON document (cached or freshly fetched).

    Raises:
        AssertionError: If the API answers with a status code other than 200;
            the status code is the exception argument.
    """
    path = f"{CACHE_DIR}/articles/{cid}.json"

    try:
        with open(path, "r") as f:
            return json.load(f)
    except (IOError, ValueError):
        # Missing, unreadable or corrupt cache entry: fall through and refetch.
        # The fetch is done outside the handler so that its own errors are not
        # reported as "during handling of" the cache miss.
        pass

    res = requests.post(
        URL_BASE + "/consult/getArticleByCid",
        json.dumps({"cid": cid}),
        headers=headers,
    )
    # Explicit raise rather than `assert`: the check must survive `python -O`,
    # otherwise an error payload would be written to the cache.
    if res.status_code != 200:
        raise AssertionError(res.status_code)

    article = json.loads(res.content)

    # Create the cache directory on demand so the function does not depend on
    # a separate `mkdir` having been run first.
    os.makedirs(os.path.dirname(path), exist_ok=True)
    with open(path, "w") as f:
        json.dump(article, f, indent=4)

    return article


In [None]:
def get_commits_for_cid(cid: str):
    """Build one "commit" per version of the article ``cid``.

    The article is loaded with ``get_article_by_cid`` and each entry of its
    ``"listArticle"`` becomes a commit keyed by
    ``"<dateDebut>-<textCid>-<textCid>..."`` (the distinct ``textCid`` values
    of its ``"lienModifications"``, sorted; ``"???"`` when there are none).

    Args:
        cid: Article identifier.

    Returns:
        dict mapping commit id to a dict with keys ``"commitTitle"``,
        ``"articles"`` (``{cid: texte}``) and ``"date"`` (the version's
        ``dateDebut``).

    Raises:
        AssertionError: If two versions of the article produce the same
            commit id.
    """
    article = get_article_by_cid(cid)

    commits = {}
    for version in article["listArticle"]:
        modifs = version["lienModifications"]
        date = version["dateDebut"]

        # TODO: versions without any modifying text get a placeholder id part.
        textCids = sorted({m["textCid"] for m in modifs}) or ["???"]
        commitId = f"{date}-{'-'.join(textCids)}"

        # TODO: missing titles are rendered as "?TODO?".
        # Sorted so that the title does not depend on set iteration order,
        # which varies between interpreter runs for strings.
        textTitles = sorted(
            {m["textTitle"] if m["textTitle"] is not None else "?TODO?" for m in modifs}
        )
        commitTitle = "Modifications par " + " & ".join(textTitles)
        text = version["texte"]  # TODO html?

        assert commitId not in commits
        commits[commitId] = {
            "commitTitle": commitTitle,
            "articles": {cid: text},
            "date": date,
        }

    return commits

In [None]:
# One commit map per article id, in the order of `articles`, with a tqdm
# progress bar over the iteration.
all_commits = [get_commits_for_cid(cid) for cid in tqdm.tqdm(articles)]

In [None]:
# Combine the per-article commit maps into a single map keyed by commit id:
# commits sharing an id are fused by pooling their "articles" entries.
merged = {}
for partial in all_commits:
    for commitId, c in partial.items():
        if commitId not in merged:
            # Store a copy with its own "articles" dict. Storing `c` itself
            # would make the merge below mutate the dicts held in
            # `all_commits`, and re-running this cell would then trip the
            # "articleCid not in ..." assertion.
            merged[commitId] = {**c, "articles": dict(c["articles"])}
            continue

        assert merged[commitId]["date"] == c["date"]
        # TODO: humans ... — titles are not required to match across articles;
        # the title of the first commit seen for this id is kept. Disabled check:
        # assert merged[commitId]['commitTitle'] == c['commitTitle'], merged[commitId]['commitTitle']  + " !== " + c['commitTitle']

        for articleCid, text in c["articles"].items():
            assert articleCid not in merged[commitId]["articles"]
            merged[commitId]["articles"][articleCid] = text

In [None]:
# Merged commits in ascending order of their "date" value; sorted() is stable,
# so commits with equal dates keep their insertion order from `merged`.
sorted_commits = sorted(merged.values(), key=lambda c: c["date"])

In [None]:
!rm -rf ../legifrance
!mkdir ../legifrance
!git init ../legifrance

import sys


def last_text(commits: list[dict], cid):
    """Return the text of article ``cid`` from the last commit that contains it.

    ``commits`` is scanned from its end towards its start; the first commit
    whose ``"articles"`` mapping has ``cid`` provides the result. When no
    commit mentions ``cid`` the placeholder ``"<TODO>"`` is returned.
    """
    newest_first = (entry["articles"] for entry in reversed(commits))
    return next((texts[cid] for texts in newest_first if cid in texts), "<TODO>")


def print_tm(tm, commits, file=sys.stdout, level=1):
    """Write the table-of-contents node ``tm`` and its descendants as markdown.

    A node in state ``"ABROGE"`` produces no output. Otherwise the node title
    is printed as a heading of depth ``level``, followed by:

    * each of its articles not in state ``"ABROGE"``, as a heading of depth
      ``level + 1`` ("Article <num>") and the text returned by
      ``last_text(commits, cid)``;
    * each of its sections, rendered recursively at ``level + 1``;
    * its ``"commentaire"``, only when the node has neither articles nor
      sections and the comment is truthy.

    Articles and sections are both rendered when a node has both, matching
    the traversal done by ``yield_article_ids``.

    Args:
        tm: Node with keys ``"etat"``, ``"title"``, ``"articles"``,
            ``"sections"`` (and ``"commentaire"`` for leaf nodes).
        commits: Commits handed to ``last_text`` to resolve article texts.
        file: Text stream to write to.
        level: Heading depth of this node (number of ``#``).
    """
    if tm["etat"] == "ABROGE":
        return

    print(f'{("#" * level)} {tm["title"]}', file=file)

    for article in tm["articles"]:
        if article["etat"] != "ABROGE":
            print(f"{('#' * (level + 1))} Article {article['num']}", file=file)
            print(last_text(commits, article["cid"]), file=file)
            print("\n", file=file)

    # No early return after the articles: a node may carry both direct
    # articles and sub-sections, and the latter must not be dropped.
    for section in tm["sections"]:
        print_tm(section, commits, file=file, level=level + 1)

    # "commentaire" is only looked up for nodes with no articles and no
    # sections, as before.
    if not tm["articles"] and not tm["sections"] and tm["commentaire"]:
        print(tm["commentaire"], file=file)

    # TODO: nodes with no articles, no sections and no comment only get
    # their heading (the author's `assert False, tm` is left disabled).


import os
import subprocess
import pytz
from datetime import datetime

tz = pytz.timezone("UTC")

# Replay the history: for each commit, oldest first, re-render the whole text
# from every commit up to and including it, then record a git commit carrying
# that commit's date and title.
# enumerate() covers every entry of sorted_commits, including the most recent
# one (a `range(0, len(sorted_commits) - 1)` bound would stop one short).
for i, commit in enumerate(sorted_commits):
    # Explicit UTF-8 so the repository content does not depend on the
    # platform's default encoding.
    with open(f"{OUTPUT_REPO_PATH}/{tm['title']}.md", "w", encoding="utf-8") as f:
        print_tm(tm, sorted_commits[: (i + 1)], file=f)

    # The stored date is divided by 1000 before being used as a POSIX
    # timestamp (presumably the API returns milliseconds — TODO confirm).
    date = commit["date"] / 1000  # TODO ms vs s
    print(date)
    date_str = datetime.fromtimestamp(date, tz).isoformat()
    date_with_format_str = "format:iso8601:" + date_str

    # --date sets the author date; GIT_COMMITTER_DATE sets the committer date,
    # so both match the date of the legal change.
    env = os.environ.copy()
    env["GIT_COMMITTER_DATE"] = date_with_format_str

    subprocess.run(["git", "add", "."], cwd=OUTPUT_REPO_PATH)
    subprocess.run(
        [
            "git",
            "commit",
            "--date",
            date_with_format_str,
            "-m",
            commit["commitTitle"],
        ],
        env=env,
        cwd=OUTPUT_REPO_PATH,
    )

# Publish: register the GitHub remote and force-push the regenerated history
# over the remote "main" branch.
subprocess.run(
    [
        "git",
        "remote",
        "add",
        "origin",
        "git@github.com:LexHub-project/legifrance.git",
    ],
    cwd=OUTPUT_REPO_PATH,
)

subprocess.run(
    [
        "git",
        "push",
        "-f",
        "origin",
        "main",
    ],
    cwd=OUTPUT_REPO_PATH,
)