**install deps**

In [30]:
%pip install python-dotenv

Note: you may need to restart the kernel to use updated packages.


**load env**

In [31]:
from dotenv import load_dotenv
import os
# Let python-dotenv populate the environment, then read the ESV API token from it.
# The token is None when ESV_API_KEY is not set.
load_dotenv()
esv_api_key = os.environ.get("ESV_API_KEY")

In [32]:
import requests
import json

def get_passage(book, chapter, folder, html=False, timeout=30):
    """Download one chapter from the ESV API and save the JSON response to disk.

    The decoded response is written to ``{folder}/{book}-{chapter}.json``.

    Args:
        book: Book name; used in the API query and in the output file name.
        chapter: Chapter number; appended to ``book`` to build the query.
        folder: Directory to write into. It must already exist, because the
            file is opened without creating parent directories.
        html: When true, call the ``html`` passage endpoint instead of ``text``.
        timeout: Seconds to wait for the API before giving up.

    Raises:
        requests.HTTPError: If the API answers with an error status code.
        requests.Timeout: If the API does not answer within ``timeout`` seconds.
    """
    passage_type = "html" if html else "text"
    url = f"https://api.esv.org/v3/passage/{passage_type}/"
    headers = {"Authorization": f"Token {esv_api_key}"}
    params = {"q": book + str(chapter)}

    # Without a timeout a stalled connection would block the notebook forever.
    response = requests.get(url, headers=headers, params=params, timeout=timeout)

    # Stop here on 4xx/5xx so an error body is never saved as if it were a passage.
    response.raise_for_status()

    # Decode before opening the file: a bad body must not leave an empty file behind.
    payload = response.json()

    with open(f"{folder}/{book}-{chapter}.json", "w") as f:
        json.dump(payload, f)

In [34]:
books = [("john", 1)]

def get_multiple_passages(books, folder="raw_books", html=False):
    """Download chapters 1..N of each listed book via ``get_passage``.

    Args:
        books: Iterable of ``(book_name, chapter_count)`` pairs.
        folder: Directory handed to ``get_passage`` for the output files.
        html: Forwarded to ``get_passage``; true requests HTML instead of text.
    """
    for book_name, chapters in books:
        # Chapters are numbered from 1, so the count itself is the last chapter.
        for chapter in range(1, chapters + 1):
            get_passage(book_name, chapter, folder, html)

In [60]:
import re

def parse_esv_output(text):
    """Split ESV passage text into ordered ``[marker, content]`` pairs.

    Footnote references such as ``(1)`` are removed, headings that stand alone
    between blank lines are tagged with a ``[title]`` marker, and the text is
    then cut at every ``[N]`` / ``[title]`` marker.

    Args:
        text: Passage text containing ``[N]`` verse markers.

    Returns:
        A list of two-item lists ``[marker, content]``. ``marker`` is the verse
        number as a string, or ``"title"`` for a heading; ``content`` is the
        text up to the next marker with surrounding whitespace stripped.

    Note:
        Each entry is delimited by the marker that follows it, so whatever
        comes after the final marker is not returned. NOTE(review): with the
        observed data this presumably discards the trailing footnotes section;
        confirm before relying on it for passages without one.
    """
    # Drop footnote references such as "(1)".
    text = re.sub(r"\(\d+\)", "", text)

    # A heading is a run of words standing alone between blank lines. Tag it so
    # it is split out like a verse. Apostrophes and hyphens are accepted so a
    # heading like "Jesus Heals an Official's Son" is recognised too.
    text = re.sub(r"\n\n([\w\s,'’-]+)\n\n", r" [title] \1", text)

    # DOTALL lets an entry run across line breaks; without it every verse that
    # is followed by a paragraph break (or contains one) is silently dropped.
    entries = re.findall(
        r"\[(\d+|title)\] (.*?)(?= \[(?:\d+|title)\])", text, flags=re.DOTALL
    )

    return [[marker, content.strip()] for marker, content in entries]

In [61]:

import os
import json

# Turn every raw chapter download in raw_books/ into a parsed file in books/.
for book in os.listdir("raw_books"):
    # Only "<book>-<chapter>.json" downloads are inputs. Checking the extension
    # skips .gitkeep as before, and also any other stray entry that would
    # otherwise crash json.load or get a mangled output name.
    book_name, extension = os.path.splitext(book)
    if extension != ".json":
        continue

    with open(f"raw_books/{book}") as f:
        data = json.load(f)

    # The passage text is the first element of the response's "passages" list.
    verses = data["passages"][0]

    parsed = parse_esv_output(verses)

    with open(f"books/{book_name}.json", "w") as f:
        json.dump(parsed, f)


### schema for parsed books

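These JSON files are ordered maps: each file is a JSON array of `[key, text]` pairs, and the array order is the reading order. Hopefully this doesn't cause any incorrect ordering in the future.

`["title" | string, str][]`

The key is `"title"` for a section heading; otherwise it is the verse number kept as a string (e.g. `"1"`), not an int.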


In [36]:
books = [("john", 21)]

def get_multiple_passages(books, folder="raw_html_books", html=True):
    """Download chapters 1..N of each listed book via ``get_passage``.

    The defaults fetch the HTML rendering into ``raw_html_books``.

    Args:
        books: Iterable of ``(book_name, chapter_count)`` pairs.
        folder: Directory handed to ``get_passage`` for the output files.
        html: Forwarded to ``get_passage``; true requests HTML instead of text.
    """
    for book_name, chapters in books:
        # Chapters are numbered from 1, so the count itself is the last chapter.
        for chapter in range(1, chapters + 1):
            get_passage(book_name, chapter, folder, html)

In [None]:
# # read from raw_books/john-1.json
# with open("raw_books/john-1.json") as f:
#     data = json.load(f)

# text = data["passages"][0]

# # replace all instances of \(\d+\) with nothing
# text = re.sub(r"\(\d+\)", "", text)

# # replace all \\n\\n([\w\s,]+)\\n\\n with \n\n[\1]\n\n
# text = re.sub(r"\n\n([\w\s,]+)\n\n", r" [title] \1", text)

# print(json.dumps(text))

# matches = re.findall(r"\[(\d+|title)\] (.*?)(?= \[(\d+|title)\])", text)

# for match in matches:
#     print(match)
