# 01-02 : Parse tahmmee NLT data

To obtain a newer version of the NLT, this notebook parses the data from the GitHub repository [tahmmee/ftsbible](https://github.com/tahmmee/ftsbible), retrieved on 2025/01/26.

In [1]:
import json
import os
from typing import List, Dict, Any

from IPython.display import display, Markdown

## 1. Configuration

In [2]:
# Root of the data tree, relative to this notebook, and its input subfolder.
data_path = '../../data'
input_path = data_path + '/input'

# Folder holding the tahmmee/ftsbible source files, and the folder the
# parsed result is written to.
source_path = input_path + '/tahmmee/ftsbible'
output_path = data_path + '/output/tahmmee/ftsbible'

# Individual files: the book-name lookup table, the NLT source, and the
# JSON file produced by this notebook.
book_names_file = input_path + '/books.json'
input_file = source_path + '/nlt.json'
output_file = output_path + '/NLT.json'

## 2. Load Data

### 2.1. Load Book Names

In [3]:
# Load the book lookup table (book number as a string -> BookName / BookAbbreviation).
# The context manager closes the file handle as soon as parsing finishes, and
# the explicit encoding keeps the read independent of the platform default.
with open(book_names_file, encoding='utf-8') as f:
    book_names = json.load(f)
print(book_names)

{'1': {'BookName': 'Genesis', 'BookAbbreviation': 'Gen'}, '2': {'BookName': 'Exodus', 'BookAbbreviation': 'Exod'}, '3': {'BookName': 'Leviticus', 'BookAbbreviation': 'Lev'}, '4': {'BookName': 'Numbers', 'BookAbbreviation': 'Num'}, '5': {'BookName': 'Deuteronomy', 'BookAbbreviation': 'Deut'}, '6': {'BookName': 'Joshua', 'BookAbbreviation': 'Josh'}, '7': {'BookName': 'Judges', 'BookAbbreviation': 'Judg'}, '8': {'BookName': 'Ruth', 'BookAbbreviation': 'Ruth'}, '9': {'BookName': '1 Samuel', 'BookAbbreviation': '1 Sam'}, '10': {'BookName': '2 Samuel', 'BookAbbreviation': '2 Sam'}, '11': {'BookName': '1 Kings', 'BookAbbreviation': '1 Kgs'}, '12': {'BookName': '2 Kings', 'BookAbbreviation': '2 Kgs'}, '13': {'BookName': '1 Chronicles', 'BookAbbreviation': '1 Chr'}, '14': {'BookName': '2 Chronicles', 'BookAbbreviation': '2 Chr'}, '15': {'BookName': 'Ezra', 'BookAbbreviation': 'Ezra'}, '16': {'BookName': 'Nehemiah', 'BookAbbreviation': 'Neh'}, '17': {'BookName': 'Esther', 'BookAbbreviation': 'Es

### 2.2. Load the Source Data

In [4]:
# Load the flat list of source records. The context manager closes the file
# handle once parsing is done; the encoding is stated explicitly so the read
# does not depend on the platform default.
with open(input_file, encoding='utf-8') as f:
    source = json.load(f)
# Show the record count and the field names of the first record as a sanity check.
print(len(source), source[0].keys())

31064 dict_keys(['id', 'book', 'chapter', 'verse', 'text'])


## 3. Parse the data

### 3.1. Initialize the data structures

In [5]:
# Start with one empty mapping per book name, in the order the books appear
# in the lookup table; the chapters are added while parsing the source rows.
nlt_output = {entry['BookName']: {} for entry in book_names.values()}
nlt_output.keys()

dict_keys(['Genesis', 'Exodus', 'Leviticus', 'Numbers', 'Deuteronomy', 'Joshua', 'Judges', 'Ruth', '1 Samuel', '2 Samuel', '1 Kings', '2 Kings', '1 Chronicles', '2 Chronicles', 'Ezra', 'Nehemiah', 'Esther', 'Job', 'Psalms', 'Proverbs', 'Ecclesiastes', 'Song of Solomon', 'Isaiah', 'Jeremiah', 'Lamentations', 'Ezekiel', 'Daniel', 'Hosea', 'Joel', 'Amos', 'Obadiah', 'Jonah', 'Micah', 'Nahum', 'Habakkuk', 'Zephaniah', 'Haggai', 'Zechariah', 'Malachi', 'Matthew', 'Mark', 'Luke', 'John', 'Acts', 'Romans', '1 Corinthians', '2 Corinthians', 'Galatians', 'Ephesians', 'Philippians', 'Colossians', '1 Thessalonians', '2 Thessalonians', '1 Timothy', '2 Timothy', 'Titus', 'Philemon', 'Hebrews', 'James', '1 Peter', '2 Peter', '1 John', '2 John', '3 John', 'Jude', 'Revelation'])

In [6]:
# Fill nlt_output from the flat source records as
# book name -> chapter -> verse -> text.
for row in source:
    # Resolve the book number with .get() so that an unknown number is
    # reported and skipped here, instead of raising KeyError on the lookup
    # before the "not found" check can run.
    book_id = str(row['book'])
    book = book_names.get(book_id, {}).get('BookName')
    if book not in nlt_output:
        print(f'Book {book_id} not found')
        continue

    # Use string keys for both chapter and verse so the in-memory structure
    # is uniform; json.dump would turn integer keys into strings anyway, so
    # the written file is unaffected.
    chapter = str(row['chapter'])
    verse = str(row['verse'])
    nlt_output[book].setdefault(chapter, {})[verse] = row['text']

# save the output
# Create the target folder first so the write also works on a fresh checkout
# where the output tree does not exist yet.
output_dir = os.path.dirname(output_file)
if output_dir:
    os.makedirs(output_dir, exist_ok=True)
with open(output_file, 'w', encoding='utf-8') as f:
    json.dump(nlt_output, f, indent=2)