In [1]:
import os
import csv
import pandas
from collections import Counter

In [2]:
# Display the version of pandas this kernel is running, so the recorded
# outputs below can be tied to a specific pandas release.
pandas.__version__

'0.15.2'

In [3]:
# Input and output roots. The defaults are the original author's local
# checkout; set BIBLE_DATA_DIR / BIBLE_OUTPUT_DIR in the environment to run
# the notebook on another machine without editing this cell.
DATA_DIR = os.environ.get("BIBLE_DATA_DIR", "/Users/brian/code/bible/data")
OUTPUT_DIR = os.environ.get("BIBLE_OUTPUT_DIR", "/Users/brian/code/bible/output")

# Inputs read from DATA_DIR: verse text, the OT->NT quotation list (tab
# separated) and the book-abbreviation key.
TEXT_PATH = os.path.join(DATA_DIR, "bible-corpus/t_kjv.csv")
QUOTATION_PATH = os.path.join(DATA_DIR, "bible-ot-nt.tsv")
ABBREV_PATH = os.path.join(DATA_DIR, "bible-corpus/key_abbreviations_english.csv")

# Files under OUTPUT_DIR. BOOK_DF_PATH is read by this notebook (it is
# presumably produced by an earlier step - confirm); the rest are written here.
BOOK_DF_PATH = os.path.join(OUTPUT_DIR, "books_summary.csv")
QUOTATIONS_OUT_PATH = os.path.join(OUTPUT_DIR, "quotations.csv")
QUOTATIONS_OT_OUT_PATH = os.path.join(OUTPUT_DIR, "quotations_ot.csv")
QUOTATIONS_TEXT_OUT_PATH = os.path.join(OUTPUT_DIR, "quotations_text.csv")
QUOTATIONS_TEXT_OT_OUT_PATH = os.path.join(OUTPUT_DIR, "quotations_ot_text.csv")

In [4]:
# Load the book summary, the abbreviation key and the tab-separated
# quotation list.
book_df = pandas.read_csv(BOOK_DF_PATH)
abbrev_df = pandas.read_csv(ABBREV_PATH)
quotation_df = pandas.read_csv(QUOTATION_PATH, sep="\t")

# Have to add some extra rows to the abbreviation dataframe: the spellings
# "De", "Lu" and "Joh" are looked up later (presumably they occur in the
# quotation list but not in the key file - confirm against the data).
# The rows are built once and concatenated in a single call;
# DataFrame.append is deprecated and was removed in pandas 2.0.
extra_abbrev_df = pandas.DataFrame(
    [
        {"id": 343, "a": "De", "b": 5, "p": 0},
        {"id": 344, "a": "Lu", "b": 42, "p": 0},
        {"id": 345, "a": "Joh", "b": 43, "p": 0},
    ],
    columns=["id", "a", "b", "p"],
)
abbrev_df = pandas.concat([abbrev_df, extra_abbrev_df], ignore_index=True)

In [5]:
# Lookup tables: book id -> book name, abbreviation -> book id, and
# abbreviation -> book name (resolved through the book id).
book_lookup = {book_id: name for book_id, name in zip(book_df["b"], book_df["name"])}
abbrev_to_id = {abbrev: book_id for abbrev, book_id in zip(abbrev_df["a"], abbrev_df["b"])}
abbrev_to_book = {
    abbrev: book_lookup[book_id]
    for abbrev, book_id in zip(abbrev_df["a"], abbrev_df["b"])
}

In [6]:
# Break every NT reference (e.g. "Mt 1:23") into a book abbreviation and a
# "chapter:verse" location, then into integer chapter and verse numbers.
passages = quotation_df["NT"]
reference_parts = [passage.split(" ") for passage in passages]
abbrevs, locations = map(list, zip(*reference_parts))
location_parts = [location.split(":") for location in locations]
chapters, verses = map(list, zip(*location_parts))
chapters = list(map(int, chapters))
# select just the first verse for simplicity: "11,12" and "11-12" both give 11
verses = [int(vs.partition(",")[0].partition("-")[0]) for vs in verses]
# Resolve each abbreviation to its full book name and numeric book id.
books = list(map(abbrev_to_book.__getitem__, abbrevs))
book_ids = list(map(abbrev_to_id.__getitem__, abbrevs))

In [7]:
# Attach the parsed NT book id, book name, chapter and first verse to the
# quotation table, in that column order.
for _column, _values in (
    ("nt_b", book_ids),
    ("nt_book", books),
    ("nt_c", chapters),
    ("nt_v", verses),
):
    quotation_df[_column] = _values

In [8]:
# Same parsing as for the NT column, applied to the OT reference of each
# quotation (e.g. "Ps 91:11,12").
passages = quotation_df["OT"]
split_references = [passage.split(" ") for passage in passages]
abbrevs, locations = (list(column) for column in zip(*split_references))
split_locations = [location.split(":") for location in locations]
chapters, verses = (list(column) for column in zip(*split_locations))
chapters = [int(chapter) for chapter in chapters]
# select just the first verse for simplicity
verses = [int(vs.partition(",")[0].partition("-")[0]) for vs in verses]
# Map each abbreviation to its full book name and numeric book id.
books = [abbrev_to_book[abbrev] for abbrev in abbrevs]
book_ids = [abbrev_to_id[abbrev] for abbrev in abbrevs]

In [9]:
# Attach the parsed OT book id, book name, chapter and first verse to the
# quotation table, in that column order.
for _column, _values in (
    ("ot_b", book_ids),
    ("ot_book", books),
    ("ot_c", chapters),
    ("ot_v", verses),
):
    quotation_df[_column] = _values

In [10]:
# Keep only the id plus the OT and NT reference columns, with the OT group
# ahead of the NT group.
_ot_columns = ["OT", "ot_book", "ot_b", "ot_c", "ot_v"]
_nt_columns = ["NT", "nt_book", "nt_b", "nt_c", "nt_v"]
quotation_df = quotation_df[["ID"] + _ot_columns + _nt_columns]

In [11]:
# Preview the first rows only rather than dumping the whole table.
quotation_df.head(10)

Unnamed: 0,ID,OT,ot_book,ot_b,ot_c,ot_v,NT,nt_book,nt_b,nt_c,nt_v
0,1,Isa 7:14,Isaiah,23,7,14,Mt 1:23,Matthew,40,1,23
1,2,Mic 5:2,Micah,33,5,2,Mt 2:6,Matthew,40,2,6
2,3,Ho 11:1,Hosea,28,11,1,Mt 2:15,Matthew,40,2,15
3,4,Jer 31:15,Jeremiah,24,31,15,Mt 2:18,Matthew,40,2,18
4,5,Isa 40:3,Isaiah,23,40,3,Mt 3:3,Matthew,40,3,3
5,6,De 8:3,Deuteronomy,5,8,3,Mt 4:4,Matthew,40,4,4
6,7,"Ps 91:11,12",Psalms,19,91,11,Mt 4:6,Matthew,40,4,6
7,8,De 6:16,Deuteronomy,5,6,16,Mt 4:7,Matthew,40,4,7
8,9,De 6:13,Deuteronomy,5,6,13,Mt 4:10,Matthew,40,4,10
9,10,De 10:20,Deuteronomy,5,10,20,Mt 4:10,Matthew,40,4,10


In [12]:
# Write the quotation table to QUOTATIONS_OUT_PATH in its current row order;
# index=False keeps the DataFrame's row index out of the file.
quotation_df.to_csv(QUOTATIONS_OUT_PATH, index=False)

In [13]:
# Order the quotations by OT book, chapter and verse.
# DataFrame.sort was replaced by sort_values in pandas 0.17 and removed in
# 0.20; use sort_values when it exists and fall back to sort on older pandas.
_ot_sort_keys = ["ot_b", "ot_c", "ot_v"]
if hasattr(quotation_df, "sort_values"):
    quotation_ot_df = quotation_df.sort_values(_ot_sort_keys)
else:
    quotation_ot_df = quotation_df.sort(_ot_sort_keys)

In [14]:
# Preview the first rows of the OT-ordered table rather than dumping it all.
quotation_ot_df.head(10)

Unnamed: 0,ID,OT,ot_book,ot_b,ot_c,ot_v,NT,nt_book,nt_b,nt_c,nt_v
57,58,Ge 1:27,Genesis,1,1,27,Mr 10:6,Mark,41,10,6
265,266,"Ge 2:2,3",Genesis,1,2,2,Heb 4:4,Hebrews,58,4,4
246,247,Ge 2:4,Genesis,1,2,4,Eph 5:31,Ephesians,49,5,31
219,220,Ge 2:7,Genesis,1,2,7,1Co 15:45,1 Corinthians,46,15,45
29,30,Ge 2:24,Genesis,1,2,24,Mt 19:5,Matthew,40,19,5
58,59,Ge 2:24,Genesis,1,2,24,Mr 10:7,Mark,41,10,7
210,211,Ge 2:24,Genesis,1,2,24,1Co 6:16,1 Corinthians,46,6,16
123,124,Ge 12:1,Genesis,1,12,1,Ac 7:3,Acts,44,7,3
234,235,Ge 12:3,Genesis,1,12,3,Ga 3:8,Galatians,48,3,8
158,159,Ge 15:5,Genesis,1,15,5,Ro 4:18,Romans,45,4,18


In [15]:
# Write the OT-ordered quotation table to QUOTATIONS_OT_OUT_PATH;
# index=False keeps the DataFrame's row index out of the file.
quotation_ot_df.to_csv(QUOTATIONS_OT_OUT_PATH, index=False)

## Some data analysis

In [16]:
# Number of quotations per OT book, most frequent first.
ot_book_counts = Counter(quotation_df["ot_book"])
ot_book_counts.most_common()

[('Psalms', 83),
 ('Isaiah', 62),
 ('Deuteronomy', 41),
 ('Exodus', 29),
 ('Genesis', 24),
 ('Leviticus', 12),
 ('Jeremiah', 12),
 ('Proverbs', 7),
 ('Zechariah', 7),
 ('Hosea', 6),
 ('Malachi', 5),
 ('Habakkuk', 4),
 ('2 Samuel', 3),
 ('1 Kings', 2),
 ('Joel', 2),
 ('Ezekiel', 2),
 ('Job', 1),
 ('Micah', 1),
 ('Haggai', 1),
 ('1 Samuel', 1),
 ('Joshua', 1),
 ('Nahum', 1)]

In [17]:
# Number of quotations per NT book, most frequent first.
nt_book_counts = Counter(quotation_df["nt_book"])
nt_book_counts.most_common()

[('Romans', 62),
 ('Matthew', 49),
 ('Hebrews', 37),
 ('Acts', 29),
 ('Luke', 24),
 ('Mark', 23),
 ('1 Corinthians', 18),
 ('John', 17),
 ('2 Corinthians', 11),
 ('Galatians', 10),
 ('1 Peter', 9),
 ('Revelation', 6),
 ('Ephesians', 6),
 ('James', 4),
 ('1 Timothy', 1),
 ('2 Peter', 1)]

In [18]:
# "<book name> <chapter>" label for the OT side of every quotation,
# e.g. "Isaiah 7".
ot_book_chapters = [
    book + " " + str(chapter)
    for book, chapter in zip(quotation_df["ot_book"], list(quotation_df["ot_c"]))
]

In [19]:
# "<book name> <chapter>" label for the NT side of every quotation,
# e.g. "Matthew 1".
nt_book_chapters = [
    book + " " + str(chapter)
    for book, chapter in zip(quotation_df["nt_book"], list(quotation_df["nt_c"]))
]

In [20]:
# The 30 OT chapters that are quoted most often.
ot_chapter_counts = Counter(ot_book_chapters)
ot_chapter_counts.most_common(30)

[('Exodus 20', 9),
 ('Deuteronomy 5', 8),
 ('Isaiah 53', 8),
 ('Psalms 118', 8),
 ('Psalms 110', 8),
 ('Isaiah 40', 7),
 ('Leviticus 19', 7),
 ('Deuteronomy 6', 7),
 ('Genesis 2', 6),
 ('Psalms 22', 5),
 ('Deuteronomy 32', 5),
 ('Deuteronomy 25', 5),
 ('Psalms 69', 5),
 ('Isaiah 6', 5),
 ('Jeremiah 31', 4),
 ('Psalms 2', 4),
 ('Exodus 3', 4),
 ('Genesis 15', 4),
 ('Psalms 95', 4),
 ('Isaiah 28', 4),
 ('Isaiah 29', 4),
 ('Isaiah 52', 4),
 ('Isaiah 49', 3),
 ('Isaiah 42', 3),
 ('Jeremiah 7', 3),
 ('Genesis 22', 3),
 ('Psalms 8', 3),
 ('Psalms 6', 3),
 ('Malachi 3', 3),
 ('Proverbs 3', 3)]

In [21]:
# The 30 NT chapters that contain the most quotations.
nt_chapter_counts = Counter(nt_book_chapters)
nt_chapter_counts.most_common(30)

[('Romans 9', 12),
 ('Romans 10', 11),
 ('Romans 3', 9),
 ('Acts 7', 9),
 ('Matthew 4', 7),
 ('Hebrews 1', 7),
 ('Acts 13', 7),
 ('Mark 12', 6),
 ('Matthew 21', 6),
 ('1 Corinthians 15', 6),
 ('Luke 4', 6),
 ('Galatians 3', 6),
 ('Romans 15', 6),
 ('Romans 11', 6),
 ('Romans 4', 5),
 ('Matthew 22', 5),
 ('Mark 7', 5),
 ('Hebrews 2', 5),
 ('2 Corinthians 6', 5),
 ('1 Peter 2', 5),
 ('Hebrews 12', 5),
 ('John 19', 4),
 ('Acts 2', 4),
 ('Matthew 5', 4),
 ('Matthew 15', 4),
 ('Hebrews 10', 4),
 ('Mark 11', 3),
 ('Mark 10', 3),
 ('Luke 20', 3),
 ('John 15', 3)]

## Include actual text

In [22]:
# Load the verse text table from TEXT_PATH; its "id" and "t" columns are
# used below to look up the text of individual verses.
text_all = pandas.read_csv(TEXT_PATH)

In [23]:
# Verse id -> verse text, taken from the "id" and "t" columns of the corpus.
text_dict = {verse_id: text for verse_id, text in zip(text_all["id"], text_all["t"])}

In [24]:
# Build the numeric key used to look verses up in text_dict:
# book * 1,000,000 + chapter * 1,000 + verse (the lookup assumes the corpus
# "id" column is encoded this way).
ot_ids = [
    1000000 * book_id + 1000 * chapter + verse
    for book_id, chapter, verse in zip(
        quotation_df["ot_b"], quotation_df["ot_c"], quotation_df["ot_v"]
    )
]
# Text of the (first) quoted OT verse for every quotation.
ot_verse_text = [text_dict[verse_id] for verse_id in ot_ids]

In [25]:
# Build the numeric key used to look verses up in text_dict:
# book * 1,000,000 + chapter * 1,000 + verse (the lookup assumes the corpus
# "id" column is encoded this way).
nt_ids = [
    1000000 * book_id + 1000 * chapter + verse
    for book_id, chapter, verse in zip(
        quotation_df["nt_b"], quotation_df["nt_c"], quotation_df["nt_v"]
    )
]
# Text of the (first) quoting NT verse for every quotation.
nt_verse_text = [text_dict[verse_id] for verse_id in nt_ids]

In [26]:
# Pair every reference with the text of its verse. The column order is pinned
# explicitly: building a DataFrame from a dict sorted the keys alphabetically
# in old pandas but keeps insertion order in current releases, which would
# change the column order of the exported CSV. The order below is the
# alphabetical one shown in this notebook's recorded output.
quotation_text_df = pandas.DataFrame({"nt_ref": quotation_df["NT"], "ot_ref": quotation_df["OT"],
                                      "ot_text": ot_verse_text, "nt_text": nt_verse_text})
quotation_text_df = quotation_text_df[["nt_ref", "nt_text", "ot_ref", "ot_text"]]

In [27]:
# Verse keys for the OT-ordered table: book * 1,000,000 + chapter * 1,000 +
# verse, in the row order of quotation_ot_df.
ot_ids_2 = [
    1000000 * book_id + 1000 * chapter + verse
    for book_id, chapter, verse in zip(
        quotation_ot_df["ot_b"], quotation_ot_df["ot_c"], quotation_ot_df["ot_v"]
    )
]
# OT verse text for each row of the OT-ordered table.
ot_verse_text_2 = [text_dict[verse_id] for verse_id in ot_ids_2]

In [28]:
# Verse keys for the OT-ordered table: book * 1,000,000 + chapter * 1,000 +
# verse, in the row order of quotation_ot_df.
nt_ids_2 = [
    1000000 * book_id + 1000 * chapter + verse
    for book_id, chapter, verse in zip(
        quotation_ot_df["nt_b"], quotation_ot_df["nt_c"], quotation_ot_df["nt_v"]
    )
]
# NT verse text for each row of the OT-ordered table.
nt_verse_text_2 = [text_dict[verse_id] for verse_id in nt_ids_2]

In [29]:
# Reference/text table in OT order, with the OT columns placed first.
_ot_ordered_columns = {
    "nt_ref": quotation_ot_df["NT"],
    "ot_ref": quotation_ot_df["OT"],
    "ot_text": ot_verse_text_2,
    "nt_text": nt_verse_text_2,
}
quotation_text_df_2 = pandas.DataFrame(_ot_ordered_columns)[
    ["ot_ref", "ot_text", "nt_ref", "nt_text"]
]

In [30]:
# Export both reference/text tables without the row index.
for _frame, _path in (
    (quotation_text_df, QUOTATIONS_TEXT_OUT_PATH),
    (quotation_text_df_2, QUOTATIONS_TEXT_OT_OUT_PATH),
):
    _frame.to_csv(_path, index=False)

In [31]:
# Pretty CSVs are better for viewing on GitHub preview: each keeps both
# references but only one text column. No index=False is passed here, so
# these two files also contain the row index.
pretty_nt_path = os.path.join(OUTPUT_DIR, "quotations_text_pretty.csv")
pretty_ot_path = os.path.join(OUTPUT_DIR, "quotations_ot_text_pretty.csv")
quotation_text_df[["nt_ref", "ot_ref", "ot_text"]].to_csv(pretty_nt_path)
quotation_text_df_2[["ot_ref", "nt_ref", "nt_text"]].to_csv(pretty_ot_path)

In [32]:
# Preview the first rows only rather than dumping the whole table.
quotation_text_df.head(10)

Unnamed: 0,nt_ref,nt_text,ot_ref,ot_text
0,Mt 1:23,"Behold, a virgin shall be with child, and shal...",Isa 7:14,Therefore the Lord himself shall give you a si...
1,Mt 2:6,"And thou Bethlehem, in the land of Juda, art n...",Mic 5:2,"But thou, Bethlehem Ephratah, though thou be l..."
2,Mt 2:15,And was there until the death of Herod: that i...,Ho 11:1,"When Israel was a child, then I loved him, and..."
3,Mt 2:18,"In Rama was there a voice heard, lamentation, ...",Jer 31:15,Thus saith the LORD; A voice was heard in Rama...
4,Mt 3:3,For this is he that was spoken of by the proph...,Isa 40:3,The voice of him that crieth in the wilderness...
5,Mt 4:4,"But he answered and said, It is written, Man s...",De 8:3,"And he humbled thee, and suffered thee to hung..."
6,Mt 4:6,"And saith unto him, If thou be the Son of God,...","Ps 91:11,12","For he shall give his angels charge over thee,..."
7,Mt 4:7,"Jesus said unto him, It is written again, Thou...",De 6:16,"Ye shall not tempt the LORD your God, as ye te..."
8,Mt 4:10,"Then saith Jesus unto him, Get thee hence, Sat...",De 6:13,"Thou shalt fear the LORD thy God, and serve hi..."
9,Mt 4:10,"Then saith Jesus unto him, Get thee hence, Sat...",De 10:20,Thou shalt fear the LORD thy God; him shalt th...


In [33]:
# Preview the first rows of the OT-ordered table rather than dumping it all.
quotation_text_df_2.head(10)

Unnamed: 0,ot_ref,ot_text,nt_ref,nt_text
57,Ge 1:27,"So God created man in his own image, in the im...",Mr 10:6,But from the beginning of the creation God mad...
265,"Ge 2:2,3",And on the seventh day God ended his work whic...,Heb 4:4,For he spake in a certain place of the seventh...
246,Ge 2:4,These are the generations of the heavens and o...,Eph 5:31,For this cause shall a man leave his father an...
219,Ge 2:7,And the LORD God formed man of the dust of the...,1Co 15:45,"And so it is written, The first man Adam was m..."
29,Ge 2:24,Therefore shall a man leave his father and his...,Mt 19:5,"And said, For this cause shall a man leave fat..."
58,Ge 2:24,Therefore shall a man leave his father and his...,Mr 10:7,For this cause shall a man leave his father an...
210,Ge 2:24,Therefore shall a man leave his father and his...,1Co 6:16,What? know ye not that he which is joined to a...
123,Ge 12:1,"Now the LORD had said unto Abram, Get thee out...",Ac 7:3,"And said unto him, Get thee out of thy country..."
234,Ge 12:3,"And I will bless them that bless thee, and cur...",Ga 3:8,"And the scripture, foreseeing that God would j..."
158,Ge 15:5,"And he brought him forth abroad, and said, Loo...",Ro 4:18,"Who against hope believed in hope, that he mig..."
