In [2]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer

In [3]:
kjv_csv = pd.read_csv('data/kjv.csv')
kjv_csv

Unnamed: 0,Verse ID,Book Name,Book Number,Chapter,Verse,Text
0,1,Genesis,1,1,1,¶ In the beginning God created the heaven and ...
1,2,Genesis,1,1,2,"And the earth was without form, and void; and ..."
2,3,Genesis,1,1,3,"¶ And God said, Let there be light: and there ..."
3,4,Genesis,1,1,4,"And God saw the light, that [it was] good: and..."
4,5,Genesis,1,1,5,"And God called the light Day, and the darkness..."
...,...,...,...,...,...,...
31097,31098,Revelation,66,22,17,"And the Spirit and the bride say, Come. And le..."
31098,31099,Revelation,66,22,18,For I testify unto every man that heareth the ...
31099,31100,Revelation,66,22,19,And if any man shall take away from the words ...
31100,31101,Revelation,66,22,20,"¶ He which testifieth these things saith, ‹Sur..."


In [4]:
symbol = kjv_csv['Text'][0][0]
symbol

'¶'

In [5]:
patt = r"[^\w,;:'()\s.!?]"
cleaned_csv = kjv_csv['Text'].str.replace(patt, '', regex=True)
cleaned_kjv = kjv_csv.assign(cleaned=cleaned_csv)
cleaned_kjv

Unnamed: 0,Verse ID,Book Name,Book Number,Chapter,Verse,Text,cleaned
0,1,Genesis,1,1,1,¶ In the beginning God created the heaven and ...,In the beginning God created the heaven and t...
1,2,Genesis,1,1,2,"And the earth was without form, and void; and ...","And the earth was without form, and void; and ..."
2,3,Genesis,1,1,3,"¶ And God said, Let there be light: and there ...","And God said, Let there be light: and there w..."
3,4,Genesis,1,1,4,"And God saw the light, that [it was] good: and...","And God saw the light, that it was good: and G..."
4,5,Genesis,1,1,5,"And God called the light Day, and the darkness...","And God called the light Day, and the darkness..."
...,...,...,...,...,...,...,...
31097,31098,Revelation,66,22,17,"And the Spirit and the bride say, Come. And le...","And the Spirit and the bride say, Come. And le..."
31098,31099,Revelation,66,22,18,For I testify unto every man that heareth the ...,For I testify unto every man that heareth the ...
31099,31100,Revelation,66,22,19,And if any man shall take away from the words ...,And if any man shall take away from the words ...
31100,31101,Revelation,66,22,20,"¶ He which testifieth these things saith, ‹Sur...","He which testifieth these things saith, Surel..."


In [6]:
genesis_by_verse_corpus = cleaned_kjv[cleaned_kjv['Book Name'] == 'Genesis']['cleaned']
genesis_by_verse_corpus

0        In the beginning God created the heaven and t...
1       And the earth was without form, and void; and ...
2        And God said, Let there be light: and there w...
3       And God saw the light, that it was good: and G...
4       And God called the light Day, and the darkness...
                              ...                        
1528     And Joseph dwelt in Egypt, he, and his father...
1529    And Joseph saw Ephraim's children of the third...
1530    And Joseph said unto his brethren, I die: and ...
1531    And Joseph took an oath of the children of Isr...
1532    So Joseph died, being an hundred and ten years...
Name: cleaned, Length: 1533, dtype: object

In [7]:
tfidf = TfidfVectorizer()

X = tfidf.fit_transform(genesis_by_verse_corpus)

genesis_tfidf_df = pd.DataFrame(data=X.toarray(), columns=tfidf.get_feature_names_out())
genesis_by_verse = cleaned_kjv[cleaned_kjv['Book Name'] == 'Genesis'].assign(result=genesis_tfidf_df.T.idxmax())
genesis_by_verse.head(10)

Unnamed: 0,Verse ID,Book Name,Book Number,Chapter,Verse,Text,cleaned,result
0,1,Genesis,1,1,1,¶ In the beginning God created the heaven and ...,In the beginning God created the heaven and t...,beginning
1,2,Genesis,1,1,2,"And the earth was without form, and void; and ...","And the earth was without form, and void; and ...",face
2,3,Genesis,1,1,3,"¶ And God said, Let there be light: and there ...","And God said, Let there be light: and there w...",light
3,4,Genesis,1,1,4,"And God saw the light, that [it was] good: and...","And God saw the light, that it was good: and G...",light
4,5,Genesis,1,1,5,"And God called the light Day, and the darkness...","And God called the light Day, and the darkness...",day
5,6,Genesis,1,1,6,"¶ And God said, Let there be a firmament in th...","And God said, Let there be a firmament in the...",waters
6,7,Genesis,1,1,7,"And God made the firmament, and divided the wa...","And God made the firmament, and divided the wa...",firmament
7,8,Genesis,1,1,8,And God called the firmament Heaven. And the e...,And God called the firmament Heaven. And the e...,firmament
8,9,Genesis,1,1,9,"¶ And God said, Let the waters under the heave...","And God said, Let the waters under the heaven...",let
9,10,Genesis,1,1,10,And God called the dry [land] Earth; and the g...,And God called the dry land Earth; and the gat...,called


In [8]:
genesis_by_verse[genesis_by_verse['Chapter']==39]

Unnamed: 0,Verse ID,Book Name,Book Number,Chapter,Verse,Text,cleaned,result
1150,1151,Genesis,1,39,1,¶ And Joseph was brought down to Egypt; and Po...,And Joseph was brought down to Egypt; and Pot...,down
1151,1152,Genesis,1,39,2,"And the LORD was with Joseph, and he was a pro...","And the LORD was with Joseph, and he was a pro...",was
1152,1153,Genesis,1,39,3,And his master saw that the LORD [was] with hi...,"And his master saw that the LORD was with him,...",that
1153,1154,Genesis,1,39,4,"And Joseph found grace in his sight, and he se...","And Joseph found grace in his sight, and he se...",he
1154,1155,Genesis,1,39,5,And it came to pass from the time [that] he ha...,And it came to pass from the time that he had ...,house
1155,1156,Genesis,1,39,6,And he left all that he had in Joseph's hand; ...,And he left all that he had in Joseph's hand; ...,he
1156,1157,Genesis,1,39,7,"¶ And it came to pass after these things, that...","And it came to pass after these things, that ...",cast
1157,1158,Genesis,1,39,8,"But he refused, and said unto his master's wif...","But he refused, and said unto his master's wif...",master
1158,1159,Genesis,1,39,9,[There is] none greater in this house than I; ...,There is none greater in this house than I; ne...,wickedness
1159,1160,Genesis,1,39,10,"And it came to pass, as she spake to Joseph da...","And it came to pass, as she spake to Joseph da...",her


In [9]:
bible_by_chapter = cleaned_kjv.groupby(
    ['Book Number', 'Book Name', 'Chapter']
)[['Book Number', 'Book Name', 'Chapter', 'cleaned']].apply(lambda x: x['cleaned'].str.cat(sep=' ')).to_frame()
bible_by_chapter = bible_by_chapter.rename(columns={0:'Text'})
bible_by_chapter

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Text
Book Number,Book Name,Chapter,Unnamed: 3_level_1
1,Genesis,1,In the beginning God created the heaven and t...
1,Genesis,2,"Thus the heavens and the earth were finished,..."
1,Genesis,3,Now the serpent was more subtil than any beas...
1,Genesis,4,And Adam knew Eve his wife; and she conceived...
1,Genesis,5,This is the book of the generations of Adam. ...
...,...,...,...
66,Revelation,18,And after these things I saw another angel co...
66,Revelation,19,And after these things I heard a great voice ...
66,Revelation,20,"And I saw an angel come down from heaven, hav..."
66,Revelation,21,And I saw a new heaven and a new earth: for t...


In [10]:
genesis_by_chapter = bible_by_chapter[bible_by_chapter.index.get_level_values(0) == 1].reset_index()
genesis_by_chapter

Unnamed: 0,Book Number,Book Name,Chapter,Text
0,1,Genesis,1,In the beginning God created the heaven and t...
1,1,Genesis,2,"Thus the heavens and the earth were finished,..."
2,1,Genesis,3,Now the serpent was more subtil than any beas...
3,1,Genesis,4,And Adam knew Eve his wife; and she conceived...
4,1,Genesis,5,This is the book of the generations of Adam. ...
5,1,Genesis,6,"And it came to pass, when men began to multip..."
6,1,Genesis,7,"And the LORD said unto Noah, Come thou and al..."
7,1,Genesis,8,"And God remembered Noah, and every living thi..."
8,1,Genesis,9,"And God blessed Noah and his sons, and said u..."
9,1,Genesis,10,Now these are the generations of the sons of ...


In [11]:
def tfidf_convert(corpus):
    tfidf = TfidfVectorizer()

    X = tfidf.fit_transform(corpus)

    tfidf_df = pd.DataFrame(data=X.toarray(), columns=tfidf.get_feature_names_out())

    return tfidf_df.T.idxmax()

In [12]:
bible_by_chapter

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Text
Book Number,Book Name,Chapter,Unnamed: 3_level_1
1,Genesis,1,In the beginning God created the heaven and t...
1,Genesis,2,"Thus the heavens and the earth were finished,..."
1,Genesis,3,Now the serpent was more subtil than any beas...
1,Genesis,4,And Adam knew Eve his wife; and she conceived...
1,Genesis,5,This is the book of the generations of Adam. ...
...,...,...,...
66,Revelation,18,And after these things I saw another angel co...
66,Revelation,19,And after these things I heard a great voice ...
66,Revelation,20,"And I saw an angel come down from heaven, hav..."
66,Revelation,21,And I saw a new heaven and a new earth: for t...


In [13]:
bible_by_chapter = bible_by_chapter.reset_index()
bible_by_chapter

Unnamed: 0,Book Number,Book Name,Chapter,Text
0,1,Genesis,1,In the beginning God created the heaven and t...
1,1,Genesis,2,"Thus the heavens and the earth were finished,..."
2,1,Genesis,3,Now the serpent was more subtil than any beas...
3,1,Genesis,4,And Adam knew Eve his wife; and she conceived...
4,1,Genesis,5,This is the book of the generations of Adam. ...
...,...,...,...,...
1184,66,Revelation,18,And after these things I saw another angel co...
1185,66,Revelation,19,And after these things I heard a great voice ...
1186,66,Revelation,20,"And I saw an angel come down from heaven, hav..."
1187,66,Revelation,21,And I saw a new heaven and a new earth: for t...


In [14]:
bible_by_book = bible_by_chapter.groupby(
    ['Book Number', 'Book Name']
        )[['Book Number', 'Book Name', 'Text']].apply(lambda x: x['Text'].str.cat(sep=' ')).to_frame()
bible_by_book = bible_by_book.rename(columns={0:'Text'})
bible_by_book

Unnamed: 0_level_0,Unnamed: 1_level_0,Text
Book Number,Book Name,Unnamed: 2_level_1
1,Genesis,In the beginning God created the heaven and t...
2,Exodus,Now these are the names of the children of Is...
3,Leviticus,"And the LORD called unto Moses, and spake unt..."
4,Numbers,And the LORD spake unto Moses in the wilderne...
5,Deuteronomy,These be the words which Moses spake unto all...
...,...,...
62,1 John,"That which was from the beginning, which we h..."
63,2 John,The elder unto the elect lady and her childre...
64,3 John,"The elder unto the wellbeloved Gaius, whom I ..."
65,Jude,"Jude, the servant of Jesus Christ, and brothe..."


In [15]:
tfidf_convert(bible_by_book['Text'])

0     and
1     the
2     the
3     the
4     the
     ... 
61    the
62    the
63    the
64    the
65    and
Length: 66, dtype: object

In [16]:
tfidf = TfidfVectorizer(analyzer='word', stop_words='english')

X = tfidf.fit_transform(bible_by_book['Text'])

tfidf_df = pd.DataFrame(data=X.toarray(), columns=tfidf.get_feature_names_out())

In [17]:
top_words = tfidf_df.apply(lambda x: list(x.sort_values(ascending=False)[:10].index), axis=1).to_frame().rename(columns={0:'Top Words'}).head()
top_words

Unnamed: 0,Top Words
0,"[said, unto, thou, thy, jacob, joseph, thee, s..."
1,"[shall, moses, thou, unto, lord, shalt, aaron,..."
2,"[shall, offering, unto, lord, priest, ye, uncl..."
3,"[shall, unto, lord, offering, moses, israel, c..."
4,"[thy, thou, lord, shall, shalt, thee, unto, go..."


In [18]:
pd.set_option('display.max_colwidth', None)
top_words

Unnamed: 0,Top Words
0,"[said, unto, thou, thy, jacob, joseph, thee, shall, land, abraham]"
1,"[shall, moses, thou, unto, lord, shalt, aaron, pharaoh, said, israel]"
2,"[shall, offering, unto, lord, priest, ye, unclean, aaron, burnt, altar]"
3,"[shall, unto, lord, offering, moses, israel, children, congregation, aaron, tabernacle]"
4,"[thy, thou, lord, shall, shalt, thee, unto, god, land, ye]"


In [19]:
top_words.head(10)

Unnamed: 0,Top Words
0,"[said, unto, thou, thy, jacob, joseph, thee, shall, land, abraham]"
1,"[shall, moses, thou, unto, lord, shalt, aaron, pharaoh, said, israel]"
2,"[shall, offering, unto, lord, priest, ye, unclean, aaron, burnt, altar]"
3,"[shall, unto, lord, offering, moses, israel, children, congregation, aaron, tabernacle]"
4,"[thy, thou, lord, shall, shalt, thee, unto, god, land, ye]"


In [22]:
cleaned_csv

0                                                                                                                                                                 In the beginning God created the heaven and the earth.
1                                                                         And the earth was without form, and void; and darkness was upon the face of the deep. And the Spirit of God moved upon the face of the waters.
2                                                                                                                                                                 And God said, Let there be light: and there was light.
3                                                                                                                                  And God saw the light, that it was good: and God divided the light from the darkness.
4                                                                                                    And God called the light Day, a

In [49]:
def top_10_tfidf(csv_file_path):
    
    bible_csv = pd.read_csv(csv_file_path)
    patt = r"[^\w,;:'()\s.!?]"
    cleaned_csv = kjv_csv['Text'].str.replace(patt, '', regex=True)
    bible_csv = bible_csv.assign(clean=cleaned_csv)
    bible_csv = bible_csv.groupby(
    ['Book Number', 'Book Name']
        )[['Book Number', 'Book Name', 'clean']].apply(lambda x: x['clean'].str.cat(sep=' ')).to_frame().rename(columns={0:'Text'})

    tfidf = TfidfVectorizer(analyzer='word', stop_words='english')
    X = tfidf.fit_transform(bible_csv['Text'])
    tfidf_df = pd.DataFrame(data=X.toarray(), columns=tfidf.get_feature_names_out())
    
    top_words = (tfidf_df
                 .apply(lambda x: list(x.sort_values(ascending=False)[:10].index), axis=1)
                 .to_frame()
                 .rename(columns={0:f'Top Words ({csv_file_path[5:-4].upper()})'}))
    return top_words


In [50]:
test_csv = 'data/kjv.csv'
top_10_tfidf(test_csv)

Unnamed: 0,Top Words (KJV)
0,"[said, unto, thou, thy, jacob, joseph, thee, shall, land, abraham]"
1,"[shall, moses, thou, unto, lord, shalt, aaron, pharaoh, said, israel]"
2,"[shall, offering, unto, lord, priest, ye, unclean, aaron, burnt, altar]"
3,"[shall, unto, lord, offering, moses, israel, children, congregation, aaron, tabernacle]"
4,"[thy, thou, lord, shall, shalt, thee, unto, god, land, ye]"
...,...
61,"[god, love, world, ye, know, hath, son, sin, jesus, unto]"
62,"[lady, christ, truth, doctrine, speed, elect, love, unto, abideth, father]"
63,"[truth, thou, church, doeth, thee, beloved, record, friends, forbiddeth, malicious]"
64,"[ungodly, christ, jesus, unto, lord, reserved, lusts, god, beloved, eternal]"
