# Fetch the Bible in various translations and prepare it

The Bible translations are available at https://github.com/BibleNLP and they are aligned verse by verse,
separated by newlines.  The references for them are in vref.txt and each version is in its own
file.  We can download them and put them into a pandas dataframe.

In [68]:
import requests
import pandas as pd

def download_references():
    """
    Download the verse reference list (vref.txt) from BibleNLP into a dataframe

    Each line of vref.txt is one reference such as ``GEN 1:1``.  The returned
    dataframe has one row per line and these columns:

    - ``0``: the raw reference line
    - ``book``: the first 3 characters of the reference
    - ``chapter``: the text between the book code and the ``:``
    - ``verse``: the text after the ``:``

    :return: dataframe with the columns described above
    :raises Exception: if the server does not answer with HTTP 200
    """
    url = 'https://github.com/BibleNLP/ebible/blob/main/metadata/vref.txt?raw=true'
    # always pass a timeout: without one requests may wait forever on a stalled connection
    r = requests.get(url, timeout=30)
    if r.status_code != 200:
        raise Exception(f'Failed to download {url}')

    # create a dataframe and read each line into a row
    df = pd.DataFrame(r.text.splitlines())

    # for each row extract the first 3 characters as book
    df['book'] = df[0].str[:3]
    # split what follows the book code on ':' once and reuse it for both columns
    chapter_verse = df[0].str[3:].str.split(':')
    # the remainder starts with the space that separates book and chapter
    # (' 1:1'), so strip it - otherwise chapter would be stored as ' 1'
    df['chapter'] = chapter_verse.str[0].str.strip()
    df['verse'] = chapter_verse.str[1].str.strip()

    return df


def clean_filename(filename):
    """
    Security function to ensure filename only contains a-zA-Z0-9_-

    Every other character (path separators, dots, whitespace, non-ASCII
    letters and digits, ...) is dropped rather than replaced.

    :param filename: string to clean
    :return: filename with all characters outside a-zA-Z0-9_- removed
    """
    # explicit ASCII whitelist: str.isalnum() would also let through
    # non-ASCII letters and digits such as 'é' or '²'
    allowed = (
        'abcdefghijklmnopqrstuvwxyz'
        'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
        '0123456789'
        '-_'
    )
    return ''.join(c for c in filename if c in allowed)

# download a bible, e.g. from https://github.com/BibleNLP/ebible/blob/main/corpus/eng-eng-web.txt?raw=true
# and add its verses as a column of the dataframe
def download_bible_to_dataframe(language='eng', version='eng-web', df=None):
    """
    Download a bible version from BibleNLP and add it as a column of a dataframe

    The corpus file is read one line per row, so it has to line up with the
    rows already in the dataframe.

    :param language: language code (3 char code - check filename on BibleNLP)
    :param version: version code (the remaining name of the file - check filename on BibleNLP)
    :param df: optional dataframe to add the column to; it is modified in place
               (if None the references are downloaded to initialize a new dataframe)
    :return: dataframe with the bible version added as a column named after the cleaned version
    :raises Exception: if the server does not answer with HTTP 200
    :raises ValueError: if the number of downloaded lines differs from the number of rows in df
    """
    if df is None:
        df = download_references()

    # for security reasons ensure version and language are a valid filename of a-zA-Z0-9_-
    version = clean_filename(version)
    language = clean_filename(language)

    url = f'https://github.com/BibleNLP/ebible/blob/main/corpus/{language}-{version}.txt?raw=true'
    # always pass a timeout: without one requests may wait forever on a stalled connection
    r = requests.get(url, timeout=30)
    if r.status_code != 200:
        raise Exception(f'Failed to download {url}')

    verses = r.text.splitlines()
    # Fail with a message that names the problem instead of a generic pandas
    # length error.  Only checked when df already has rows, so a frame without
    # rows is handed to pandas exactly as before.
    if len(df) and len(verses) != len(df):
        raise ValueError(
            f'{url} has {len(verses)} lines but the dataframe has {len(df)} rows; '
            'the verses cannot be aligned'
        )
    df[version] = verses
    return df




Unnamed: 0,0,book,chapter,verse,eng-web
0,GEN 1:1,GEN,1,1,"In the beginning, God created the heavens and ..."
1,GEN 1:2,GEN,1,2,The earth was formless and empty. Darkness was...
2,GEN 1:3,GEN,1,3,"God said, “Let there be light,” and there was ..."
3,GEN 1:4,GEN,1,4,"God saw the light, and saw that it was good. G..."
4,GEN 1:5,GEN,1,5,"God called the light “day”, and the darkness h..."
...,...,...,...,...,...
41894,ENO 42:12,ENO,42,12,
41895,ENO 42:13,ENO,42,13,
41896,ENO 42:14,ENO,42,14,
41897,ENO 42:15,ENO,42,15,


In [None]:
# Start the dataframe: df is not passed, so the verse references are downloaded
# first and the eng-web text is added as a column
bible = download_bible_to_dataframe('eng', 'eng-web')
# last expression of the cell, so the notebook displays the dataframe
bible

In [69]:
# Show only the rows whose book code is JHN, limited to the reference columns and the eng-web text
bible[bible['book'] == 'JHN'][['book', 'chapter', 'verse', 'eng-web']]

Unnamed: 0,book,chapter,verse,eng-web
26113,JHN,1,1,"In the beginning was the Word, and the Word wa..."
26114,JHN,1,2,The same was in the beginning with God.
26115,JHN,1,3,"All things were made through him. Without him,..."
26116,JHN,1,4,"In him was life, and the life was the light of..."
26117,JHN,1,5,"The light shines in the darkness, and the dark..."
...,...,...,...,...
26987,JHN,21,21,"Peter, seeing him, said to Jesus, “Lord, what ..."
26988,JHN,21,22,"Jesus said to him, “If I desire that he stay u..."
26989,JHN,21,23,This saying therefore went out among the broth...
26990,JHN,21,24,This is the disciple who testifies about these...


In [72]:
# Append further English versions, one column per version, in this order
for extra_version in ('eng-asv', 'eng-kjv2006', 'engBBE'):
    bible = download_bible_to_dataframe('eng', extra_version, bible)
bible

Unnamed: 0,0,book,chapter,verse,eng-web,eng-asv,eng-kjv2006,engBBE
0,GEN 1:1,GEN,1,1,"In the beginning, God created the heavens and ...",In the beginning God created the heavens and t...,In the beginning God created the heaven and th...,At the first God made the heaven and the earth.
1,GEN 1:2,GEN,1,2,The earth was formless and empty. Darkness was...,And the earth was waste and void; and darkness...,"And the earth was without form, and void; and ...",And the earth was waste and without form; and ...
2,GEN 1:3,GEN,1,3,"God said, “Let there be light,” and there was ...","And God said, Let there be light: and there wa...","And God said, Let there be light: and there wa...","And God said, Let there be light: and there wa..."
3,GEN 1:4,GEN,1,4,"God saw the light, and saw that it was good. G...","And God saw the light, that it was good: and G...","And God saw the light, that it was good: and G...","And God, looking on the light, saw that it was..."
4,GEN 1:5,GEN,1,5,"God called the light “day”, and the darkness h...","And God called the light Day, and the darkness...","And God called the light Day, and the darkness...","Naming the light, Day, and the dark, Night. An..."
...,...,...,...,...,...,...,...,...
41894,ENO 42:12,ENO,42,12,,,,
41895,ENO 42:13,ENO,42,13,,,,
41896,ENO 42:14,ENO,42,14,,,,
41897,ENO 42:15,ENO,42,15,,,,


## Encode the Bible to a cipher text
When testing chatGPT (ver 3.5) it knew Bible Verses. 

 > Q. which versions of the english bible do you know

```
As an AI language model, I have been trained on a vast amount of text data, including various versions and translations of the Bible. Here are some of the most popular versions of the English Bible that I'm familiar with:

King James Version (KJV)
New International Version (NIV)
New American Standard Bible (NASB)
English Standard Version (ESV)
New Revised Standard Version (NRSV)
Revised Standard Version (RSV)
New King James Version (NKJV)
Amplified Bible (AMP)
Contemporary English Version (CEV)
New Living Translation (NLT)
The Message (MSG)

There are many other English translations of the Bible available as well, but these are some of the most commonly used and well-known versions.
```


> Q. what is phil 4:2 in niv

```
Philippians 4:2 in the New International Version (NIV) reads:

"I plead with Euodia and I plead with Syntyche to be of the same mind in the Lord."
```

So it "knows" Bible verses already, thus if you asked it to translate back into English it will cheat (by just doing a lookup as those are the likely next words)

Likewise it has knowledge of some other languages

> Q. Continue this sentence, "Ngam majjum banndiraaɓe am ɓe ngiɗumi, on njewnii kam sanne. Onon laatiiɓe welwelo am e lefol am, onon ɓe piiranmi gabaare. Hano nii, giɗaaɓe am, tiiɗee no wooɗiri nder tokkol Joomiraawo. 2Mi ŋaarake Ewodi e Sintis laatoo daane worre ngam Joomiraawo."

> The complete sentence would be: "Ngam majjum banndiraaɓe am ɓe ngiɗumi, on njewnii kam sanne. Onon laatiiɓe welwelo am e lefol am, onon ɓe piiranmi gabaare. Hano nii, giɗaaɓe am, tiiɗee no wooɗiri nder tokkol Joomiraawo. Mi ŋaarake Ewodi e Sintis laatoo daane worre ngam Joomiraawo" which translates to "If there are any quarrels between you, my dear friends, I beg of you to make peace. And now, dear brothers and sisters, one final thing. Fix your thoughts on what is true, and honorable, and right, and pure, and lovely, and admirable. Think about things that are excellent and worthy of praise. I urge Euodia and Syntyche to be of the same mind in the Lord." This is a combination of two different passages from the New Testament of the Bible: Philippians 4:2 and Philippians 4:8, translated into Nigerian Fulfulde.

> Q. translate Ps 23 to Nigerian Fulfulde

```
Here's the Nigerian Fulfulde translation of Psalm 23:

Joom baako waɗi : "Mi worgo naange.
Yeeso waɗi mi jeyaaɗo to, haa mi heewde so mi hebbina ngu.
Mi heewde so mi nduttino waɗi bannge nii mi laana ko mbasal.
Naaƴa nde ne njogi sabaabu, ndeen ko mbasal sahaa ngam mayre, so mi nduttina waɗi ngalaa.
Naaƴa ngalaa ne so gagganaani ngam kaañirde diiwaan-maɓɓe, ngam njaaɓi haaɗti.
Adi tooɗto woni, haa i miijo mum ndiyam e daɓɓan ngalaa ummii."
In English, it reads:

The Lord is my shepherd, I lack nothing.
He makes me lie down in green pastures, he leads me beside quiet waters,
he refreshes my soul. He guides me along the right paths for his name's sake.
Even though I walk through the darkest valley, I will fear no evil, for you are with me; your rod and your staff, they comfort me.
You prepare a table before me in the presence of my enemies. You anoint my head with oil; my cup overflows.
Surely your goodness and love will follow me all the days of my life, and I will dwell in the house of the Lord forever.
```

Impressive, but is it cheating by just hallucinating and then making up the translation from the English? It seems so.

> Q. translate from Nigerian Fulfude, "Yeeso waɗi mi jeyaaɗo to, haa mi heewde so mi hebbina ngu."

> A. "Yeeso waɗi mi jeyaaɗo to, haa mi heewde so mi hebbina ngu" in Nigerian Fulfulde translates to "He makes me lie down in green pastures, he leads me beside quiet waters." This is a phrase from Psalm 23:2 in the Bible.

Now switch to another chat session and it shows that it was using the memory of the context and using the English. Here is what it thinks of the same phrase without the context:

> Q. translate from Nigerian Fulfulde to English, "Yeeso waɗi mi jeyaaɗo to, haa mi heewde so mi hebbina ngu."
> A. The Nigerian Fulfulde phrase "Yeeso waɗi mi jeyaaɗo to, haa mi heewde so mi hebbina ngu" translates to English as "I am wandering around without direction, not knowing where I'm headed."

## Why use encoding
To get around this "cheating" we will create a new language.  For fun we are calling it "birrig" (for no reason).  It is a substitution cipher where we switch various letters for other letters, swapping vowels only with other vowels and keeping the capitals and punctuation so it is somewhat readable.

Reading about ChatGPT on a Caesar cipher, it is not able to consistently decode it after 3 letters.  We need to watch that GPT-4 doesn't fix this, as it does better on many tasks, but for now this should confuse it.

So it should treat each word as an unknown and need to learn it as a new language.  In so doing we can simulate translating from English to birrig and then we have the cipher to see if it does the same.  

The weaknesses however are:
 - it is word for word translation and not requiring it to be reworded to choose best word
 - it is grammatically the same so it doesn't account for mechanics like changing word order for emphasis in some languages or cases like brother (older) is different from brother (younger).



In [74]:
# autoload changes to cipher.py
%load_ext autoreload
%autoreload 2
from lib.cipher import substitution_cipher
# encode the engBBE text verse by verse and store the result in a new 'birrig' column
bible['birrig'] = bible['engBBE'].apply(substitution_cipher)
# last expression of the cell, so the notebook displays the dataframe
bible

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


Unnamed: 0,0,book,chapter,verse,eng-web,eng-asv,eng-kjv2006,engBBE,birrig
0,GEN 1:1,GEN,1,1,"In the beginning, God created the heavens and ...",In the beginning God created the heavens and t...,In the beginning God created the heaven and th...,At the first God made the heaven and the earth.,El lxi sovzl Guw newi lxi xiemir erw lxi ievlx.
1,GEN 1:2,GEN,1,2,The earth was formless and empty. Darkness was...,And the earth was waste and void; and darkness...,"And the earth was without form, and void; and ...",And the earth was waste and without form; and ...,Erw lxi ievlx hez hezli erw holxual suvn; erw ...
2,GEN 1:3,GEN,1,3,"God said, “Let there be light,” and there was ...","And God said, Let there be light: and there wa...","And God said, Let there be light: and there wa...","And God said, Let there be light: and there wa...","Erw Guw zeow, Pil lxivi fi pogxl: erw lxivi he..."
3,GEN 1:4,GEN,1,4,"God saw the light, and saw that it was good. G...","And God saw the light, that it was good: and G...","And God saw the light, that it was good: and G...","And God, looking on the light, saw that it was...","Erw Guw, puucorg ur lxi pogxl, zeh lxel ol hez..."
4,GEN 1:5,GEN,1,5,"God called the light “day”, and the darkness h...","And God called the light Day, and the darkness...","And God called the light Day, and the darkness...","Naming the light, Day, and the dark, Night. An...","Renorg lxi pogxl, Wej, erw lxi wevc, Rogxl. Er..."
...,...,...,...,...,...,...,...,...,...
41894,ENO 42:12,ENO,42,12,,,,,
41895,ENO 42:13,ENO,42,13,,,,,
41896,ENO 42:14,ENO,42,14,,,,,
41897,ENO 42:15,ENO,42,15,,,,,


In [75]:
# confirm we can decipher it: encode=False is handed on to substitution_cipher
# by apply, and the displayed text should read like the engBBE column again
bible['birrig'].apply(substitution_cipher, encode=False)

0          At the first God made the heaven and the earth.
1        And the earth was waste and without form; and ...
2        And God said, Let there be light: and there wa...
3        And God, looking on the light, saw that it was...
4        Naming the light, Day, and the dark, Night. An...
                               ...                        
41894                                                     
41895                                                     
41896                                                     
41897                                                     
41898                                                     
Name: birrig, Length: 41899, dtype: object

In [76]:
# Write the results to data/bible.csv
# index=False leaves the dataframe's row index out of the file
bible.to_csv('data/bible.csv', index=False)