In [1]:
import json
import minsearch
import openai
import pandas as pd
from openai import OpenAI
from tqdm.auto import tqdm

In [2]:
with open('/root/practice/logos/data/kjv.json', 'r') as f:
    bible_json = json.load(f)

In [3]:
with open('/root/practice/logos/data/video_data.json', 'r') as f:
    video_json = json.load(f)

In [4]:
bible = bible_json["verses"]

In [22]:
for document in bible:
    for key, value in document.items():
        document[key] = str(value)

In [23]:
from elasticsearch import Elasticsearch
es_client = Elasticsearch("http://localhost:9200")

In [26]:
index_settings = {
    "settings": {
        "number_of_shards": 1,
        "number_of_replicas": 0
    },
    "mappings": { 
        "properties": {
            # Fields for Bible verses
            "book": { "type": "text" },  # Bible-specific field
            "book_name": { "type": "keyword" },  # Bible-specific field
            "chapter": { "type": "text" },  # Bible-specific field
            "verse": { "type": "text" },  # Bible-specific field
            
            # Fields for YouTube transcripts
            "video_id": { "type": "keyword" },  # Video-specific field
            "title": { "type": "text" },  # Video-specific field
            "publish_date": { "type": "date" },  # Video-specific field
            "author": { "type": "text" },  # Video-specific field

            # Common field for both types of documents
            "text": { "type": "text" }  # Both Bible verses and video transcripts share this
        }
    }
}

index_name = "final_db"

# Create the index
es_client.indices.create(index=index_name, body=index_settings)

ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'final_db'})

In [5]:
documents = bible + video_json

In [6]:
with open("document.json", 'w') as f_out:
    json.dump(documents, f_out, indent=4)

In [28]:
for doc in tqdm(documents):
    # Ensure each document has the expected structure for indexing
    if 'book_name' in doc:  # It's a Bible verse
        # Prepare for indexing
        es_client.index(index=index_name, document=doc)
    elif 'video_id' in doc:  # It's a YouTube transcript
        # Prepare for indexing
        es_client.index(index=index_name, document=doc)

  0%|          | 0/36755 [00:00<?, ?it/s]

In [29]:
def elastic_search(query):
    search_query = {
        "size": 5,
        "query": {
            "bool": {
                "should": [  # Use 'should' to match either condition
                    {
                        "multi_match": {
                            "query": query,
                            "fields": ["text^4", "book_name", "chapter", "verse", "book"],
                            "type": "best_fields"
                        }
                    },
                    {
                        "multi_match": {
                            "query": query,
                            "fields": ["title", "text", "author"],  # Fields for video transcripts
                            "type": "best_fields"
                        }
                    }
                ]
            }
        }
    }

    # Execute the search query
    response = es_client.search(index=index_name, body=search_query)

    result_docs = []
    
    # Collect the results from the hits
    for hit in response['hits']['hits']:
        result_docs.append(hit['_source'])

    return result_docs

In [36]:
def build_prompt(query, search_results):
    context = ""
    
    for doc in search_results:
        if 'book_name' in doc:  # This indicates it's a Bible verse
            context += (
                f"Book: {doc['book_name']}\n"
                f"Chapter: {doc['chapter']}\n"
                f"Verse: {doc['verse']}\n"
                f"Text: {doc['text']}\n"
            )
        elif 'title' in doc:  # This indicates it's a YouTube transcript
            context += (
                f"Video Title: {doc['title']}\n"
                f"Author: {doc['author']}\n"
                f"Published Date: {doc['publish_date']}\n"
                f"Transcript: {doc['text']}\n"
            )
    
    prompt = f"""
You are a knowledgeable Bible study assistant called "Logos". 
Your primary function is to provide accurate, insightful responses to users' questions about the Bible and related teachings. 
When answering a question, provide direct, scriptural answers and quote the relevant Bible verse(s) to support your response.
If the answer is not available in the Bible or the resources at hand, kindly let the user know in a polite and friendly manner without referencing any internal systems or sources.

Your tasks include:

1. Answering Questions: When a user asks a question related to the Bible or its teachings, respond with a clear and concise answer rooted in scripture. Always include relevant Bible verses and chapter citations to support your answer (e.g., *Genesis 1:1*).

2. Contextual Understanding: For any chapter or verse referenced, explain its significance within the Bible's broader narrative. Provide theological, historical, or cultural insights if applicable, and quote Bible verses to enrich the user's understanding.

3. Life Situations: When responding to questions about real-life situations (e.g., advice for overcoming fear, forgiveness, or patience), offer practical applications of the scriptures. Always support your advice by quoting where it's written in the Bible.

4. Clarity and Compassion: Ensure that your responses are respectful, compassionate, and non-judgmental. Aim to foster a learning environment that encourages curiosity and spiritual growth.

5. Friendly Guidance: If the Bible or provided materials don't directly answer the user's question, kindly inform them with a friendly phrase such as, "This specific answer may not be found in scripture, but you can explore related verses like...," and offer related suggestions.

---

QUESTION: {query}

BIBLE REFERENCES: {context}
""".strip()

    return prompt

In [37]:
def llm(prompt):
    response = client.chat.completions.create(
    model = 'gpt-4o-mini',
    messages = [{"role": "user", "content": prompt}]
    )
    
    return response.choices[0].message.content

In [38]:
def rag(query):
    search_result = elastic_search(query)
    prompt = build_prompt(query, search_result)
    answer = llm(prompt)
    return answer

In [39]:
client = OpenAI()

In [44]:
rag("why did jesus connect anger with murder")

'Jesus connects anger with murder to highlight that both stem from a deeper issue related to valuing human life. In Matthew 5:21-22, He states, "You have heard that it was said to the ancients, \'You shall not commit murder.\' And I say to you that everyone who is angry with his brother will be guilty in the court." By equating anger and verbal insults with murder, Jesus emphasizes that such feelings and actions reflect contempt and disregard for others, which is contrary to respecting them as beings created in the image of God.\n\nThis connection reveals that the root cause of murder (anger and hatred) is just as significant and detrimental in God\'s sight. The teaching serves to challenge listeners to examine their hearts and attitudes toward others, suggesting that harboring anger can lead to behaviors that devalue another person\'s life. Thus, Jesus calls His followers to recognize the weight of their words and emotions, ultimately encouraging them to cultivate respect and love for