In [1]:
import pandas as pd
import numpy as np
import os
from datasets import load_dataset
from dotenv import load_dotenv
import google.generativeai as genai
from minsearch import Index

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Load settings from a local .env file (python-dotenv), then pass the API key
# to the Gemini SDK. os.environ[...] raises KeyError if API_KEY is not set.
load_dotenv()
genai.configure(api_key=os.environ["API_KEY"])

In [3]:
# Prepare dataset
# Using the gotquestions dataset, stored locally as a JSON list of records.

import json

# Decode explicitly as UTF-8: the answers contain non-ASCII punctuation
# (curly quotes, en dashes) and the platform default encoding is not UTF-8
# everywhere, which would garble the text or raise UnicodeDecodeError.
with open("datasets/questions_answer.json", "rt", encoding="utf-8") as f_in:
    data = json.load(f_in)

# Show one record to check the schema.
print(data[0])

{'category': 'Questions about God', 'sub.category': 'The Nature of God', 'question': 'What is God like?', 'answer': 'Answer\nEvery culture in the history of the world has had some concept of what God is like. Some have assumed that God is in control of the weather and have made images of a storm god throwing lightning bolts around (Baal worship in Canaan). Some have assumed that God is very powerful, and so they worshiped the most powerful thing they could see, the sun (Ra worship in Egypt). Others have assumed that God is everywhere and therefore have worshiped everything (pantheism in Stoic philosophy). Some have assumed that God is unknowable and have turned to agnosticism or, just to cover their bases, have worshiped “An Unknown God” (Acts 17:23).\nThe problem with each of these assumptions is that they only get part of the picture of who God is. Yes, God is in control of the weather, but He is also in control of so much more. He is powerful, but much more powerful than the sun. He

In [4]:
# Remove the 'Answer\n' marker from the answer text of every record (in place)
for record in data:
    record['answer'] = record['answer'].replace('Answer\n', '')

# Spot-check one cleaned record
data[3]

{'category': 'Questions about God',
 'sub.category': 'The Nature of God',
 'question': 'What are the attributes of God?',
 'answer': 'The Bible, God’s Word, tells us what God is like and what He is not like. Without the authority of the Bible, any attempt to explain God’s attributes (inherent qualities) would be no better than an opinion, which by itself is often incorrect, especially in understanding God (Job 42:7). To say that it is important for us to try to understand what God is like is a huge understatement. Failure to do so can cause us to set up, chase after, and worship false gods contrary to His will (Exodus 20:3-5).\n\nOnly what God has chosen to reveal of Himself can be known. One of God’s attributes or qualities is “light,” meaning that He is self-revealing in information of Himself (Isaiah 60:19; James 1:17). The fact that God has revealed knowledge of Himself should not be neglected (Hebrews 4:1). Creation, the Bible, and the Word made flesh (Jesus Christ) will help us t

In [5]:

# Build a minsearch Index over the records: question, answer and sub.category
# are registered as text fields, category as a keyword field.
index = Index(
    text_fields = ["question", "answer", "sub.category"],
    keyword_fields = ["category"]
)

# fit() is the last expression of the cell, so its return value is displayed.
index.fit(data)

<minsearch.minsearch.Index at 0x7bdff5094d40>

In [6]:
def search(query):
    """Return the top 5 minsearch hits for ``query``, favouring question matches.

    Args:
        query: Free-text search string.

    Returns:
        The result of ``index.search`` limited to 5 documents.
    """
    # A match in the question field is weighted three times the other fields.
    boost_dict = {"question": 3, "answer": 1, "sub.category": 1}

    # boost_dict must be passed by keyword: the second positional parameter of
    # Index.search is filter_dict, so passing it positionally makes minsearch
    # treat the boosts as keyword filters and never apply them.
    return index.search(query, boost_dict=boost_dict, num_results=5)

In [7]:
def build_prompt(query, search_results):
    """Build the LLM prompt for ``query`` from the retrieved documents.

    Args:
        query: The user's question; substituted for ``{question}``.
        search_results: Iterable of dicts, each with the keys
            ``"sub.category"``, ``"question"`` and ``"answer"``.

    Returns:
        The filled-in prompt with leading/trailing whitespace stripped.

    Raises:
        KeyError: If a document lacks one of the three keys above.
    """
    prompt_template = """
    You are a spiritual assistant, your goal is to answer questions about christian spirituality like God, Jesus, Holy Spirit, and Life as a believer of this spirituality.
    Answer the QUESTIONS based on the CONTEXT when answering the QUESTION.
    Answer the questions as if you are a spiritual assistant using the CONTEXT given. Don't provide personal opinions or beliefs.
    If the CONTEXT doesn't match or contain the answer, give NONE as the response.
    
    QUESTION : {question}
    
    CONTEXT: {context}
    
    """.strip()

    # One readable text entry per retrieved document.
    context = "".join(
        f'sub.category: {doc["sub.category"]} \nquestion: {doc["question"]} \nanswer: {doc["answer"]}\n\n'
        for doc in search_results
    )

    # Substitute the formatted context, not the raw search_results list, whose
    # Python repr would otherwise be dumped into the prompt.
    return prompt_template.format(question=query, context=context).strip()

In [8]:
def llm(prompt):
    """Send ``prompt`` to the "gemini-1.5-pro" model and return the reply text."""
    gemini = genai.GenerativeModel("gemini-1.5-pro")
    return gemini.generate_content(prompt).text
    

In [9]:
def rag(query):
    """Answer ``query``: retrieve documents, build the prompt, ask the LLM."""
    return llm(build_prompt(query, search(query)))

In [10]:
# End-to-end run of the minsearch-backed pipeline; the answer is the cell output
rag("How do I live a fullfilled life?")

'To live a fulfilled life as a Christian means understanding that your best life is not to be found in this world, but in the world to come.  While non-Christians focus on maximizing their earthly experience, Christians find their greatest joy and fulfillment in their future with God in heaven. This involves prioritizing eternal treasures over earthly possessions and living a life of faith, obedience, and service to God.  It is not a life free of hardship, but one where trials are seen as opportunities for growth and where faith perseveres to the end.  This is achieved through a close relationship with God, nurtured by prayer and study of His Word, and fellowship with other believers.\n'

In [11]:
## Implementing search using Elasticsearch (client setup)

from elasticsearch import Elasticsearch

# Client for the Elasticsearch node on localhost:9200; info() doubles as a
# connectivity check and its response is displayed as the cell output.
search_client = Elasticsearch("http://localhost:9200/")
search_client.info()

ObjectApiResponse({'name': 'a261ab5c6127', 'cluster_name': 'docker-cluster', 'cluster_uuid': '4OAVz4ybTYeTVYzn1UcwhQ', 'version': {'number': '8.4.3', 'build_flavor': 'default', 'build_type': 'docker', 'build_hash': '42f05b9372a9a4a470db3b52817899b99a76ee73', 'build_date': '2022-10-04T07:17:24.662462378Z', 'build_snapshot': False, 'lucene_version': '9.3.0', 'minimum_wire_compatibility_version': '7.17.0', 'minimum_index_compatibility_version': '7.0.0'}, 'tagline': 'You Know, for Search'})

In [12]:
# Settings and field mappings for the text-search index: the three free-text
# fields are mapped as "text", category as "keyword".
index_settings = {
    "settings": {
        "number_of_shards": 1,
        "number_of_replicas": 0,
    },
    "mappings": {
        "properties": {
            "answer": {"type": "text"},
            "sub.category": {"type": "text"},
            "question": {"type": "text"},
            "category": {"type": "keyword"},
        }
    },
}

index_name = "got_questions"

# Drop any index left over from an earlier run so this cell can be re-executed:
# create() fails when the index already exists. ignore_unavailable makes the
# delete a no-op on the first run, when there is nothing to remove yet.
search_client.indices.delete(index=index_name, ignore_unavailable=True)
search_client.indices.create(index=index_name, body=index_settings)


ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'got_questions'})

In [13]:
from tqdm import tqdm

# Send every record to the index named by index_name, one request per
# document; tqdm displays a progress bar over the loop.
for doc in tqdm(data):
    search_client.index(index=index_name, document=doc)


100%|██████████| 6398/6398 [01:30<00:00, 70.64it/s] 


In [14]:
def elastic_search(query):
    """Run a multi-field text query on Elasticsearch and return the hit documents.

    The query is matched against question (boosted with ``^3``), answer and
    sub.category using a ``best_fields`` multi_match, limited to 5 hits.

    Args:
        query: Free-text search string.

    Returns:
        A list with the ``_source`` of each hit, in the order returned.
    """
    multi_match = {
        "query": query,
        "fields": ["question^3", "answer", "sub.category"],
        "type": "best_fields",
    }
    request_body = {
        "size": 5,
        "query": {"bool": {"must": {"multi_match": multi_match}}},
    }
    response = search_client.search(index=index_name, body=request_body)
    return [hit['_source'] for hit in response['hits']['hits']]

In [16]:
# elastic_search("The love of God is always merciful, what does this mean?")

In [21]:
# RAG run backed by Elasticsearch text search: retrieve, build prompt, ask LLM
query = "Who is the devil and how does he ruin us as believers?"
# NOTE: despite its name, search_query holds the retrieved documents
search_query = elastic_search(query)
prompt = build_prompt(query, search_query)
answer = llm(prompt)
print(answer)

The devil is a deceiver who “prowls around like a roaring lion seeking whom he may devour” (1 Peter 5:8).  He tempts humans to sin against God, appealing to the lust of the flesh, the lust of the eyes, and the pride of life (1 John 2:16).  He also fills people’s hearts with lies (Acts 5:3). He uses various tactics such as exploiting weaknesses, manipulating with false guilt, twisting Scripture, and deceiving.



In [None]:
# Vector search and Embeddings

from sentence_transformers import SentenceTransformer

# Embedding model used below for both the document answers and the query text
model = SentenceTransformer("all-MiniLM-L6-v2")
# Quick check that encoding works; the resulting vector is the cell output
model.encode("This is a simple sentence")

In [28]:
# Documents for the vector index: each record plus an embedding of its answer.
# A shallow copy is built for every record instead of adding the vector to the
# original dict, so `data` keeps its text-only shape; otherwise re-running
# earlier cells would print or index the embedding vectors as well.
operations = [
    {**doc, "answer_embedding": model.encode(doc["answer"]).tolist()}
    for doc in tqdm(data)
]

In [31]:
# Length of one embedding vector; the dense_vector mapping's "dims" must match it
len(model.encode("This is a simple sentence"))

384

In [80]:
# Settings and mappings for the vector index. NOTE: this rebinds the
# index_settings name that was used earlier for the text-only index.
index_settings = {
    "settings": {
        "number_of_shards": 1,
        "number_of_replicas": 0
    },
    "mappings": {
        "properties": {
            "answer": {"type": "text"},
            "sub.category": {"type": "text"},
            "question": {"type": "text"},
            "category": {"type": "keyword"} ,
            # 384 is the embedding length reported by len(model.encode(...))
            "answer_embedding": {"type": "dense_vector", "dims": 384, "index": True, "similarity": "cosine"},
        }
    }
}

In [81]:
# NOTE: rebinding index_name also changes which index elastic_search() queries,
# because that function reads the module-level name at call time.
index_name = "vector-embeddings-answer"

# Delete-then-create so the cell can be re-run; ignore_unavailable avoids an
# error when the index does not exist yet.
search_client.indices.delete(index=index_name, ignore_unavailable=True)
search_client.indices.create(index=index_name, body=index_settings)

ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'vector-embeddings-answer'})

In [82]:
# Index the embedded documents one request at a time. Best effort: a failure
# on one document is printed and the loop moves on to the next.
for doc in tqdm(operations):
    try:
        search_client.index(index=index_name, document=doc)
    except Exception as e:
        print(e)

100%|██████████| 6398/6398 [01:58<00:00, 53.85it/s]


In [86]:
# Encode the question with the same model that was used for the answers
search_term = "Tell me about Angels, Are they servants or messengers?"
vector_search_term = model.encode(search_term)

In [90]:
# kNN clause, passed below as search(knn=...): the 5 nearest neighbours of the
# query vector on the answer_embedding field.
# NOTE: rebinds `query`, which earlier in the notebook held a question string.
query = {
    "field": "answer_embedding",
    "query_vector": vector_search_term,
    "k": 5,
    "num_candidates": 10000
}

In [91]:
# Run the kNN query, asking only for the text fields of each document (the
# embedding vector is left out), and keep the stored source of every hit.
response = search_client.search(
    index=index_name,
    knn=query,
    source=["category", "sub.category", "question", "answer"],
)
results = [hit['_source'] for hit in response['hits']['hits']]

In [93]:
# RAG run backed by vector search: build the prompt from the kNN hits, ask the LLM
prompt = build_prompt(search_term, results)
answer = llm(prompt)
print(answer)

Angels act as messengers, bringing the word of God to people (Matthew 1:20; 2:13; Luke 1:11–20; Acts 1:10–11; 8:26).  They are also ministering spirits sent to serve those who will inherit salvation (Hebrews 1:14).



In [None]:
# HYBRID SEARCH
