In [1]:
import pandas as pd
import numpy as np
import os
from datasets import load_dataset
from dotenv import load_dotenv
import google.generativeai as genai
from minsearch import Index

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
load_dotenv()
genai.configure(api_key=os.environ["API_KEY"])

In [4]:
# Prepare dataset
# using gotquestions dataset

import json

with open("datasets/questions_answer.json", 'rt') as f_in:
    data = json.load(f_in)

type(data)

list

In [5]:
# Clean answer key in data
for i in range(len(data)):
    data[i]['answer'] = data[i]['answer'].replace('Answer\n', '')

data[3]

{'category': 'Questions about God',
 'sub.category': 'The Nature of God',
 'question': 'What are the attributes of God?',
 'answer': 'The Bible, God’s Word, tells us what God is like and what He is not like. Without the authority of the Bible, any attempt to explain God’s attributes (inherent qualities) would be no better than an opinion, which by itself is often incorrect, especially in understanding God (Job 42:7). To say that it is important for us to try to understand what God is like is a huge understatement. Failure to do so can cause us to set up, chase after, and worship false gods contrary to His will (Exodus 20:3-5).\n\nOnly what God has chosen to reveal of Himself can be known. One of God’s attributes or qualities is “light,” meaning that He is self-revealing in information of Himself (Isaiah 60:19; James 1:17). The fact that God has revealed knowledge of Himself should not be neglected (Hebrews 4:1). Creation, the Bible, and the Word made flesh (Jesus Christ) will help us t

In [5]:

index = Index(
    text_fields = ["question", "answer", "sub.category"],
    keyword_fields = ["category"]
)

index.fit(data)

<minsearch.minsearch.Index at 0x76c4304eb2c0>

In [6]:
def search(query):
# filter_dict = {"category": "Questions about God"}
    boost_dict = {"question": 3, "answer": 1, "sub.category": 1}

    return index.search(query, boost_dict, num_results=5)

In [11]:
def build_prompt(query, search_results):
    prompt_template = """
    You are a spiritual assistant, your goal is to answer questions about christian spirituality like God, Jesus, Holy Spirit, and Life as a believer of this spirituality.
    Answer the QUESTIONS based on the CONTEXT when answering the QUESTION.
    Answer the questions as if you are a spiritual assistant using the CONTEXT given. Don't provide personal opinions or beliefs.
    If the CONTEXT doesn't match or contain the answer, give NONE as the response.
    
    QUESTION : {question}
    
    CONTEXT: {context}
    
    """.strip()

    # context = ""
    # for doc in search_results:
    #     context = f'{context}sub.category: {doc["sub.category"]} \nquestion: {doc["question"]} \nanswer: {doc["answer"]}\n\n'

    return prompt_template.format(question= query, context=search_results).strip()

In [12]:
def llm(prompt):
    model = genai.GenerativeModel("gemini-1.5-pro")
    result = model.generate_content(prompt)
    return result.text
    

In [13]:
def rag(query):
    results = search(query)
    prompt = build_prompt(query, results)
    return llm(prompt)

In [14]:
rag("Who is Jesus Christ?")

"Unlike the question “Does God exist?” the question of whether Jesus Christ existed is asked by relatively few people. Most accept that Jesus was truly a man who lived in Israel 2,000 years ago. The debate begins with the discussion of Jesus’ full identity. Almost every major religion teaches that Jesus was a prophet or a good teacher or a godly man. But the Bible tells us that Jesus was infinitely more than a prophet, a good teacher, or a godly man.  Jesus claimed to be one with the Father, a claim understood by those around him to be a declaration of deity.  The Bible supports this claim in numerous passages, including John 1, John 10:30, John 8:58, Titus 2:13, 2 Peter 1:1, and Hebrews 1:8.  Old Testament prophecies, such as Isaiah 9:6, also foretold the coming of God in the form of a child. Jesus’ identity as God is crucial because it speaks to his trustworthiness, the validity of the apostles' testimony, the sufficiency of his sacrifice for sin, and his role as the sole mediator be

In [10]:
## Implementing Vector Search using elastic search

from elasticsearch import Elasticsearch

search_client = Elasticsearch("http://localhost:9200/")
search_client.info()

ObjectApiResponse({'name': '1c8c5ef7455c', 'cluster_name': 'docker-cluster', 'cluster_uuid': 'udKfcH7JTWKMWqDcPwIwbA', 'version': {'number': '8.4.3', 'build_flavor': 'default', 'build_type': 'docker', 'build_hash': '42f05b9372a9a4a470db3b52817899b99a76ee73', 'build_date': '2022-10-04T07:17:24.662462378Z', 'build_snapshot': False, 'lucene_version': '9.3.0', 'minimum_wire_compatibility_version': '7.17.0', 'minimum_index_compatibility_version': '7.0.0'}, 'tagline': 'You Know, for Search'})

In [12]:
index_settings = {
    "settings": {
        "number_of_shards": 1,
        "number_of_replicas": 0
    },
    "mappings": {
        "properties": {
            "answer": {"type": "text"},
            "sub.category": {"type": "text"},
            "question": {"type": "text"},
            "category": {"type": "keyword"} 
        }
    }
}

# delete an index


index_name = "got_questions"
# search_client.indices.delete(index=index_name)
search_client.indices.create(index=index_name, body=index_settings)


ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'got_questions'})

In [13]:
from tqdm import tqdm

for doc in tqdm(data):
    search_client.index(index=index_name, document=doc)


100%|██████████| 6398/6398 [01:30<00:00, 70.64it/s] 


In [14]:
def elastic_search(query):
    result_docs = []
    search_query = {
        "size": 5,
        "query": {
            "bool": {
                "must": {
                    "multi_match": {
                        "query": query,
                        "fields": ["question^3", "answer", "sub.category"],
                        "type": "best_fields"
                    }
                }
            }
        }
    }
    response = search_client.search(index=index_name, body=search_query)
    result_docs.extend(hits['_source'] for hits in response['hits']['hits'])
    return result_docs

In [16]:
# elastic_search("The love of God is always merciful, what does tPrincipalities and powerhis mean?")

In [21]:
query = "Who is the devil and how does he ruin us as believers?"
search_query = elastic_search(query)
prompt = build_prompt(query, search_query)
answer = llm(prompt)
print(answer)

The devil is a deceiver who “prowls around like a roaring lion seeking whom he may devour” (1 Peter 5:8).  He tempts humans to sin against God, appealing to the lust of the flesh, the lust of the eyes, and the pride of life (1 John 2:16).  He also fills people’s hearts with lies (Acts 5:3). He uses various tactics such as exploiting weaknesses, manipulating with false guilt, twisting Scripture, and deceiving.



In [6]:
# Vector search and Embeddings

from sentence_transformers import SentenceTransformer

model = SentenceTransformer("all-MiniLM-L6-v2", cache_folder="./cache")
model.encode("This is a simple sentence")

array([ 9.74732116e-02,  6.04952388e-02,  4.48007435e-02,  4.43959720e-02,
        2.89802290e-02,  2.46002525e-02,  2.87308935e-02, -3.63137876e-03,
        7.79270679e-02,  4.62326556e-02,  7.30155483e-02, -7.47010335e-02,
       -1.49526345e-02, -2.45079212e-02,  7.45045021e-03, -3.75776179e-02,
        2.74516121e-02, -5.38864881e-02, -1.24066614e-01, -7.13273790e-03,
       -1.03776567e-02,  3.88279147e-02, -4.40417938e-02, -1.80881936e-02,
       -6.37384579e-02,  5.26146889e-02, -6.59174621e-02,  5.18055223e-02,
        9.10281539e-02, -2.85339616e-02, -2.75604744e-02, -1.53536592e-02,
        5.78026436e-02,  3.17436159e-02, -9.66506451e-03, -2.25416454e-03,
        3.52889951e-03,  2.36713048e-02,  3.73884314e-03, -7.30207143e-03,
        1.30881998e-03, -5.69570325e-02, -7.05083553e-03,  3.59891914e-02,
       -2.65217721e-02, -3.53452489e-02, -2.56818347e-03, -2.15219613e-02,
        3.56441922e-02,  3.21477279e-02, -7.80247301e-02, -7.43311942e-02,
       -6.67421445e-02, -

In [7]:
from tqdm.autonotebook import tqdm

operations = []
for doc in tqdm(data):
    doc["answer_embedding"] = model.encode(doc["answer"]).tolist()
    operations.append(doc)

100%|██████████| 6398/6398 [10:51<00:00,  9.82it/s]


In [31]:
len(model.encode("This is a simple sentence"))

384

In [8]:
index_settings = {
    "settings": {
        "number_of_shards": 1,
        "number_of_replicas": 0
    },
    "mappings": {
        "properties": {
            "answer": {"type": "text"},
            "sub.category": {"type": "text"},
            "question": {"type": "text"},
            "category": {"type": "keyword"} ,
            "answer_embedding": {"type": "dense_vector", "dims": 384, "index": True, "similarity": "cosine"},
        }
    }
}

In [11]:
index_name = "vector-embeddings-answer"

search_client.indices.delete(index="got_question_v", ignore_unavailable=True)
search_client.indices.create(index=index_name, body=index_settings)

ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'vector-embeddings-answer'})

In [12]:
for doc in tqdm(operations):
    try:
        search_client.index(index=index_name, document=doc)
    except Exception as e:
        print(e)

  0%|          | 0/6398 [00:00<?, ?it/s]

 15%|█▌        | 980/6398 [00:09<00:49, 108.88it/s]


KeyboardInterrupt: 

In [86]:
search_term = "Tell me about Angels, Are they servants or messengers?"
vector_search_term = model.encode(search_term)

In [90]:
query = {
    "field": "answer_embedding",
    "query_vector": vector_search_term,
    "k": 5,
    "num_candidates": 10000
}

In [91]:
results = []

response = search_client.search(index=index_name, knn=query, source=["category", "sub.category", "question", "answer"])
results.extend(hits['_source'] for hits in response['hits']['hits'])

In [93]:
prompt = build_prompt(search_term, results)
answer = llm(prompt)
print(answer)

Angels act as messengers, bringing the word of God to people (Matthew 1:20; 2:13; Luke 1:11–20; Acts 1:10–11; 8:26).  They are also ministering spirits sent to serve those who will inherit salvation (Hebrews 1:14).



In [None]:
# HYBRID SEARCH
