## LLAMA INDEX INTEGRATION FOR NEURALDB

In [None]:
# Install requirements
!pip install llama-index
!pip install thirdai[neural_db]
!pip install docx2txt

In [None]:
import os
from thirdai import licensing

# Activate the ThirdAI license, preferring the THIRDAI_KEY environment variable
# and falling back to the literal key below when it is not set.
licensing.activate(
    os.environ.get("THIRDAI_KEY", "")  # Enter your ThirdAI key here
)

# Keep an OPENAI_API_KEY that is already exported; otherwise use the fallback.
os.environ.setdefault("OPENAI_API_KEY", "")  # Enter your OpenAI key here

In [None]:
from llama_index.core import SimpleDirectoryReader

# Directory holding every file that should be indexed
document_directory = "data/"

# Read all files found in that directory into LlamaIndex documents
documents = SimpleDirectoryReader(input_dir=document_directory).load_data()

In [None]:
from llama_index.core import Settings

# Split the loaded documents into nodes using the node parser configured on Settings
nodes = Settings.node_parser.get_nodes_from_documents(documents)

In [None]:
from llama_index.core import StorageContext

# Initialize the storage context (by default it's in-memory) and add the
# parsed nodes to its docstore so they can be fetched back by node id later.
storage_context = StorageContext.from_defaults()
storage_context.docstore.add_documents(nodes)

# Optional code for Redacting PII

In [None]:
# You can look into https://github.com/ThirdAILabs/Demos/tree/main/universal_deep_transformer/named_entity_recognition for more info about our NER models.
import os
from thirdai import bolt

# Make sure the local model cache directory exists (portable; no shell needed).
os.makedirs("./models/", exist_ok=True)

# Download the pretrained NER model only when it is not already cached.
if not os.path.exists("./models/pretrained_multilingual.model"):
    download_status = os.system(
        "wget -nv -O ./models/pretrained_multilingual.model 'https://www.dropbox.com/scl/fi/z3xo7nqbjpo1xsvl9b0xh/ner_model_new.bolt?rlkey=md3lw409d55krjdm2ao6kjo7o&st=jnv84wtg&dl=0'"
    )
    if download_status != 0:
        # wget's -O option creates the output file before any data arrives, so
        # a failed download leaves an empty or partial file behind. Remove it
        # so the existence check above triggers a fresh download next run
        # instead of repeatedly trying to load a corrupt model.
        if os.path.exists("./models/pretrained_multilingual.model"):
            os.remove("./models/pretrained_multilingual.model")
        raise RuntimeError(
            "Failed to download the pretrained NER model "
            f"(wget returned status {download_status})."
        )

# Load the NER model that redact_pii uses to tag PII tokens.
pii_model = bolt.UniversalDeepTransformer.load("./models/pretrained_multilingual.model")
    
def redact_pii(node):
    """Replace PII tokens in ``node.text`` with their predicted entity tag.

    Newlines are turned into spaces, the text is tagged by ``pii_model``, and
    every whitespace-separated token whose top predicted tag is not ``"O"``
    is swapped for that tag. The node is modified in place and returned.

    NOTE(review): assumes ``pii_model.predict`` yields one entry per
    whitespace-separated token, each entry being a ranked list whose first
    item starts with the tag name -- confirm against the ThirdAI NER docs.
    """
    flattened = node.text.replace("\n"," ")
    tag_predictions = pii_model.predict({"source": flattened}, top_k=1)

    masked_words = []
    for position, word in enumerate(flattened.split()):
        best_tag = tag_predictions[position][0][0]
        # "O" marks a token outside any entity, so the original word is kept.
        masked_words.append(word if best_tag == "O" else best_tag)

    node.text = " ".join(masked_words)
    return node

# Custom Retriever for NeuralDB

In [None]:
from typing import List
from thirdai import neural_db as ndb
from llama_index.core.retrievers import (
    BaseRetriever,
)
# import NodeWithScore
from llama_index.core.schema import NodeWithScore

class NDBretriever(BaseRetriever):
    """LlamaIndex retriever backed by a ThirdAI NeuralDB index.

    Each node is inserted into NeuralDB as its own in-memory text document
    named after the node id, so a search hit can be mapped back to the node
    held in the storage context's docstore.
    """

    def __init__(self, nodes, storage_context, top_k: int = 5):
        """Index ``nodes`` in NeuralDB and remember where the nodes live.

        Args:
            nodes: Nodes to index; each must expose ``node_id`` and ``text``.
            storage_context: Storage context whose docstore contains the
                same nodes, used to resolve search hits back to nodes.
            top_k: Number of results requested from NeuralDB per query.
        """
        # Let BaseRetriever initialise its own state, as the LlamaIndex
        # custom-retriever guide does, before adding NeuralDB attributes.
        super().__init__()
        self.db = self.constructdb(nodes)
        self.storage_context = storage_context
        self.top_k = top_k

    def constructdb(self, nodes):
        """Create a NeuralDB and insert one in-memory document per node.

        Returns:
            The populated ``ndb.NeuralDB`` instance.
        """
        db = ndb.NeuralDB()
        # The document name doubles as the lookup key used in _retrieve.
        docs = [
            ndb.InMemoryText(name=node.node_id, texts=[node.text])
            for node in nodes
        ]
        db.insert(docs)
        return db

    def _retrieve(self, query_bundle) -> List[NodeWithScore]:
        """Search NeuralDB and return the matching docstore nodes with scores.

        Args:
            query_bundle: Query object; only its ``query_str`` is used.

        Returns:
            One ``NodeWithScore`` per NeuralDB result, in result order.
        """
        results = self.db.search(query_bundle.query_str, top_k=self.top_k)
        node_with_scores: List[NodeWithScore] = []
        for result in results:
            # result.source is the document name given in constructdb, i.e.
            # the node id, so it can be used as the docstore key.
            node = self.storage_context.docstore.get_node(result.source)
            # To remove PII from the text before it is sent to the LLM, the NER
            # model can detect and redact it. Uncomment the next line to use it.
            # node = redact_pii(node)

            # To see exactly what goes into the LLM, uncomment the next line.
            # print(node.get_content())
            node_with_scores.append(NodeWithScore(node=node, score=result.score))

        return node_with_scores

In [None]:
from llama_index.core import get_response_synthesizer
from llama_index.core.query_engine import RetrieverQueryEngine

# Build the NeuralDB-backed retriever over the parsed nodes
ndb_retriever = NDBretriever(
    nodes=nodes,
    storage_context=storage_context,
    top_k=5,
)

# Default response synthesizer used to produce the final answer
response_synthesizer = get_response_synthesizer()

# Wire the retriever and the synthesizer together into a query engine
custom_query_engine = RetrieverQueryEngine(
    response_synthesizer=response_synthesizer,
    retriever=ndb_retriever,
)

# Optional code for how to use our RLHF Capabilities

In [None]:
# See https://github.com/ThirdAILabs/Demos/blob/main/neural_db/main_example.ipynb for all the NeuralDB functionality you can use. A few sample usages follow.

# Associate: link a source query with a target phrase
ndb_retriever.db.associate(
    source="who are the parties involved",
    target="made by and between",
)

# Upvote: first inspect the candidate results returned for a query
results = ndb_retriever.db.search(query="made by and between", top_k=10)
for result in results:
    print(result.id, result.text, sep="\n")
    # print(result.source)
    # print(result.metadata)

# If you think the result with id 4 is the most relevant and correct answer for the query, upvote it to teach the model
ndb_retriever.db.text_to_result("made by and between", 4)

# Query your application

In [None]:
# Send the query through the custom query engine and show the answer
response = custom_query_engine.query("made by and between")
print(response)