## NYAYAKOSH : A Question Answering Tool on the Indian Penal Code

In [None]:
# Install libraries
!pip install pinecone-client>=3.0.0
!pip install arxiv==2.1.0
!pip install setuptools==-69.0.3  # (Optional)

In [None]:
# Pin llama-index to 0.9.34.
# NOTE(review): the `from llama_index import ...` paths used later in this notebook
# presumably depend on this 0.9.x release line -- confirm before changing the pin.
!pip install llama-index==0.9.34

In [None]:
# Installed without a version pin.
# NOTE(review): consider pinning for reproducibility; presumably required by the
# Hugging Face embedding model and the SentenceTransformerRerank used below -- confirm.
!pip install sentence-transformers

In [8]:
# Provide the Pinecone API key through the environment instead of hardcoding it.
# NOTE(security): a literal key used to be committed in this cell; it should be
# revoked and rotated.
import os
from getpass import getpass

# Prompt only when the variable is not already set, so non-interactive runs can
# export PINECONE_API_KEY beforehand.
if not os.environ.get("PINECONE_API_KEY"):
    os.environ["PINECONE_API_KEY"] = getpass("Enter your Pinecone API key: ")

api_key = os.environ["PINECONE_API_KEY"]

In [9]:
from pinecone import Pinecone

# Create the Pinecone client with the key read from PINECONE_API_KEY.
pc = Pinecone(api_key=api_key)

In [10]:
# Get a handle to the Pinecone index named "test".
# NOTE(review): no upsert/ingestion step is visible in this notebook, so the index
# presumably already holds the Penal Code embeddings -- confirm.
pinecone_index = pc.Index("test")

In [11]:
from llama_index.vector_stores import PineconeVectorStore

# Wrap the Pinecone index handle in a LlamaIndex vector store.
vector_store = PineconeVectorStore(pinecone_index)

### EMBEDDINGS

In [None]:
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

# Loads BAAI/bge-base-en-v1.5 (the "base" model, not bge-small).
# NOTE(review): the embedding model presumably has to match the one used to
# populate the Pinecone index -- confirm.
embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-base-en-v1.5")

## Defining Model

In [13]:
from llama_index.prompts import Prompt
from llama_index.prompts.prompt_type import PromptType

from llama_index import PromptTemplate

# Question-answering prompt text. {context_str} and {query_str} are the two
# placeholders; every line of the prompt ends with a newline.
qa_prompt_tmpl = "\n".join(
    (
        "Context information is below.",
        "---------------------",
        "<context>",
        "{context_str}",
        "</context>",
        "---------------------",
        "Instructions are given below.",
        "answer the query based on given Context. ",
        "If you do not find the answer from Context then write response like below. ",
        "Do not able to answer this question.",
        "Query is given below.",
        "<query>",
        "{query_str}",
        "</query>",
        "Take a deep breath and provide the answer from the context and not from the prior knowledge.",
    )
) + "\n"

# Wrap the raw template string in a PromptTemplate object.
# NOTE(review): `qa_prompt` is not referenced again in the visible notebook; the
# query engine is built from the raw `qa_prompt_tmpl` string instead.
qa_prompt = PromptTemplate(qa_prompt_tmpl)

# System-level instruction text (starts and ends with a newline).
# NOTE(review): not referenced elsewhere in the visible notebook.
system_prompt = (
    "\n"
    "You are a Q&A assistant. Your goal is to answer questions as\n"
    "accurately as possible based on the instructions and context provided.\n"
)

In [None]:
# Installed without a version pin.
# NOTE(review): this is a separate distribution from the `llama-index==0.9.34`
# pinned earlier; verify that installing it does not replace or conflict with
# that pinned release.
!pip install llama-index-llms-huggingface

In [15]:
# Hosted inference endpoint used by this cell:
# https://api-inference.huggingface.co/models/mistralai/Mistral-7B-Instruct-v0.2
# NOTE(security): a literal Hugging Face token used to be committed here, both in
# a comment and in code; it should be revoked and rotated.
import os
from getpass import getpass

from llama_index.llms.huggingface import (
    HuggingFaceInferenceAPI,
    HuggingFaceLLM,
)

# Read the token from the HF_TOKEN environment variable; prompt if it is unset.
HF_TOKEN = os.environ.get("HF_TOKEN") or getpass("Enter your Hugging Face token: ")
llm = HuggingFaceInferenceAPI(
    model_name="mistralai/Mistral-7B-Instruct-v0.2", token=HF_TOKEN
)

### Defining service_context

In [16]:
from llama_index import VectorStoreIndex, ServiceContext

# Service context carrying the remote LLM and the Hugging Face embedding model
service_context = ServiceContext.from_defaults(llm=llm, embed_model=embed_model)

# Index object created on top of the Pinecone-backed vector store
index = VectorStoreIndex.from_vector_store(
    vector_store=vector_store,
    service_context=service_context,
)

### Sentence window retrieval


#### Defining Query Engine

In [17]:
import os
from llama_index import VectorStoreIndex
from llama_index.node_parser import SentenceWindowNodeParser
from llama_index.indices.postprocessor import MetadataReplacementPostProcessor
from llama_index.indices.postprocessor import SentenceTransformerRerank
from llama_index import load_index_from_storage
from llama_index.response_synthesizers import get_response_synthesizer
from llama_index.response_synthesizers import ResponseMode
from llama_index.prompts.default_prompts import DEFAULT_REFINE_PROMPT_TMPL

from llama_index.indices.vector_store.retrievers import VectorIndexRetriever
from llama_index.query_engine.retriever_query_engine import (
    RetrieverQueryEngine,
)

def get_sentence_window_query_engine(
    qa_prompt_tmpl,
    service_context,
    index,
    similarity_top_k=6,
    rerank_top_n=3,
):
    """Build a retriever query engine with metadata replacement and reranking.

    Args:
        qa_prompt_tmpl: Question-answering prompt template string; wrapped in a
            ``Prompt`` of type ``PromptType.QUESTION_ANSWER``.
        service_context: Service context passed to ``get_response_synthesizer``.
        index: Index handed to ``VectorIndexRetriever``.
        similarity_top_k: ``similarity_top_k`` given to the retriever
            (default 6).
        rerank_top_n: ``top_n`` given to ``SentenceTransformerRerank``
            (default 3).

    Returns:
        A ``RetrieverQueryEngine`` built from the retriever, the response
        synthesizer and the two node post-processors.
    """
    # Post-processors, passed to the engine in this order: metadata replacement
    # keyed on "window", then reranking with BAAI/bge-reranker-base.
    postproc = MetadataReplacementPostProcessor(target_metadata_key="window")
    rerank = SentenceTransformerRerank(
        top_n=rerank_top_n, model="BAAI/bge-reranker-base"
    )

    # COMPACT response synthesizer using the caller's QA prompt and the default
    # refine prompt.
    response_synthesizer_sentence = get_response_synthesizer(
        service_context=service_context,
        response_mode=ResponseMode.COMPACT,
        text_qa_template=Prompt(qa_prompt_tmpl, prompt_type=PromptType.QUESTION_ANSWER),
        refine_template=Prompt(DEFAULT_REFINE_PROMPT_TMPL, prompt_type=PromptType.REFINE),
    )

    # Honor the caller's similarity_top_k; this used to be hardcoded to 6, which
    # silently ignored the parameter.
    retriever = VectorIndexRetriever(
        index=index,
        similarity_top_k=similarity_top_k,
    )
    sentence_window_engine = RetrieverQueryEngine(
        retriever=retriever,
        response_synthesizer=response_synthesizer_sentence,
        node_postprocessors=[postproc, rerank],
    )
    return sentence_window_engine

In [None]:
# Build the query engine with the default similarity_top_k and rerank_top_n,
# using the raw QA prompt string, the service context and the Pinecone-backed index.
sentence_window_engine = get_sentence_window_query_engine(qa_prompt_tmpl, service_context, index)


#### Testing on a user query

In [15]:
# Ask one sample question and print the answer text
sample_question = "what is the punishment for making false claim in court?"
window_response = sentence_window_engine.query(sample_question)
print(window_response)


Answer:
Whoever fraudulently or dishonestly, or with intent to injure or annoy any person, makes in a Court of Justice any claim which he knows to be false, shall be punished with imprisonment of either description for a term which may extend to two years, and shall also be liable to fine. (Section 209, Penal Code)


# Evaluation

In [3]:
# nest_asyncio allows asynchronous code to be run from inside an event loop that
# is already running. Jupyter notebooks already operate inside such a loop, so the
# patch is applied once here, before any evaluation helper is called.
import nest_asyncio

nest_asyncio.apply()

from llama_index.evaluation import generate_question_context_pairs
from llama_index import VectorStoreIndex, SimpleDirectoryReader, ServiceContext
from llama_index.node_parser import SimpleNodeParser
from llama_index.evaluation import generate_question_context_pairs
from llama_index.evaluation import RetrieverEvaluator
from llama_index.llms import OpenAI

import os
import pandas as pd

##### Hugging Face Token

In [30]:
# Read the evaluation token from the HF_TOKEN_NEW environment variable; prompt if
# it is unset.
# NOTE(security): a literal Hugging Face token used to be committed in this cell;
# it should be revoked and rotated.
import os
from getpass import getpass

HF_TOKEN_NEW = os.environ.get("HF_TOKEN_NEW") or getpass(
    "Enter your Hugging Face token for evaluation: "
)

##### Mistral Model

In [31]:
# Second client for the same Mistral-7B-Instruct-v0.2 model, authenticated with
# HF_TOKEN_NEW; used below for question generation and as the evaluator LLM.
llm_new = HuggingFaceInferenceAPI(
    model_name="mistralai/Mistral-7B-Instruct-v0.2", token=HF_TOKEN_NEW
)

In [None]:
# Installed without a version pin.
# NOTE(review): presumably needed so SimpleDirectoryReader can parse the PDF
# loaded in the next cell -- confirm.
!pip install pypdf

In [22]:
from llama_index import SimpleDirectoryReader

# Load the Penal Code PDF located next to the notebook
pdf_reader = SimpleDirectoryReader(input_files=["./penal_code.pdf"])
documents = pdf_reader.load_data()

In [23]:
# Split the loaded documents into nodes using a chunk_size of 512.
# No index is built here; the nodes are only used to generate evaluation questions.
node_parser = SimpleNodeParser.from_defaults(chunk_size=512)
nodes = node_parser.get_nodes_from_documents(documents)

In [27]:
# Number of nodes produced by the parser
len(nodes)

369

In [32]:
# Keep nodes at indices 1..149 (149 nodes) for question generation.
# NOTE(review): the slice starts at 1, so the first node (index 0) is skipped --
# confirm this is intentional.
nodes_ = nodes[1:150]
len(nodes_)

149

In [33]:
# Generate question/context pairs from the selected nodes with the Mistral client
# (llm_new), asking for 2 questions per chunk.
qa_dataset = generate_question_context_pairs(
    nodes_,
    llm=llm_new,
    num_questions_per_chunk=2
)

100%|██████████| 149/149 [00:14<00:00, 10.28it/s]


Get the list of test queries from the dataset created above.

In [34]:
# Collect the generated question strings into a list
queries = list(qa_dataset.queries.values())

In [36]:
# Service context for the evaluators; only the LLM (llm_new, Mistral) is set explicitly.
service_context_llm = ServiceContext.from_defaults(llm=llm_new)

In [37]:
from llama_index.evaluation import FaithfulnessEvaluator
# NOTE: despite the `_gpt4` suffix, this evaluator is built from service_context_llm,
# whose LLM is the Mistral-7B-Instruct client (llm_new), not GPT-4.
faithfulness_gpt4 = FaithfulnessEvaluator(service_context=service_context_llm)

In [38]:
# Pick one generated query (index 10) for a single-response evaluation; it lies
# outside the first ten queries (queries[:10]) used by the batch run below.
eval_query = queries[10]
eval_query

'Which sections of the Indian Penal Code discuss offences related to waging war against the Government of India and abetting such acts? Provide two examples of such sections.'

In [39]:
# Answer the selected query with the sentence-window query engine
response_vector = sentence_window_engine.query(eval_query)

In [40]:
# Evaluate the single response for faithfulness.
# NOTE(review): the result is stored but never displayed in the visible notebook.
eval_result = faithfulness_gpt4.evaluate_response(response=response_vector)

In [41]:
from llama_index.evaluation import RelevancyEvaluator

# NOTE: despite the `_gpt4` suffix, this evaluator is built from service_context_llm,
# whose LLM is the Mistral-7B-Instruct client (llm_new), not GPT-4.
relevancy_gpt4 = RelevancyEvaluator(service_context=service_context_llm)

Batch Evaluator

In [42]:
# Batch Evaluation
from llama_index.evaluation import BatchEvalRunner

# Let's pick top 10 queries to do evaluation
batch_eval_queries = queries[:10]

# Initiate BatchEvalRunner to compute FaithFulness and Relevancy Evaluation.
runner = BatchEvalRunner(
    {"faithfulness": faithfulness_gpt4, "relevancy": relevancy_gpt4},
    workers=8,
)

# Compute evaluation
eval_results = await runner.aevaluate_queries(
    sentence_window_engine, queries=batch_eval_queries
)

#### Faithfulness and relevancy scores (judged by Mistral-7B-Instruct-v0.2)

In [43]:
# Faithfulness score: fraction of evaluated responses marked as passing

faithfulness_flags = [result.passing for result in eval_results['faithfulness']]
faithfulness_score = sum(faithfulness_flags) / len(faithfulness_flags)
faithfulness_score

0.9

In [44]:
# Relevancy score: fraction of evaluated responses marked as passing

relevancy_flags = [result.passing for result in eval_results['relevancy']]
relevancy_score = sum(relevancy_flags) / len(relevancy_flags)
relevancy_score


0.9

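Both metrics score 0.9 on this 10-query sample, which is encouraging for a tool whose LLM runs remotely through the Hugging Face Inference API rather than on local hardware. Note that the sample is small and that the same Mistral model both generates and judges the answers, so these numbers are indicative rather than conclusive.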