In [3]:
import os
from dotenv import load_dotenv
from langchain.vectorstores import Chroma
from langchain.embeddings import HuggingFaceEmbeddings
from langchain_groq import ChatGroq
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain.chains.query_constructor.base import AttributeInfo
from langchain.retrievers.self_query.base import SelfQueryRetriever
from langchain.schema import Document, SystemMessage, HumanMessage

Loading the Groq API key, which is stored in the `.env` file

In [2]:
# Pull the variables defined in the local .env file into the environment, then
# read the Groq key from there so the secret never appears in the notebook.
load_dotenv()
api_key = os.getenv('GROQ_API_KEY')

# Fail here with a clear message instead of later, when the LLM client is
# built or called with a missing/empty key.
if not api_key:
    raise RuntimeError(
        "GROQ_API_KEY is not set. Add it to your .env file or export it "
        "in the environment before running this notebook."
    )

Setting the LLM model which we want to use

In [3]:
# Groq-hosted Llama 3 70B chat model, with the sampling temperature fixed at 0.
chat = ChatGroq(
    model_name="llama3-70b-8192",
    groq_api_key=api_key,
    temperature=0,
)

Initializing the embedding model we want to use

In [4]:
# Sentence-transformers model that is later handed to the vector store as its
# embedding function.
embedding_model = HuggingFaceEmbeddings(
    model_name="sentence-transformers/paraphrase-MiniLM-L6-v2",
)

  warn_deprecated(
  from tqdm.autonotebook import tqdm, trange


Specifying the directory where vector database is stored

In [1]:
# Relative path to the persisted Chroma database. Because it is relative, it
# is resolved against the kernel's current working directory, so the result
# depends on where the notebook is launched from.
persist_directory = "../RAG_3_vectordb_3_separate codes/article_chroma_db"

Checking if the directory exists

In [4]:
# Report whether the vector-database path is present before trying to load it.
print(
    "Persist directory exists."
    if os.path.exists(persist_directory)
    else "Persist directory does not exist."
)

Persist directory exists.


Loading already created vector database. By specifying the embedding function, we are ensuring that the same model used to create the database is being used to query it.

In [None]:
# Open the Chroma store kept under `persist_directory`, using `embedding_model`
# as the embedding function for queries.
vectorstore = Chroma(
    embedding_function=embedding_model,
    persist_directory=persist_directory,
)

Providing additional information about the metadata fields enables more precise filtering, and the description of the document contents helps the chain understand what the documents contain.

In [6]:
# (name, description, type) for every metadata field described to the
# self-query retriever.
_metadata_field_specs = (
    ("authors", "Authors of the paper", "string or list[string]"),
    ("year", "Year the paper was published", "integer"),
    ("abstract", "Abstract of the article", "string"),
    ("title", "Title of the paper", "string"),
    ("keywords", "Keywords associated with the paper", "string or list[string]"),
    ("citation_count", "Number of citations the paper has received", "integer"),
)

# One AttributeInfo per row of the table above, in the same order.
metadata_field_info = [
    AttributeInfo(name=field_name, description=field_description, type=field_type)
    for field_name, field_description, field_type in _metadata_field_specs
]

# Short description of what the stored documents contain.
document_content_description = "Provides information about article"

Setting up the SelfQueryRetriever, which is used for retrieving documents from the vector database. Here we specify Llama 3 as the LLM, which analyzes the prompt and structures it into a query; article_chroma_db as the vector database; and the additional attribute info about the metadata together with the document description.

In [7]:
# Self-querying retriever: `chat` receives the document description and the
# metadata schema, and lookups run against `vectorstore`.
retriever = SelfQueryRetriever.from_llm(
    vectorstore=vectorstore,
    llm=chat,
    metadata_field_info=metadata_field_info,
    document_contents=document_content_description,
    verbose=True,
)

The custom prompt instructs the model to always respond in full sentences and to say "I don't know" if it doesn't know the answer. This approach helps prevent Llama 3 from generating random responses simply to fulfill the expectation of an answer.

In [8]:
# Prompt text for the QA chain. The {context} and {question} placeholders are
# the two input variables declared when this string is wrapped in a
# PromptTemplate.
# NOTE(review): the instruction text contains misspellings ("answear",
# "sentance"). It is runtime text sent to the model, so it is left unchanged
# here; correct it deliberately if desired.
custom_prompt_template = """Use the following pieces of information to answer the user's question. Always answear the question as if you were a human and answear in full sentance. During your answear be really specific. If you don't know the answer, just say that you don't know, don't try to make up an answer.



Context: {context}
Question: {question}

Only return the helpful answer below and nothing else.
Helpful answer:
"""

In [9]:
def set_custom_prompt():
    """Build the PromptTemplate used for question answering.

    Returns:
        A PromptTemplate wrapping `custom_prompt_template`, with `context`
        and `question` declared as its input variables.
    """
    return PromptTemplate(
        input_variables=['context', 'question'],
        template=custom_prompt_template,
    )


prompt = set_custom_prompt()

Here, we define the complete question-answering chain of the RAG pipeline. We specify that Llama 3 is the LLM in use and that the retriever is the SelfQueryRetriever we created earlier. chain_type="stuff" means that all retrieved documents are inserted ("stuffed") into a single prompt, so the LLM answers based solely on the user's question and the retrieved documents. Finally, we ensure that the custom prompt we created is used.

In [10]:
# Retrieval QA chain: documents come from `retriever`, `chat` writes the answer
# using the custom prompt, and the source documents are returned alongside it.
qa = RetrievalQA.from_chain_type(
    retriever=retriever,
    llm=chat,
    chain_type="stuff",
    chain_type_kwargs=dict(prompt=prompt),
    return_source_documents=True,
)

This is the testing part, where we ask the RAG pipeline questions and receive its answers.

In [11]:
# Calling the chain object directly is deprecated (it emits LangChain's
# deprecation warning); invoke() is the supported entry point and returns the
# same output dict, with the answer under "result".
query = "How many articles were published in 2016"
result = qa.invoke({"query": query})
print("Answer:", result["result"])

  warn_deprecated(


Answer: I don't know.


In [12]:
# Use invoke() rather than the deprecated direct call on the chain object.
query = "Which article had citation count higher than 250"
result = qa.invoke({"query": query})
print("Answer:", result["result"])

Answer: I don't know.


In [13]:
# Use invoke() rather than the deprecated direct call on the chain object.
query = "Which sections does article A Multilevel Model of Resistance to Information Technology Implementation have"
result = qa.invoke({"query": query})
print("Answer:", result["result"])

Answer: The article "A Multilevel Model of Resistance to Information Technology Implementation" has the following sections: Introduction, Literature Review, Theoretical Background, Hypotheses Development, Methodology, Data Analysis, Results, Discussion, Conclusion, and References.
