In [13]:
import os
from dotenv import load_dotenv
from chromadb import Client as ChromaClient
from langchain.chains import ConversationalRetrievalChain
from langchain_community.vectorstores.chroma import Chroma
from langchain.embeddings import HuggingFaceEmbeddings
from langchain_core.prompts import ChatPromptTemplate
from langchain_groq import ChatGroq
from langchain.chains import RetrievalQA

In [14]:
load_dotenv()
api_key = os.getenv('GROQ_API_KEY')

In [15]:
chat = ChatGroq(temperature=0, groq_api_key=api_key, model_name="llama3-70b-8192")

In [17]:
chroma = ChromaClient()
collection_name = 'paper_title_embeddings'

In [18]:
embedding_model = 'sentence-transformers/paraphrase-MiniLM-L6-v2'
hf_embeddings = HuggingFaceEmbeddings(model_name=embedding_model)

  from tqdm.autonotebook import tqdm, trange
comet_ml is installed but `COMET_API_KEY` is not set.


In [20]:
vector_store = Chroma(client=chroma, collection_name=collection_name,embedding_function=hf_embeddings)

In [28]:
retriever = vector_store.as_retriever()

In [21]:
collection = chroma.get_collection(collection_name)

In [22]:
def verify_indexed_documents(collection):
    try:
        indexed_docs = collection.get()
        for doc in indexed_docs:
            print(f"Indexed Document: {doc}")
    except Exception as e:
        print(f"Error verifying indexed documents: {e}")

# Verify indexed documents
verify_indexed_documents(collection)

Indexed Document: ids
Indexed Document: embeddings
Indexed Document: metadatas
Indexed Document: documents
Indexed Document: uris
Indexed Document: data


In [23]:
def test_retriever(vector_store):
    try:
        retriever = vector_store.as_retriever(search_kwargs={'k': 3})
        query = "ad blockers"
        results = retriever.get_relevant_documents(query)
        print("Retriever Test Results:")
        if not results:
            print("No documents were retrieved.")
        for result in results:
            print(f"Document ID: {result.metadata.get('id', 'N/A')}")
            print(f"Document Text: {result.page_content}")
            print(f"Document Metadata: {result.metadata}")
    except Exception as e:
        print(f"Error testing retriever: {e}")

# Test the retriever
test_retriever(vector_store)

  warn_deprecated(


Retriever Test Results:
No documents were retrieved.


In [24]:
custom_prompt_template = """Use the following pieces of information to answer the user's question.
If you don't know the answer, just say that you don't know, don't try to make up an answer.

Context: {context}
Question: {question}

Only return the helpful answer below and nothing else.
Helpful answer:
"""

In [25]:
from langchain.prompts import PromptTemplate

In [26]:
def set_custom_prompt():
    """
    Prompt template for QA retrieval for each vectorstore
    """
    prompt = PromptTemplate(template=custom_prompt_template,
                            input_variables=['context', 'question'])
    return prompt
#
prompt = set_custom_prompt()
prompt

########################### RESPONSE ###########################
PromptTemplate(input_variables=['context', 'question'], template="Use the following pieces of information to answer the user's question.\nIf you don't know the answer, just say that you don't know, don't try to make up an answer.\n\nContext: {context}\nQuestion: {question}\n\nOnly return the helpful answer below and nothing else.\nHelpful answer:\n")

PromptTemplate(input_variables=['context', 'question'], template="Use the following pieces of information to answer the user's question.\nIf you don't know the answer, just say that you don't know, don't try to make up an answer.\n\nContext: {context}\nQuestion: {question}\n\nOnly return the helpful answer below and nothing else.\nHelpful answer:\n")

In [29]:
qa = RetrievalQA.from_chain_type(
    llm=chat,
    chain_type='stuff',
    retriever=retriever,
    return_source_documents=True,
    chain_type_kwargs={'prompt': prompt})

In [33]:
qa = RetrievalQA.from_chain_type(
    llm=chat,
    chain_type='stuff',
    retriever=retriever,
    return_source_documents=True
)

In [30]:
response = qa.invoke({"query": "give me titles that contain ad blockers"})

In [34]:
query = "give me article about ad blockers"
response = qa.invoke({"query": query})
print("Query:", query)
print("Result:", response["result"])
print("Source Documents:")
for doc in response["source_documents"]:
    print(f"Document ID: {doc['id']}")
    print(f"Document Text: {doc['document']}")
    print(f"Document Metadata: {doc['metadata']}")

Query: give me article about ad blockers
Result: Here's an article about ad blockers:

**What are Ad Blockers?**

Ad blockers are software programs or browser extensions that prevent advertisements from being displayed on a website or mobile app. They work by identifying and blocking the scripts that load ads on a webpage, thereby improving the user experience by reducing clutter and increasing page loading speeds.

**How Do Ad Blockers Work?**

Ad blockers use various techniques to identify and block ads. Some common methods include:

1. **Filter lists**: Ad blockers use filter lists, which are lists of known ad servers and patterns that identify ads. These lists are regularly updated to ensure that new ads are blocked.
2. **JavaScript injection**: Ad blockers inject JavaScript code into web pages to identify and remove ads.
3. **Element hiding**: Ad blockers use CSS to hide ad elements on a webpage.

**Types of Ad Blockers**

1. **Browser extensions**: Browser extensions like uBlock 

In [32]:
query = "give me article about ad blockers"
response = qa({"query": query})
print("Query:", query)
print("Result:", response["result"])
print("Source Documents:")
for doc in response["source_documents"]:
    print(doc)

  warn_deprecated(


Query: give me article about ad blockers
Result: I don't know.
Source Documents:
