In [1]:
import os
from dotenv import load_dotenv
from langchain.vectorstores import Chroma
from langchain.embeddings import HuggingFaceEmbeddings
from langchain_groq import ChatGroq
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain.chains.query_constructor.base import AttributeInfo
from langchain.retrievers.self_query.base import SelfQueryRetriever
from langchain.schema import Document, SystemMessage, HumanMessage

In [2]:
load_dotenv()
api_key = os.getenv('GROQ_API_KEY')

In [3]:
chat = ChatGroq(temperature=0, groq_api_key=api_key, model_name="llama3-70b-8192")

In [4]:
embedding_model = HuggingFaceEmbeddings(model_name='sentence-transformers/paraphrase-MiniLM-L6-v2')

  from tqdm.autonotebook import tqdm, trange
comet_ml is installed but `COMET_API_KEY` is not set.


In [7]:
persist_directory = "../RAG_3_vectordb_3_separate codes/paragraphs_chroma_db"

In [8]:
if not os.path.exists(persist_directory):
    print("Persist directory does not exist.")
else:
    print("Persist directory exists.")

Persist directory exists.


In [9]:
vectorstore = Chroma(persist_directory=persist_directory, embedding_function=embedding_model)

In [10]:
metadata_field_info = [
    AttributeInfo(
        name="year",
        description="Year the paper was published",
        type="integer",
    ),
    AttributeInfo(
        name="title",
        description="Title of the paper",
        type="string",
    ),
    AttributeInfo(
        name="last_section_title",
        description="Title section is associated with paragraph",
        type="string",
    )
]

document_content_description = "Provides context of each paragraph within the article"

In [11]:
retriever = SelfQueryRetriever.from_llm(
    llm=chat,
    vectorstore=vectorstore,
    document_contents=document_content_description,
    metadata_field_info=metadata_field_info,
    verbose=True
)

In [12]:
query = "How many articles were published in 2016"
retriever.get_relevant_documents(query)

  warn_deprecated(


[Document(page_content="Title: A Temporally Situated Self-Agency Theory of Information Technology Reinvention Year: 2016 Last Section Title: Use Paragraph: The unique number automaticall y generated by the IT becomes a marker of the reliability of the incident's resolution Appropriation; Improvisation ", metadata={'last_section_title': 'Use', 'title': 'A Temporally Situated Self-Agency Theory of Information Technology Reinvention', 'year': 2016}),
 Document(page_content='Title: A Temporally Situated Self-Agency Theory of Information Technology Reinvention Year: 2016 Last Section Title: Reinvention Narrative Construction : Paragraph: The desire of masteryoriented actors to learn and develop, their tolerance for uncertainty, their willingness to invest effort in overcoming obstacles, and their ability to maintain long-term commitment (Barron and Harackiewicz 2001; Darnon et al. 2007) , lead them to focus on goals that can be achieved in a distant future. They are willing to publicly disp

In [13]:
custom_prompt_template = """Use the following pieces of information to answer the user's question. Always answer the question as if you were a human and in full sentance. If you don't know the answer, just say that you don't know, don't try to make up an answer. Only use information from the datasource.

Context: {context}
Question: {question}

Only return the helpful answer below and nothing else.
Helpful answer:
"""

def generate_fluent_output(query, retriever, chat, custom_prompt_template):
    results = retriever.get_relevant_documents(query)

    combined_content = "\n\n".join([f"{doc.page_content}\nMetadata: {doc.metadata}" for doc in results])

    formatted_prompt = custom_prompt_template.format(context=combined_content, question=query)
    messages = [
        HumanMessage(content=formatted_prompt)
    ]

    response = chat(messages)
    fluent_output = response.content

    print(fluent_output)

In [14]:
# Your query
query = "What is the sction title How Genre Rules in Instant Messaging and Discussion Forum Affect Use about from the article:Nature and Nurture: The Impact of Automaticity and the Structuration of Communication on Virtual Team Behavior and Performance"

# Generate and print the fluent output
generate_fluent_output(query, retriever, chat, custom_prompt_template)


  warn_deprecated(


The section "How Genre Rules in Instant Messaging and Discussion Forum Affect Use" is about how the subtle differences in form and substance between instant messaging and discussion forum may have powerful effects on their use, specifically on the balance of task performance activities versus non-task social-relationship activities.
