In [1]:
import os
from dotenv import load_dotenv
from langchain.vectorstores import Chroma
from langchain.embeddings import HuggingFaceEmbeddings
from langchain_groq import ChatGroq
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain.chains.query_constructor.base import AttributeInfo
from langchain.retrievers.self_query.base import SelfQueryRetriever
from langchain.schema import Document, SystemMessage, HumanMessage

In [2]:
load_dotenv()
api_key = os.getenv('GROQ_API_KEY')

In [3]:
chat = ChatGroq(temperature=0, groq_api_key=api_key, model_name="llama3-70b-8192")

In [4]:
embedding_model = HuggingFaceEmbeddings(model_name='sentence-transformers/paraphrase-MiniLM-L6-v2')

  from tqdm.autonotebook import tqdm, trange
comet_ml is installed but `COMET_API_KEY` is not set.


In [5]:
persist_directory = "../RAG_3_vectordb_3_separate codes/paragraphs_chroma_db"

In [6]:
if not os.path.exists(persist_directory):
    print("Persist directory does not exist.")
else:
    print("Persist directory exists.")

Persist directory exists.


In [7]:
vectorstore = Chroma(persist_directory=persist_directory, embedding_function=embedding_model)

In [8]:
metadata_field_info = [
    AttributeInfo(
        name="year",
        description="Year the paper was published",
        type="integer",
    ),
    AttributeInfo(
        name="title",
        description="Title of the paper",
        type="string",
    ),
    AttributeInfo(
        name="last_section_title",
        description="Title section is associated with paragraph",
        type="string",
    )
]

document_content_description = "Provides context of each paragraph within the article"

In [9]:
retriever = SelfQueryRetriever.from_llm(
    llm=chat,
    vectorstore=vectorstore,
    document_contents=document_content_description,
    metadata_field_info=metadata_field_info,
    verbose=True
)

In [10]:
query = "How many articles were published in 2016"
retriever.get_relevant_documents(query)

  warn_deprecated(


[Document(page_content='Title: A Temporally Situated Self-Agency Theory of Information Technology Reinvention Year: 2016 Last Section Title: Reinvention Narrative Construction Paragraph: What would be my pie in the sky? Gosh, something that would be voice activated so that I would not have to call or dial or do anything. Just maybe a headset or a little video screen in front of my eyes like a pair of shades.... Whatever I am thinking or saying it does (Cousins and Robey 2005, pp. 163, 164). ', metadata={'last_section_title': 'Reinvention Narrative Construction', 'title': 'A Temporally Situated Self-Agency Theory of Information Technology Reinvention', 'year': 2016}),
 Document(page_content="Title: A Temporally Situated Self-Agency Theory of Information Technology Reinvention Year: 2016 Last Section Title: Use Paragraph: The unique number automaticall y generated by the IT becomes a marker of the reliability of the incident's resolution Appropriation; Improvisation ", metadata={'last_se

In [11]:
custom_prompt_template = """Use the following pieces of information to answer the user's question. Always answear the question as if you were a human and answear in full sentance. During your answear be really specific. If you don't know the answer, just say that you don't know, don't try to make up an answer.



Context: {context}
Question: {question}

Only return the helpful answer below and nothing else.
Helpful answer:
"""

In [12]:
def set_custom_prompt():
    """
    Prompt template for QA retrieval for each vectorstore
    """
    prompt = PromptTemplate(template=custom_prompt_template,
                            input_variables=['context', 'question'])
    return prompt

prompt = set_custom_prompt()

In [13]:
qa = RetrievalQA.from_chain_type(
    llm=chat,
    chain_type='stuff',
    retriever=retriever,
    return_source_documents=True,
    chain_type_kwargs={'prompt': prompt}
)

In [14]:
query = "How was study conduted in article: A Multilevel Model of Resistance to Information Technology Implementation, be very specififc"
result = qa({"query": query})
print("Answer:", result["result"])

  warn_deprecated(


Answer: The study was conducted in two stages, with the first stage involving within-case analysis to allow unique patterns of each case to emerge, and the second stage involving cross-case analysis using analytic induction to search for common patterns and unique features. To ensure internal validity, three sources of evidence were used: direct observation, documentation, and interviews. The researchers spent several days at each site observing how each Electronic Medical Record (EMR) was used, analyzed system and project documentation, and conducted interviews, which were then corroborated, validated, and complemented with data from the other sources. The cross-case analysis was conducted using two tactics: selecting categories to identify patterns in each dimension of the framework, and comparing cases in pairs to identify subtle similarities and differences. The research sites were selected to maximize variation and allow comparison, with similarities and variations in three charac

In [15]:
query = "What is the title of article that mentiones case study"
result = qa({"query": query})
print("Answer:", result["result"])

Answer: None of the provided texts explicitly mention a "case study", so I don't have a specific title to provide as an answer.


In [16]:
query = "Is case study mentioned in article When Does Technology Use Enable Network Change in Organizations? A Comparative Study of Feature Use and Shared Affordances"
result = qa({"query": query})
print("Answer:", result["result"])

Answer: Yes, the case study mentioned in the article is about Autoworks, a company in the automotive industry, specifically focusing on the Safety division, where the researcher conducted ethnographic data collection and sociometric surveys to study the impact of CrashLab technology on network change in organizations.


In [18]:
query = "Give me names of articles published in year 2007 that contain entity case study"
result = qa({"query": query})
print("Answer:", result["result"])

Answer: Unfortunately, I don't know the answer to this question as the provided context does not mention specific article names published in 2007 that contain an entity case study. The context appears to be a passage from a research paper or article itself, discussing various concepts and ideas related to system usage in organizations, but it does not provide a list of article names that meet the specified criteria.


In [19]:
query = "Is case study mentioned in article: Understanding User Revisions When Using Information System Features: Adaptive System Use and Triggers. if it is not mentioned, which kind of study was used"
result = qa({"query": query})
print("Answer:", result["result"])

Answer: No, a case study is not mentioned in the article "Understanding User Revisions When Using Information System Features: Adaptive System Use and Triggers". Instead, the study used a survey and Q-sort methodology, as described in the Research Design and Step 2: Interviews sections.


In [25]:
query = "Was PLS used in An Alternative to Methodological Individualism: A Non-Reductionist Approach to Studying Technology Adoption by Groups"
result = qa({"query": query})
print("Answer:", result["result"])

Answer: Yes, PLS-Graph Version 3.00 was used for analyzing the data in the study "An Alternative to Methodological Individualism: A Non-Reductionist Approach to Studying Technology Adoption by Groups" in 2010.


In [29]:
query = "Which groups of people were involved in the survey in article: How Habit Limits the Predictive Power of Intention: The Case of Information Systems Continuance"
result = qa({"query": query})
print("Answer:", result["result"])

Answer: The group of people involved in the survey in the article "How Habit Limits the Predictive Power of Intention: The Case of Information Systems Continuance" were business students at a university in Hong Kong.
