In [1]:
import os
from dotenv import load_dotenv
from langchain.vectorstores import Chroma
from langchain.embeddings import HuggingFaceEmbeddings
from langchain_groq import ChatGroq
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain.chains.query_constructor.base import AttributeInfo
from langchain.retrievers.self_query.base import SelfQueryRetriever
from langchain.schema import Document, SystemMessage, HumanMessage

In [2]:
# Load variables from a local .env file into the process environment.
load_dotenv()

# Read the Groq API key; os.getenv returns None when the variable is absent.
api_key = os.getenv('GROQ_API_KEY')
if not api_key:
    # Fail fast with an actionable message rather than failing later when the key is first used.
    raise RuntimeError("GROQ_API_KEY is not set; add it to your .env file or environment.")

In [3]:
# Groq-hosted chat model; reused below by the self-query retriever and the QA chain.
chat = ChatGroq(
    model_name="llama3-70b-8192",
    groq_api_key=api_key,
    temperature=0,
)

In [4]:
# Embedding model handed to the Chroma store below.
# NOTE(review): presumably this must be the same model that was used when the
# persisted store was built — confirm.
embedding_model = HuggingFaceEmbeddings(
    model_name='sentence-transformers/paraphrase-MiniLM-L6-v2',
)

  from tqdm.autonotebook import tqdm, trange
comet_ml is installed but `COMET_API_KEY` is not set.


In [5]:
# Path of the persisted Chroma database; it is relative, so it resolves against
# the notebook's current working directory.
persist_directory = "../RAG_3_vectordb_3_separate codes/entities_chroma_db"

In [6]:
# Sanity check before opening the vector store. Test for a directory specifically:
# os.path.exists() would also report success for a regular file at this path.
if os.path.isdir(persist_directory):
    print("Persist directory exists.")
else:
    print("Persist directory does not exist.")

Persist directory exists.


In [7]:
# Open the Chroma store at persist_directory, embedding queries with embedding_model.
vectorstore = Chroma(
    embedding_function=embedding_model,
    persist_directory=persist_directory,
)

In [8]:
# Metadata fields described to the self-query retriever, written as
# (name, description, type) and expanded into AttributeInfo objects.
# NOTE(review): the documents printed by the sample retrieval in this notebook
# carry 'ent_id', 'label', 'title' and 'year' metadata; confirm that
# 'last_section_title' actually exists in this store.
metadata_field_info = [
    AttributeInfo(name=field_name, description=field_desc, type=field_type)
    for field_name, field_desc, field_type in (
        ("year", "Year the paper was published", "integer"),
        ("title", "Title of the paper", "string"),
        (
            "last_section_title",
            "Title section is associated with paragraph",
            "string or list[string]",
        ),
    )
]

# Free-text description of the document contents, passed to the retriever below.
document_content_description = "Provides context of each paragraph within the article"

In [9]:
# Self-query retriever: built from the chat model, the vector store, and the
# content / metadata descriptions defined above.
retriever = SelfQueryRetriever.from_llm(
    vectorstore=vectorstore,
    llm=chat,
    metadata_field_info=metadata_field_info,
    document_contents=document_content_description,
    verbose=True,
)

In [10]:
query = "How many articles were published in 2016"
# invoke() supersedes get_relevant_documents(), which is deprecated in recent
# LangChain releases; it returns the same list of Document objects.
retriever.invoke(query)

  warn_deprecated(


[Document(page_content='Sentence: " Certainly it \'s worthwhile " ( onshore manager , Team 1 ) . Entity: onshoring Label of entity: TOPIC', metadata={'ent_id': 'onshoring', 'label': 'TOPIC', 'title': 'Revisiting Group-Based Technology Adoption as a Dynamic Process: The Role of Changing Attitude-Rationale Configurations', 'year': 2016}),
 Document(page_content='Sentence: a lot to show bits of things broken on the screen " ( onshore engineer , Team 3 ) . \n Entity: onshoring Label of entity: TOPIC', metadata={'ent_id': 'onshoring', 'label': 'TOPIC', 'title': 'Revisiting Group-Based Technology Adoption as a Dynamic Process: The Role of Changing Attitude-Rationale Configurations', 'year': 2016}),
 Document(page_content='Sentence: " The concept is very , very good . There \'s no doubt about that " ( onshore engineer , Team 2 ) . Entity: onshoring Label of entity: TOPIC', metadata={'ent_id': 'onshoring', 'label': 'TOPIC', 'title': 'Revisiting Group-Based Technology Adoption as a Dynamic Proc

In [31]:
# Prompt text for the QA chain; the {context} and {question} placeholders are
# declared as input variables when the PromptTemplate is built.
custom_prompt_template = """Use the following pieces of information to answer the user's question. Always answer the question as if you were a human and answer in full sentences. During your answer be really specific. If you don't know the answer, just say that you don't know, don't try to make up an answer.



Context: {context}
Question: {question}

Only return the helpful answer below and nothing else.
Helpful answer:
"""

In [32]:
def set_custom_prompt():
    """Build the prompt template used for QA retrieval for each vectorstore.

    Returns:
        A PromptTemplate wrapping ``custom_prompt_template`` with the
        ``context`` and ``question`` input variables.
    """
    return PromptTemplate(
        input_variables=['context', 'question'],
        template=custom_prompt_template,
    )


prompt = set_custom_prompt()

In [33]:
# Retrieval QA chain of type 'stuff' over the self-query retriever, using the
# custom prompt; source documents are returned alongside the answer.
qa = RetrievalQA.from_chain_type(
    retriever=retriever,
    llm=chat,
    chain_type='stuff',
    chain_type_kwargs={'prompt': prompt},
    return_source_documents=True,
)

In [34]:
query = "How was study conduted in article: A Multilevel Model of Resistance to Information Technology Implementation, be very specififc"
# Chain.invoke() supersedes the deprecated direct call qa({...}); the returned dict is unchanged.
result = qa.invoke({"query": query})
print("Answer:", result["result"])

Answer: The study in the article "A Multilevel Model of Resistance to Information Technology Implementation" was conducted using a combination of research methods, specifically a longitudinal perspective, case study research, and data analysis method.


In [35]:
query = "How was study conduted in article: The Integrative Framework of Technology Use: An Extension and Test."
# Chain.invoke() supersedes the deprecated direct call qa({...}); the returned dict is unchanged.
result = qa.invoke({"query": query})
print("Answer:", result["result"])

Answer: The study was conducted using a three-or-more-wave panel study, which is a type of longitudinal research.


In [41]:
query = "Is survey used in article: Predicting Different Conceptualizations of System Use: The Competing Roles of Behavioral Intention, Facilitating Conditions, and Behavioral Expectation"
# Chain.invoke() supersedes the deprecated direct call qa({...}); the returned dict is unchanged.
result = qa.invoke({"query": query})
print("Answer:", result["result"])

Answer: Yes, the survey is used in the article as a collection method, specifically a longitudinal survey, to measure behavior over time and track responses from different time periods.


In [42]:
query = "How is data analysed in article: Predicting Different Conceptualizations of System Use: The Competing Roles of Behavioral Intention, Facilitating Conditions, and Behavioral Expectation"
# Chain.invoke() supersedes the deprecated direct call qa({...}); the returned dict is unchanged.
result = qa.invoke({"query": query})
print("Answer:", result["result"])

Answer: According to the article, data is analyzed using Partial Least Squares (PLS) regression, specifically with the PLS-Graph software package, version 3, build 1126.


Testing part

In [62]:
query = "How was the study conducted in the article: Nature and Nurture: The Impact of Automaticity and the Structuration of Communication on Virtual Team Behavior and Performance."
# Chain.invoke() supersedes the deprecated direct call qa({...}); the returned dict is unchanged.
result = qa.invoke({"query": query})
print("Answer:", result["result"])

Answer: The study was conducted by combining a survey of 70 undergraduate students with an experiment, where the survey and experiment had different samples of participants to avoid influencing the behavior of the experimental participants, and the results were verified using four separate General Linear Model (GLM) analyses in addition to Partial Least Squares (PLS) regression.


In [44]:
query = "How was the study conducted in the article: Understanding User Responses to Information Technology: A Coping Model of User Adaptation."
# Chain.invoke() supersedes the deprecated direct call qa({...}); the returned dict is unchanged.
result = qa.invoke({"query": query})
print("Answer:", result["result"])

Answer: The study was conducted by studying the adaptation process of six account managers, three from Bank A and three from Bank B, who were selected to maximize variation in their views and opinions about the systems and to have different usage patterns, following the approach outlined by Miles and Huberman (1994).


In [45]:
query = "How was the study conducted in the article: Toward a Deeper Understanding of System Usage in Organizations: A Multilevel Perspective."
# Chain.invoke() supersedes the deprecated direct call qa({...}); the returned dict is unchanged.
result = qa.invoke({"query": query})
print("Answer:", result["result"])

Answer: The study was conducted by having 76 groups of 2 to 5 students use MS Excel to complete a business analysis task.


In [46]:
query = "How was the study conducted in the article: The Integrative Framework of Technology Use: An Extension and Test."
# Chain.invoke() supersedes the deprecated direct call qa({...}); the returned dict is unchanged.
result = qa.invoke({"query": query})
print("Answer:", result["result"])

Answer: According to the information provided, the study was conducted through a thorough literature review, specifically identifying past studies that conducted a theory of planned behavior-based three-wave panel study and reported data in the form of correlations.


In [47]:
query = "How was the study conducted in the article: A Multilevel Model of Resistance to Information Technology Implementation."
# Chain.invoke() supersedes the deprecated direct call qa({...}); the returned dict is unchanged.
result = qa.invoke({"query": query})
print("Answer:", result["result"])

Answer: The study in the article "A Multilevel Model of Resistance to Information Technology Implementation" was conducted using a combination of research methods, specifically, a longitudinal perspective, case study research, and data analysis method.


In [48]:
query = "How was the study conducted in the article: When Does Technology Use Enable Network Change in Organizations? A Comparative Study of Feature Use and Shared Affordances."
# Chain.invoke() supersedes the deprecated direct call qa({...}); the returned dict is unchanged.
result = qa.invoke({"query": query})
print("Answer:", result["result"])

Answer: The study was conducted in two stages: the first stage involved analyzing tracking logs to identify patterns of technology use, and the second stage involved using observational data to explain why these patterns existed and what consequences arose from them.


In [49]:
query = "How was the study conducted in the article: How Habit Limits the Predictive Power of Intention: The Case of Information Systems Continuance."
# Chain.invoke() supersedes the deprecated direct call qa({...}); the returned dict is unchanged.
result = qa.invoke({"query": query})
print("Answer:", result["result"])

Answer: The study in the article "How Habit Limits the Predictive Power of Intention: The Case of Information Systems Continuance" was conducted using a methodology that involved comparing two models, one with and one without the interaction construct, and testing the results using f-statistics, as indicated in Figure 6, which showed that the path coefficients were significant.


In [50]:
query = "How was the study conducted in the article: A Temporally Situated Self-Agency Theory of Information Technology Reinvention."
# Chain.invoke() supersedes the deprecated direct call qa({...}); the returned dict is unchanged.
result = qa.invoke({"query": query})
print("Answer:", result["result"])

Answer: The study was conducted using a prospective qualitative research design, which employed process tracing through verbal protocols as the data collection method, allowing researchers to observe users in their interactions with information technologies and identify the specific reinvention subprocess that currently draws their attention.


In [51]:
query = "How was the study conducted in the article: Why Break the Habit of a Lifetime? Rethinking the Roles of Intention, Habit, and Emotion in Continuing Information Technology Use."
# Chain.invoke() supersedes the deprecated direct call qa({...}); the returned dict is unchanged.
result = qa.invoke({"query": query})
print("Answer:", result["result"])

Answer: The study was conducted by reading and analyzing the reference literature in psychology that is frequently cited in the IS research, as well as continuing to read in related fields such as neuropsychology, in order to come to different conclusions about the mental and behavioral processes that underlie continuing IT use.


In [52]:
query = "How was the study conducted in the article: An Investigation of Information Systems Use Patterns: Technological Events as Triggers, the Effect of Time, and Consequences for Performance."
# Chain.invoke() supersedes the deprecated direct call qa({...}); the returned dict is unchanged.
result = qa.invoke({"query": query})
print("Answer:", result["result"])

Answer: The study was conducted by randomly assigning participants to one of three experimental conditions: a control group with 42 participants where only expected IT events occurred, a group with 39 participants that included short-term discrepant IT events that could be easily overcome, and a group with 22 participants that included long-term discrepant IT events.


In [53]:
query = "How was the study conducted in the article: The Embeddedness of Information Systems Habits in Organizational and Individual Level Routines: Development and Disruption."
# Chain.invoke() supersedes the deprecated direct call qa({...}); the returned dict is unchanged.
result = qa.invoke({"query": query})
print("Answer:", result["result"])

Answer: According to the article, the study was conducted by having student participants keep a diary of all behaviors performed in their daily lives, which included exercising, newspaper reading, and TV watching, in order to examine their habits and routines.


In [54]:
query = "How was the study conducted in the article: Information Technology and the Performance of the Customer Service Process: A Resource-Based Analysis."
# Chain.invoke() supersedes the deprecated direct call qa({...}); the returned dict is unchanged.
result = qa.invoke({"query": query})
print("Answer:", result["result"])

Answer: The study was conducted through an empirical analysis in the context of one specific process in the insurance industry.


In [55]:
query = "How was the study conducted in the article: An Alternative to Methodological Individualism: A Non-Reductionist Approach to Studying Technology Adoption by Groups."
# Chain.invoke() supersedes the deprecated direct call qa({...}); the returned dict is unchanged.
result = qa.invoke({"query": query})
print("Answer:", result["result"])

Answer: The study was conducted in a multi-stage process, starting with a pilot study to validate the instruments and clarify the experimental procedures, followed by an empirical test of the Model of Technology Adoption by Groups (TAG), and then a post-hoc analysis of the data, which consisted of 86 groups due to some participants not completing the questionnaire after the training session.


In [56]:
query = "How was the study conducted in the article: Revisiting Group-Based Technology Adoption as a Dynamic Process: The Role of Changing Attitude-Rationale Configurations."
# Chain.invoke() supersedes the deprecated direct call qa({...}); the returned dict is unchanged.
result = qa.invoke({"query": query})
print("Answer:", result["result"])

Answer: The study was conducted through a case study approach, where data was collected from multiple sources over time, including samples of data and coding in the form of quotes in the text and as tables in Appendix D. The data collection process involved within-case analyses of three teams, and the steps taken for analysis were explained in the method section, including the presentation of intermediate steps in Appendix C, which showed a summary of events and changes in attitudes toward new technologies and reasons for adoption decisions in the three teams. Additionally, the study used a theoretical model, as shown in Figure 1, to summarize the findings, and the authors allowed the data to inform their personal and theoretical perspectives, rather than imposing their own perspectives onto the data.


In [57]:
query = "How was the study conducted in the article: Coping with Information Technology: Mixed Emotions, Vacillation, and Nonconforming Use Patterns."
# Chain.invoke() supersedes the deprecated direct call qa({...}); the returned dict is unchanged.
result = qa.invoke({"query": query})
print("Answer:", result["result"])

Answer: The study was conducted by first introducing the theoretical foundation for the study, followed by an explanation of the research method adopted, which involved collecting additional data in the form of meeting recordings, university-wide memos, e-mails, and informal conversations, including over 17.5 hours of observational data, and examining this data to investigate the two research questions.


In [58]:
query = "How was the study conducted in the article: How Do Suppliers Benefit from Information Technology Use in Supply Chain Relationships?"
# Chain.invoke() supersedes the deprecated direct call qa({...}); the returned dict is unchanged.
result = qa.invoke({"query": query})
print("Answer:", result["result"])

Answer: The study was conducted by developing survey items and then collecting data through a data collection method, and subsequently, the data was analyzed using Partial Least Squares (PLS) regression analysis to test the research model and competing models that examined the relationship between information system use and outcomes.


In [59]:
query = "How was the study conducted in the article: Understanding User Revisions When Using Information System Features: Adaptive System Use and Triggers."
# Chain.invoke() supersedes the deprecated direct call qa({...}); the returned dict is unchanged.
result = qa.invoke({"query": query})
print("Answer:", result["result"])

Answer: The study was conducted through a survey administered by StudyResponse, a nonprofit academic service that matches researchers with individuals willing to complete surveys, and a pilot study was conducted to ensure the questionnaire was properly compiled and to assess the reliability of the scales.


In [60]:
query = "How was the study conducted in the article: Predicting Different Conceptualizations of System Use: The Competing Roles of Behavioral Intention, Facilitating Conditions, and Behavioral Expectation."
# Chain.invoke() supersedes the deprecated direct call qa({...}); the returned dict is unchanged.
result = qa.invoke({"query": query})
print("Answer:", result["result"])

Answer: The study was conducted using a longitudinal research method, which means that it was a long-term study that involved collecting data over a period of time, allowing the researchers to observe and analyze changes and patterns in the behavior of system use.


In [61]:
query = "How was the study conducted in the article: A Dramaturgical Model of the Production of Performance Data."
# Chain.invoke() supersedes the deprecated direct call qa({...}); the returned dict is unchanged.
result = qa.invoke({"query": query})
print("Answer:", result["result"])

Answer: The study was conducted through full-time participant observation at DeskSales during the first five quarters of the unit's existence, as specified in Table 3 which lists the data collection procedures.
