In [14]:
import os
from dotenv import load_dotenv
from langchain.vectorstores import Chroma
from langchain.embeddings import HuggingFaceEmbeddings
from langchain_groq import ChatGroq
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate


In [4]:
# Load variables from a local .env file into the process environment
# (presumably where GROQ_API_KEY lives — it is read via os.getenv in the next cell).
# The recorded output `True` is the call's return value.
load_dotenv()

True

In [5]:
# Groq credential taken from the environment; None when the variable is unset.
api_key = os.environ.get('GROQ_API_KEY')

In [6]:
# Groq-hosted chat model that answers the questions; temperature=0 asks for
# the least random decoding.
chat = ChatGroq(
    model_name="llama3-70b-8192",
    groq_api_key=api_key,
    temperature=0,
)

In [7]:
# Directory holding the persisted Chroma data, and the sentence-transformers
# model used to embed text for that store.
persist_directory = 'chroma_db'
embedding_model = HuggingFaceEmbeddings(
    model_name='sentence-transformers/paraphrase-MiniLM-L6-v2',
)

  from tqdm.autonotebook import tqdm, trange
comet_ml is installed but `COMET_API_KEY` is not set.


In [9]:
# Open the "title_abstract_chroma_db" collection from the persisted directory,
# using the embedding model defined above.
vectordb = Chroma(
    collection_name="title_abstract_chroma_db",
    embedding_function=embedding_model,
    persist_directory=persist_directory,
)

In [10]:
# Expose the vector store as a retriever; no search type or search kwargs are
# passed, so the library defaults apply.
retriever = vectordb.as_retriever()

In [11]:
custom_prompt_template = """Use the following pieces of information to answer the user's question.
If you don't know the answer, just say that you don't know, don't try to make up an answer.

Context: {context}
Question: {question}

Only return the helpful answer below and nothing else.
Helpful answer:
"""

In [15]:
def set_custom_prompt():
    """
    Build the PromptTemplate used for QA retrieval.

    Wraps the module-level ``custom_prompt_template`` string and declares
    ``context`` and ``question`` as its input variables.
    """
    return PromptTemplate(
        input_variables=['context', 'question'],
        template=custom_prompt_template,
    )

# Build the prompt once; it is handed to the QA chain through chain_type_kwargs.
prompt = set_custom_prompt()

In [16]:
# Retrieval QA chain: the retriever supplies documents, the 'stuff' chain type
# places them into the custom prompt, and the Groq chat model answers.
# return_source_documents=True asks the chain to return the retrieved
# documents alongside the answer.
qa = RetrievalQA.from_chain_type(
    llm=chat,
    retriever=retriever,
    chain_type='stuff',
    chain_type_kwargs=dict(prompt=prompt),
    return_source_documents=True,
)

In [19]:
# Ask for articles about ad blockers.
# The chain is run with .invoke() rather than by calling the chain object
# directly: qa({...}) goes through Chain.__call__, which LangChain has
# deprecated. The input dict is the same, and the answer is read from
# result["result"] in the next cell.
query = "Give me 2 articles that talk about ad blockers."
result = qa.invoke({"query": query})

In [20]:
# Show only the generated answer text from the chain's result dict.
print(f"Answer: {result['result']}")

Answer: Here are 2 articles that talk about ad blockers:

1. Ad-Blockers: A Blessing or a Curse?
2. (No other article talks about ad blockers)


In [22]:
# Ask for articles about software innovation and print the answer text.
# .invoke() replaces the deprecated direct call qa({...}) (Chain.__call__);
# the input dict is the same.
query = "Give me articles that talk about software innovation."
result = qa.invoke({"query": query})
print("Answer:", result["result"])

Answer: The article "Essence: facilitating software innovation" talks about software innovation.


In [23]:
# Ask what a specific article is about and print the answer text.
# .invoke() replaces the deprecated direct call qa({...}) (Chain.__call__);
# the input dict is the same.
# NOTE(review): the query text is kept exactly as originally run, including
# the "Scoial" misspelling, because the recorded output below was produced
# with it.
query = "What is article with the name Data Science for Scoial Good about."
result = qa.invoke({"query": query})
print("Answer:", result["result"])

Answer: The article "Data Science for Social Good" is about the diminishing emphasis on social good challenges in data science research, and presents a framework for "data science for social good" research that considers the interplay between relevant data science research genres, social good challenges, and different levels of sociotechnical abstraction.


In [24]:
import textwrap


In [25]:
# Ask what the article is about and print the answer wrapped to 80 columns.
# .invoke() replaces the deprecated direct call qa({...}) (Chain.__call__);
# the input dict is the same.
query = "What is the article with the name Data Science for Social Good about."
result = qa.invoke({"query": query})
wrapped_answer = textwrap.fill(result["result"], width=80)
# sep="\n" puts the wrapped text on its own line. The previous
# print("Answer:\n", wrapped_answer) inserted print's default separator space
# after the newline, indenting the first wrapped line by one column.
print("Answer:", wrapped_answer, sep="\n")

Answer:
 The article "Data Science for Social Good" is about presenting a framework for
"data science for social good" (DSSG) research that considers the interplay
between relevant data science research genres, social good challenges, and
different levels of sociotechnical abstraction, and highlighting the lack of
research focus on social good challenges in the field of data science.


In [26]:
# Ask for the article's full abstract and print it wrapped to 80 columns.
# .invoke() replaces the deprecated direct call qa({...}) (Chain.__call__);
# the input dict is the same.
query = "Give me full abstract of article Data Science for Social Good."
result = qa.invoke({"query": query})
wrapped_answer = textwrap.fill(result["result"], width=80)
# sep="\n" puts the wrapped text on its own line. The previous
# print("Answer:\n", wrapped_answer) inserted print's default separator space
# after the newline, indenting the first wrapped line by one column.
print("Answer:", wrapped_answer, sep="\n")

Answer:
 Here is the abstract of the article "Data Science for Social Good":  Data
science has been described as the fourth paradigm of scientific discovery. The
latest wave of data science research, pertaining to machine learning and
artificial intelligence (AI), is growing exponentially and garnering millions of
annual citations. However, this growth has been accompanied by a diminishing
emphasis on social good challenges-our analysis reveals that the proportion of
data science research focusing on social good is less than it has ever been. At
the same time, the proliferation of machine learning and generative AI has
sparked debates about the sociotechnical prospects and challenges associated
with data science for human flourishing, organizations, and society. Against
this backdrop, we present a framework for "data science for social good" (DSSG)
research that considers the interplay between relevant data science research
genres, social good challenges, and different levels of sociot