**RAG Implementation**

In [1]:
!pip install langchain pinecone-client google-generativeai openai tqdm chromadb langchain_community langchain-google-genai --quiet

[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/67.3 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m67.3/67.3 kB[0m [31m3.1 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m244.8/244.8 kB[0m [31m9.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m605.5/605.5 kB[0m [31m22.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.4/2.4 MB[0m [31m59.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.5/2.5 MB[0m [31m61.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.0/1.0 MB[0m [31m41.9 MB/s[0m eta [36m0:00:00[

In [2]:
# Step 2: Import Required Modules
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain.chains import create_retrieval_chain
from langchain.vectorstores import Chroma
from langchain.document_loaders import CSVLoader
from langchain.prompts import PromptTemplate

In [4]:
# Step 3: Download the dataset and load it locally
import requests

path = '/content/faq_bot_university.csv'
# changed line
loader = CSVLoader(file_path=path, source_column='Answer')
documents = loader.load()

In [5]:
documents

[Document(metadata={'source': 'The tuition fee for undergraduate programs is $15,000 per year.', 'row': 0}, page_content='Question: What is the tuition fee for undergraduate programs?\nAnswer: The tuition fee for undergraduate programs is $15,000 per year.'),
 Document(metadata={'source': 'The admission deadline is April 30.', 'row': 1}, page_content='Question: When is the admission deadline?\nAnswer: The admission deadline is April 30.'),
 Document(metadata={'source': 'Courses include AI, Data Science, and Web Development.', 'row': 2}, page_content='Question: What courses are offered in Computer Science?\nAnswer: Courses include AI, Data Science, and Web Development.'),
 Document(metadata={'source': 'Yes, international students can apply for financial aid.', 'row': 3}, page_content='Question: Can international students apply for financial aid?\nAnswer: Yes, international students can apply for financial aid.'),
 Document(metadata={'source': 'Yes, on-campus housing is available for all

In [6]:
from google.colab import userdata
key=userdata.get('GOOGLE_API_KEY')

In [7]:
from langchain.text_splitter import CharacterTextSplitter
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
docs = text_splitter.split_documents(documents)

In [8]:
docs

[Document(metadata={'source': 'The tuition fee for undergraduate programs is $15,000 per year.', 'row': 0}, page_content='Question: What is the tuition fee for undergraduate programs?\nAnswer: The tuition fee for undergraduate programs is $15,000 per year.'),
 Document(metadata={'source': 'The admission deadline is April 30.', 'row': 1}, page_content='Question: When is the admission deadline?\nAnswer: The admission deadline is April 30.'),
 Document(metadata={'source': 'Courses include AI, Data Science, and Web Development.', 'row': 2}, page_content='Question: What courses are offered in Computer Science?\nAnswer: Courses include AI, Data Science, and Web Development.'),
 Document(metadata={'source': 'Yes, international students can apply for financial aid.', 'row': 3}, page_content='Question: Can international students apply for financial aid?\nAnswer: Yes, international students can apply for financial aid.'),
 Document(metadata={'source': 'Yes, on-campus housing is available for all

In [9]:
# Step 4: Initialize Embeddings
embedding_model = GoogleGenerativeAIEmbeddings(google_api_key=key,  model="models/text-embedding-004")

In [10]:
# Step 5: Create a Chroma Vector Store
vector_store = Chroma.from_documents(documents, embedding=embedding_model)

In [11]:
llm = ChatGoogleGenerativeAI(google_api_key=key,
                                   model="gemini-1.5-flash")

In [12]:
retriever = vector_store.as_retriever(search_kwargs={"k": 5})

print(type(retriever))

<class 'langchain_core.vectorstores.base.VectorStoreRetriever'>


In [13]:
sample_docs = retriever.get_relevant_documents("admission requirements")
for doc in sample_docs:
    print(doc.page_content)

  sample_docs = retriever.get_relevant_documents("admission requirements")


Question: When is the admission deadline?
Answer: The admission deadline is April 30.
Question: When is the admission deadline?
Answer: The admission deadline is April 30.
Question: When is the admission deadline?
Answer: The admission deadline is April 30.
Question: When is the admission deadline?
Answer: The admission deadline is April 30.
Question: When is the admission deadline?
Answer: The admission deadline is April 30.


In [14]:
from langchain_core.messages import SystemMessage
from langchain_core.prompts import ChatPromptTemplate, HumanMessagePromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
chat_template = ChatPromptTemplate.from_messages([
    # System Message Prompt Template
    SystemMessage(content="""You are a Helpful Faq Bot.
                  Given a context and question from user,
                  you should answer based on the given context."""),
    # Human Message Prompt Template
    HumanMessagePromptTemplate.from_template("""Answer the question based on the given context.
    Context: {context}
    Question: {question}
    Answer: """)
])

output_parser = StrOutputParser()

def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)


rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | chat_template
    | llm
    | output_parser
)

In [15]:
# response = rag_chain.invoke("""Please summarize Leave No Context Behind:
#                             Efficient Infinite Context Transformers with Infini-attention""")

# response

In [16]:
response = rag_chain.invoke("""What courses are offered in Computer Science?""")

response

'Courses include AI, Data Science, and Web Development.\n'

In [17]:
# Step 7: Set Up Retrieval-Augmented Generation (RAG) Chain
from langchain.chains import RetrievalQA
faq_chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever,chain_type="stuff",)

In [18]:
# Step 9: Ask a Question
question = "What courses are offered in Computer Science?"
response = faq_chain.run(question)

print("Response:", response)

  response = faq_chain.run(question)


Response: Courses include AI, Data Science, and Web Development.

