In [None]:
!pip install -qU langchain pinecone-client google-generativeai openai tqdm chromadb langchain_community langchain-google-genai CSVLoader pypdf PyPDFLoader


In [3]:
from google.colab import userdata
key=userdata.get('GOOGLE_API_KEY')


In [4]:
from langchain_google_genai import ChatGoogleGenerativeAI

chat_model = ChatGoogleGenerativeAI(google_api_key=key,
                                   model="gemini-1.5-flash")

# RAG

In [5]:
# import csv loader
from langchain.document_loaders.csv_loader import CSVLoader
# import text splitter
from langchain.text_splitter import CharacterTextSplitter

# Load the PDF using PyPDFLoader
loader = CSVLoader("/content/faq_bot_university.csv")
documents = loader.load_and_split()  # Returns a list of Document objects

In [6]:
# to show some lines of documents
documents[0].page_content[:100]

'Question: What is the tuition fee for undergraduate programs?\nAnswer: The tuition fee for undergradu'

In [7]:
# Extract the text content from the Document objects
pages = [doc.page_content for doc in documents]

In [8]:
len(pages)

1000

In [9]:
# Initialize the CharacterTextSplitter
text_splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=100)

# Split the text into chunks
chunks = text_splitter.create_documents(pages)

# Display results
print(f"Number of chunks: {len(chunks)}")
print(f"Type of first chunk: {type(chunks[0])}")

Number of chunks: 1000
Type of first chunk: <class 'langchain_core.documents.base.Document'>


In [10]:
chunks[3]

Document(metadata={}, page_content='Question: Can international students apply for financial aid?\nAnswer: Yes, international students can apply for financial aid.')

In [11]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings

embedding_model = GoogleGenerativeAIEmbeddings(google_api_key=key, model="models/embedding-001")

In [12]:
embedding_model = GoogleGenerativeAIEmbeddings(google_api_key=key,  model="models/text-embedding-004",  chunk_size=100,task_type="retrieval_document")


In [13]:

from langchain_community.vectorstores import Chroma
vector_store = Chroma.from_documents(documents, embedding=embedding_model)

In [14]:
llm = ChatGoogleGenerativeAI(google_api_key=key,
                                   model="gemini-1.5-flash")

In [15]:
retriever = vector_store.as_retriever(search_kwargs={"k": 5})

print(type(retriever))

<class 'langchain_core.vectorstores.base.VectorStoreRetriever'>


In [16]:
from langchain_core.messages import SystemMessage
from langchain_core.prompts import ChatPromptTemplate, HumanMessagePromptTemplate

In [17]:
chat_template = ChatPromptTemplate.from_messages([
    # System Message Prompt Template
    SystemMessage(content="""You are a Helpful AI Bot.
                  Given a context and question from user,
                  you should answer based on the given context."""),
    # Human Message Prompt Template
    HumanMessagePromptTemplate.from_template("""Answer the question based on the given context.
    Context: {context}
    Question: {question}
    Answer: """)
])

In [18]:

from langchain_core.output_parsers import StrOutputParser

output_parser = StrOutputParser()

In [19]:

from langchain_core.runnables import RunnablePassthrough

def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)


rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | chat_template
    | chat_model
    | output_parser
)

In [21]:
response = rag_chain.invoke("""what is the tution fee for undergraduate program""")

response

'$15,000 per year'

In [22]:
response = retriever.invoke("""explain about the document""")

response

[Document(metadata={'row': 871, 'source': '/content/faq_bot_university.csv'}, page_content='Question: When is the admission deadline?\nAnswer: The admission deadline is April 30.'),
 Document(metadata={'row': 171, 'source': '/content/faq_bot_university.csv'}, page_content='Question: When is the admission deadline?\nAnswer: The admission deadline is April 30.'),
 Document(metadata={'row': 436, 'source': '/content/faq_bot_university.csv'}, page_content='Question: When is the admission deadline?\nAnswer: The admission deadline is April 30.'),
 Document(metadata={'row': 701, 'source': '/content/faq_bot_university.csv'}, page_content='Question: When is the admission deadline?\nAnswer: The admission deadline is April 30.'),
 Document(metadata={'row': 351, 'source': '/content/faq_bot_university.csv'}, page_content='Question: When is the admission deadline?\nAnswer: The admission deadline is April 30.')]