In [None]:
from langchain_community.document_loaders import PyPDFLoader

# Parse the knowledge-base PDF into LangChain documents; `pages` is inspected
# and chunked by the cells that follow.
# NOTE(review): the captured output for pages[0] records the source as
# '/content/sample_data/new_knowledge_base.pdf', which does not match the
# relative path used here - confirm which location is correct before re-running.
loader = PyPDFLoader(file_path="./new_knowledge_base.pdf")
pages = loader.load()

In [97]:
# Display the first loaded document (metadata + extracted text) as a sanity check.
pages[0]

Document(metadata={'source': '/content/sample_data/new_knowledge_base.pdf', 'page': 0}, page_content='I am a second-year IT undergraduate at the University of Moratuwa, Sri Lanka. I enjoy collaborating with \nlike-minded individuals who have a growth mindset and aspire to contribute to global technology in the \ncoming years. \n \nI am proficient in programming and scripting languages such as Python, C, C++, Java, JavaScript, HTML, \nand CSS. \n \nIn addition to that, I have significant experience in web development, and my skills include React, \nNode.js, Next.js, Express, MongoDB, MySQL, Tailwind CSS, Bootstrap, HTML, and CSS. \n \nI am currently developing my machine learning and AI-related skills, and I am proficient in Scikit-learn, \nNumPy, Pandas, and Matplotlib in the domains of machine learning, artificial intelligence, and data \nscience.  ')

In [98]:
from langchain.text_splitter import RecursiveCharacterTextSplitter, CharacterTextSplitter

# Splitter that breaks text only on the "\n \n" paragraph gaps, with no overlap
# between neighbouring chunks.
# NOTE(review): the first chunk printed in the next cell is 224 characters long,
# i.e. larger than chunk_size=100 - presumably because "\n \n" is the only
# separator offered, so an over-long paragraph cannot be broken down further.
# Confirm that paragraph-sized chunks are what is intended.
r_splitter = RecursiveCharacterTextSplitter(
    separators=["\n \n"],
    chunk_size=100,
    chunk_overlap=0,
)

In [None]:
# Chunk every loaded page with the splitter configured above, then report the
# character length of the first resulting chunk.
splits = r_splitter.split_documents(documents=pages)
print(len(splits[0].page_content))

224


In [162]:
import google.generativeai as genai
from google.colab import userdata

# Read the Gemini key through Colab's `userdata` helper instead of hard-coding
# it in the notebook, then register it with the google.generativeai client.
GOOGLE_API_KEY = userdata.get("GOOGLE_API_KEY")
genai.configure(api_key=GOOGLE_API_KEY)

In [None]:
from langchain_google_genai import ChatGoogleGenerativeAI

# Chat model handed to the RetrievalQA chain further down.
# The key is passed as `google_api_key`, the same keyword the embeddings client
# in this notebook uses, so both Gemini wrappers are configured consistently.
llm = ChatGoogleGenerativeAI(
    model="gemini-1.5-flash",
    google_api_key=GOOGLE_API_KEY,
)

In [None]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain_community.vectorstores import Chroma

# Embedding client passed to Chroma when the vector store is built from the chunks.
embeddings = GoogleGenerativeAIEmbeddings(
    google_api_key=GOOGLE_API_KEY,
    model="models/embedding-001",
)

In [166]:
# Embed the chunks and store them in a persistent Chroma collection.
#
# Explicit, position-based ids make this cell safe to re-run: without them every
# execution generates fresh random ids, so the persisted collection accumulates
# duplicate copies of the same chunks and the retriever starts returning repeats.
# With stable ids a re-run overwrites the existing entries instead.
#
# NOTE(review): "/db/chroma" is an absolute path at the filesystem root; it
# presumably works on Colab but may not be writable elsewhere - consider a
# project-relative directory. Entries written by earlier runs (random ids) are
# not removed by this change; clear the directory once if duplicates exist.
vectordb = Chroma.from_documents(
    documents=splits,
    embedding=embeddings,
    ids=[f"split-{index}" for index in range(len(splits))],
    persist_directory="/db/chroma"
)

In [167]:
from langchain.memory import ConversationBufferMemory

# Conversation buffer stored under the "chat_history" key, returning message
# objects rather than a single string.
# NOTE(review): the RetrievalQA chain built in the next cell is not given this
# memory object - confirm whether it is consumed elsewhere or is left over.
memory = ConversationBufferMemory(return_messages=True, memory_key="chat_history")

In [168]:
from langchain.prompts import PromptTemplate
from langchain.chains import RetrievalQA

# Prompt given to the LLM: retrieved chunks fill {context}, the user's query
# fills {question}. The model is told to admit when it does not know, to stay
# concise, and to close every answer with "thanks for asking!".
template = """Use the following pieces of context to answer the question at the end. Basically questions should be about an undergraduate individual of the University of Moratuwa, Sri Lanka. If you don't know the answer, just say that you don't know, don't try to make up an answer. Keep the answer as concise as possible. Always say "thanks for asking!" at the end of the answer.
{context}
Question: {question}
Helpful Answer:"""
QA_CHAIN_PROMPT = PromptTemplate(
    input_variables=["context", "question"],
    template=template,
)

# Retrieval-augmented QA chain: the Chroma retriever supplies the context, the
# custom prompt above shapes the answer, and the source documents are returned
# alongside the answer for inspection.
question = "who is this about"
qa_chain = RetrievalQA.from_chain_type(
    llm,
    retriever=vectordb.as_retriever(),
    return_source_documents=True,
    chain_type_kwargs={"prompt": QA_CHAIN_PROMPT},
)

# Use `invoke` rather than calling the chain object directly; the direct-call
# form is deprecated in current LangChain releases. The returned mapping is
# indexed the same way ("result" holds the answer text).
result = qa_chain.invoke({"query": question})
result["result"]

'This is about a second-year IT undergraduate at the University of Moratuwa, Sri Lanka. Thanks for asking! \n'

In [169]:
# Follow-up query against the same chain; only the answer text is displayed.
# `invoke` is used because calling the chain object directly is deprecated.
qa_chain.invoke({"query": "mention about your all skills"})["result"]

'My skills include React, Node.js, Next.js, Express, MongoDB, MySQL, Tailwind CSS, Bootstrap, HTML, CSS, Python, C, C++, Java, JavaScript. \n\nThanks for asking! \n'