In [1]:
# Import OS and create the environment

import os
from dotenv import load_dotenv
load_dotenv()
os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY")

In [2]:
# Load the source data
# Transform and Embed
# Vector store for storing the openai embeddings
# Retrieve the data

In [3]:
# PDF based loader

from langchain_community.document_loaders import PyPDFLoader

pdf0 = PyPDFLoader('TextBook_PDF/fepw1ps.pdf').load()
pdf1 = PyPDFLoader('TextBook_PDF/fepw101.pdf').load()
pdf2 = PyPDFLoader('TextBook_PDF/fepw102.pdf').load()
pdf3 = PyPDFLoader('TextBook_PDF/fepw103.pdf').load()
pdf4 = PyPDFLoader('TextBook_PDF/fepw104.pdf').load()
pdf5 = PyPDFLoader('TextBook_PDF/fepw105.pdf').load()
pdf6 = PyPDFLoader('TextBook_PDF/fepw106.pdf').load()
pdf7 = PyPDFLoader('TextBook_PDF/fepw107.pdf').load()

pdf7

[Document(page_content='24/A Pact with the Sun\nSAEEDA ’S mother had been ailing for a long time —fever, cough,\nbody-ache, painful joints and what not. T reated by a variety\nof physicians for weeks, she often showed signs of\nimprovement but soon r elapsed into her old, sick self, one\ncomplaint substituted by another . Though weak and\ncolourless, she was forbidden nor mal food and was under\nstrict or ders to r emain perpetually confined to her small,\ndingy r oom with doors and windows fastened, deprived of\nsunshine and fr esh air .\n•Saeeda’s mother has not received proper medical treatment\nfor her complaints.\n•She is denied healthy food, sunshine and fr esh air .\n•At last, she consults a good physician who gives her effective\nmedicine and sound advice.\nRationalised 2023-24\n', metadata={'source': 'TextBook_PDF/fepw107.pdf', 'page': 0}),
 Document(page_content='When she became critical, her relatives and neighbours\npersuaded her to consult a specialist even though his fee\

In [4]:
# Transform the data i.e split into chunks

from langchain.text_splitter import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(chunk_size = 1000, chunk_overlap = 200)

pdf_docs0 = text_splitter.split_documents(pdf0)
pdf_docs1 = text_splitter.split_documents(pdf1)
pdf_docs2 = text_splitter.split_documents(pdf2)
pdf_docs3 = text_splitter.split_documents(pdf3)
pdf_docs4 = text_splitter.split_documents(pdf4)
pdf_docs5 = text_splitter.split_documents(pdf5)
pdf_docs6 = text_splitter.split_documents(pdf6)
pdf_docs7 = text_splitter.split_documents(pdf7)

pdf_docs0

[Document(page_content='Rationalised 2023-24', metadata={'source': 'TextBook_PDF/fepw1ps.pdf', 'page': 0}),
 Document(page_content='First Edition\nFebruary 2006  Phalguna 1927\nReprinted\nJanuary 2007, October 2007,\nMarch 2009, December 2009,\nJanuary 2011, January 2012,\nDecember 2012, November 2013,\nDecember 2014, December 2015,\nDecember 2016, October 2017,\nDecember 2018, August 2019,\nAugust 2021 and November 2021\nRevised Edition\nNovember 2022 Agrahayana 1944\nPD 300T BS\n© National Council of Educational\nResearch and Training, 2006,\n2022\n` 35.00ALL RIGHTS RESERVED\n/boxshadowdwnNo part of this publication may be reproduced, stored in a retrieval system\nor transmitted, in any form or by any means, electronic, mechanical,\nphotocopying, recording or otherwise without the prior permission of the\npublisher.\n/boxshadowdwnThis book is sold subject to the condition that it shall not, by way of trade,\nbe lent,  re-sold, hired out or otherwise disposed of without the publisher’

In [7]:
# Create embeddings and vector store

from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import Chroma, Typesense

chroma_db = Chroma.from_documents(pdf_docs1, OpenAIEmbeddings())



typesense_db = Typesense.from_documents(
pdf_docs1,
OpenAIEmbeddings(),
typesense_client_params = {
"host" : "czuxlts9ma6pewkdp-1.a1.typesense.net",
"port" : "443",
"protocol" : "https",
"typesense_api_key" : "nezaUQR27h4ClkDLgC32nS0wKFaGmBCb",
"typesense_collection_name" : "lang-chain"
},)


In [9]:
from langchain_openai import OpenAI
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate
from langchain.chains.retrieval import create_retrieval_chain


openai_model = OpenAI(model="gpt-3.5-turbo-instruct")

prompt = ChatPromptTemplate.from_template("""
Answer the following Question based only on the provided context.
Think step by step before providing a detailed answer. I will tip you
$2000 if the user finds the answer helpful.
<context>
{context}                                              
</context>                                            
Question : {input}""")


# Create a chain
doc_chain = create_stuff_documents_chain(openai_model, prompt)

retriever = typesense_db.as_retriever()
retriever_chain = create_retrieval_chain(retriever, doc_chain)

response = retriever_chain.invoke({"input" : "Explain about the 2 birds"})    # "Explain about textbook development commitee"



In [11]:
response['answer']

' and their behavior as told by the rishi.\nStep 1: The king of the country came to the forest for hunting and got lost. He met a bird who tried to lure him towards robbers.\nStep 2: The king then reached an ashram where he met another bird who welcomed him and told him about the behavior of his brother bird.\nStep 3: The rishi explained that the first bird was influenced by the company of robbers and imitated their behavior, while the second bird was kind and welcoming due to being raised in a peaceful environment.\nStep 4: The rishi also compared the behavior of the birds to the saying "one is known by the company one keeps" and concluded that it is important to choose good company.\nStep 5: The story teaches the moral of choosing the right company and being influenced by it.\nStep 6: This story can be summed up as "One is known by the company one keeps".'