In [5]:
from langchain_community.document_loaders import PyPDFLoader

# Load the PDF as one Document per page. `load()` is used instead of
# `load_and_split()` because the latter already runs a default text splitter,
# and the loaded pages are split again later with explicit chunk settings.
loader = PyPDFLoader("test.pdf")
pages = loader.load()

In [6]:
from langchain_text_splitters import RecursiveCharacterTextSplitter
# Chunking parameters, measured in characters because length_function is `len`.
CHUNK_SIZE = 1000
CHUNK_OVERLAP = 200

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
    length_function=len,
    is_separator_regex=False,
)
documents = text_splitter.split_documents(pages)

# Preview a couple of chunks rather than dumping the whole list into the output.
documents[:2]

[Document(metadata={'source': 'test.pdf', 'page': 0}, page_content='making LearnING enga ging A gain!www . y ogy abano . c om/W e ar e a WhatsApp b ased personalised GenA .I po w er ed micr ole ar ning  platf or m f or \nsp a c ed le ar ning t o deliv er bet t er r esul ts.'),
 Document(metadata={'source': 'test.pdf', 'page': 1}, page_content='P r o b l e m  W e  a r e  A d d r e s s i n gG a u t a m\nL e a r n e r sL e k h a\nL a c k  o f  p e r s o n a l i s a t i o n  i n  t r a i n i n g  p r o g r a m \nN o t  d e s i g n e d  f o r  l o w  a t t e n t i o n  s p a n \nN o  o p p o r t u n i t y  f o r  c o n t i n u o u s  &  s p a c e d  l e a r n i n \nD i f fi c u l t  t o  m a k e  l e a r n i n g  a  h a b i t \nL o t  o f  f r i c t i o n  i n  a c c e s s i n g  e- l e a r n i n g  c o n t e n tT a k e  w e e k s  a n d  m o n t h s  t o  c r e a t e  a n d  l a u n c h  a  \nt r a i n i n g  p r o g r a \nC u r r e n t  l e a r n i n g  m a n a g e m e n t  s y s t e m s 

In [7]:
import os
from dotenv import load_dotenv

# Pull variables from a local .env file into the process environment.
load_dotenv()

# load_dotenv() has already placed the keys in os.environ, so they do not need
# to be copied back. Assigning os.getenv(...) to os.environ raises an opaque
# TypeError when the variable is missing, so check for it explicitly instead.
if not os.getenv("OPENAI_API_KEY"):
    raise EnvironmentError(
        "OPENAI_API_KEY is not set; add it to your .env file or environment."
    )

# Turn on LangChain tracing only when its API key is actually available.
if os.getenv("LANGCHAIN_API_KEY"):
    os.environ["LANGCHAIN_TRACING_V2"] = "true"
else:
    print("LANGCHAIN_API_KEY is not set; LangChain tracing was not enabled.")

In [8]:
# OpenAIEmbeddings is imported from the dedicated `langchain_openai` package;
# the `langchain.embeddings.openai` path is deprecated.
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import FAISS

# Embedding model configuration.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
model_name = 'text-embedding-ada-002'
embed = OpenAIEmbeddings(
    model=model_name,
    openai_api_key=OPENAI_API_KEY
)

# Build the FAISS index with the embedder configured above instead of a second,
# implicitly configured OpenAIEmbeddings() instance.
# NOTE: the name `vetcordb` (sic) is kept because the retriever cell refers to it.
vetcordb = FAISS.from_documents(documents, embed)


In [9]:

# Wrap the vector store as a retriever and run one sample query through it.
retriever = vetcordb.as_retriever()
sample_query = "What is LangChain?"
retrieved_documents = retriever.invoke(sample_query)

# Show the text of the first retrieved document.
retrieved_documents[0].page_content

'P r o b l e m  W e  a r e  A d d r e s s i n gG a u t a m\nL e a r n e r sL e k h a\nL a c k  o f  p e r s o n a l i s a t i o n  i n  t r a i n i n g  p r o g r a m \nN o t  d e s i g n e d  f o r  l o w  a t t e n t i o n  s p a n \nN o  o p p o r t u n i t y  f o r  c o n t i n u o u s  &  s p a c e d  l e a r n i n \nD i f fi c u l t  t o  m a k e  l e a r n i n g  a  h a b i t \nL o t  o f  f r i c t i o n  i n  a c c e s s i n g  e- l e a r n i n g  c o n t e n tT a k e  w e e k s  a n d  m o n t h s  t o  c r e a t e  a n d  l a u n c h  a  \nt r a i n i n g  p r o g r a \nC u r r e n t  l e a r n i n g  m a n a g e m e n t  s y s t e m s  a r e  n o t  \nt u n e d  t o  t h e  n e e d  o f  f r o n t l i n e  a n d  l o w  a t t e n t i o n  \ns p a n  l e a r n e r \nL e a r n i n g  w o r k fl o w  i s  d e t a c h e d  f r o m  d a i l y  w o r k\nT r a i n i n g  I n s t r u c t o r'

In [10]:
from langchain_openai import OpenAI
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate
# The class is `HumanMessage`; `HumanMessages` does not exist and raises ImportError.
from langchain_core.messages import HumanMessage


# LLM created with the library's default settings.
llm = OpenAI()

# Prompt for the stuff-documents chain: {context} receives the retrieved
# documents and {input} receives the user's question. The context block is
# closed with </context>, and the string opens with exactly three quotes so no
# stray '"' character is sent to the model.
prompt = ChatPromptTemplate.from_template("""
                                          System : Answer the following question based only on the provided context. Think step by step before providing a detailed answer.
                                          <context>
                                          {context}
                                          </context>
                                          Question:{input}
                                          """)
documents_chain = create_stuff_documents_chain(llm,prompt)

In [11]:
from langchain.chains import create_retrieval_chain
# Combine retrieval and answer generation into a single chain.
retriever_chain = create_retrieval_chain(
    retriever=retriever,
    combine_docs_chain=documents_chain,
)

In [16]:
# Test code to create an API endpoint that returns the chain's answer to the user.
# This will not work as-is: the client-side code is not included here, and the code is running in a notebook rather than a .py file, which may also prevent the server from starting.

###############################################  CODE STARTS BELOW   ########################################################
from fastapi import FastAPI
import uvicorn


# FastAPI application object that the route decorator below attaches to.
app = FastAPI(title="Langchain Server", version="1.0", description="A simple API Server")

@app.get("/answer")
def genAnswer(input:str):
    """Handle GET /answer by running the retrieval chain on the question.

    Args:
        input: The question text, passed to the chain under the "input" key.

    Returns:
        The value stored under the "answer" key of the chain's result.
    """
    return retriever_chain.invoke({"input": input})["answer"]

if __name__ == "__main__":
    import asyncio

    # uvicorn.run() starts its own event loop, which fails inside a notebook
    # because an event loop is already running there. Build the server
    # explicitly so it can be started either way.
    server = uvicorn.Server(uvicorn.Config(app, host="localhost", port=8000))
    try:
        running_loop = asyncio.get_running_loop()
    except RuntimeError:
        # No loop is running (e.g. executed as a plain script): block and serve.
        server.run()
    else:
        # A loop is already running: serve in the background on that loop.
        # The task is kept in a variable so it is not garbage-collected.
        server_task = running_loop.create_task(server.serve())


pleaase tell me a little about this pdf in brief


'\nAnswer: This PDF provides information about a team of individuals with experience in entrepreneurship, digital and fintech sectors, and financial inclusion startups. They have also founded and been early members of multiple startups. The team includes Siva Cotipalli, who has 15+ years of experience and holds degrees from BITS Pilani, IIM-Kozhikode, and London School of Economics. The PDF also introduces an AI-assisted training platform for blue and grey collar workers, with features such as WhatsApp-based learning, on-the-job training support, and learning in various Indian languages. It also offers insights on employee engagement and training analytics.'