In [32]:
# Export this notebook (temp1.ipynb) to a plain Python script using nbconvert.
!jupyter nbconvert --to script temp1.ipynb

[NbConvertApp] Converting notebook temp1.ipynb to script
[NbConvertApp] Writing 3156 bytes to temp1.py


In [1]:
import os
from dotenv import load_dotenv

# Pull variables from a local .env file (if present) into the environment.
load_dotenv()

# The key is stored under 'open_ai_key' in this project; it is re-exported
# as OPENAI_API_KEY. If 'open_ai_key' is absent, an already-set
# OPENAI_API_KEY is accepted instead of failing.
_openai_key=os.getenv('open_ai_key') or os.getenv('OPENAI_API_KEY')
if not _openai_key:
    # Without this check, assigning None to os.environ raises an opaque
    # "TypeError: str expected, not NoneType".
    raise RuntimeError(
        "OpenAI API key not found: set 'open_ai_key' (or 'OPENAI_API_KEY') "
        "in the environment or in a .env file."
    )
os.environ['OPENAI_API_KEY']=_openai_key

# Wikipedia data fetching

In [3]:
# Data source 1: Wikipedia
from langchain_community.tools import WikipediaQueryRun
from langchain_community.utilities import WikipediaAPIWrapper

# API wrapper settings (per the parameter names: keep only the top result
# and cap the returned content at 250 characters).
api=WikipediaAPIWrapper(
    top_k_results=1,
    doc_content_chars_max=250,
)

# Wrap the API client as a tool that runs Wikipedia queries.
wiki=WikipediaQueryRun(api_wrapper=api)

# PDF and Website data fetching

In [19]:
# The loaders below rely on pypdf (PDF) and Beautiful Soup (web page).
from langchain_community.document_loaders import PyPDFLoader,WebBaseLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Input locations.
# NOTE(review): PDF_PATH is an absolute, machine-specific path; adjust it
# when running elsewhere.
PDF_PATH=r'D:\azure-session\NCERT-Class-10-History.pdf'
WEB_URL='https://docs.python.org/3/tutorial/classes.html'

# One splitter is shared by both sources: 1024-character chunks with a
# 256-character overlap.
text_splitter=RecursiveCharacterTextSplitter(chunk_size=1024,chunk_overlap=256)

# PDF: load, then split into chunks.
pdf_loader=PyPDFLoader(PDF_PATH)
docs=pdf_loader.load()
split_docs=text_splitter.split_documents(docs)

# Web page: load, then split into chunks.
web_loader=WebBaseLoader(WEB_URL)
web_docs=web_loader.load()
split_web=text_splitter.split_documents(web_docs)

In [31]:
from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings
from langchain.tools.retriever import create_retriever_tool

# A single embeddings client is shared by both indexes.
embeddings=OpenAIEmbeddings()

# Build the vector indexes from the split chunks (split_docs / split_web)
# rather than the raw loader output (docs / web_docs). Indexing the raw
# documents left the text splitter's result unused, so retrieval worked on
# whole documents instead of the 1024-character chunks.
# FAISS keeps the embeddings in RAM; use a vector DB such as ChromaDB if the
# index must live in persistent storage.
db_pdf=FAISS.from_documents(split_docs,embeddings)
db_web=FAISS.from_documents(split_web,embeddings)

# Retrievers that fetch related chunks from each index.
retreiver_pdf=db_pdf.as_retriever()
retreiver_web=db_web.as_retriever()

# Wrap each retriever as an agent tool with a name and a description.
# Tool names must not contain spaces.
retreiver_tool_pdf=create_retriever_tool(retreiver_pdf,'History_book','It contains history of nationalism in Europe and India for the secondary class students')
retreiver_tool_web=create_retriever_tool(retreiver_web,'Python_documentation','It contains about the classes and object in python')

In [25]:
# Every tool handed to the agent: the two retriever tools plus Wikipedia.
tools=[
    retreiver_tool_pdf,
    retreiver_tool_web,
    wiki,
]

In [26]:
from langchain_openai import ChatOpenAI

# Chat model used by the agent; a different LLM can be swapped in here.
llm=ChatOpenAI(
    model='gpt-3.5-turbo',
    temperature=0.5,
)

In [27]:
# Agent prompt
from langchain import hub

# Identifier of the prompt pulled from the hub; a custom prompt (see the
# documentation) can be used instead.
PROMPT_REPO='hwchase17/openai-functions-agent'
prompt=hub.pull(PROMPT_REPO)

In [28]:
from langchain.agents import create_openai_functions_agent,AgentExecutor
from langchain_core.output_parsers import StrOutputParser

# Build the agent from the chat model, the available tools and the prompt.
agent=create_openai_functions_agent(llm=llm,tools=tools,prompt=prompt)

# Kept so any later cell that references `output_parser` still works.
output_parser=StrOutputParser()

# `output_parser` is no longer passed here: AgentExecutor declares no such
# field, so the argument had no effect and only suggested post-processing
# that never happened. The result is read from the 'output' key of the
# dict returned by invoke().
agent_executer=AgentExecutor(agent=agent,tools=tools,verbose=True)

In [10]:
import streamlit as st

# Page title and the free-text query box.
st.title('RAG with multiple sources')
input_text=st.text_input('Write your query.')

# Call the agent only when the query box holds a non-empty value, then show
# the 'output' entry of its result.
if input_text:
    agent_request={'input':input_text}
    op=agent_executer.invoke(agent_request)
    st.write(op['output'])

2024-06-10 15:35:05.027 
  command:

    streamlit run C:\Users\shiri\AppData\Roaming\Python\Python312\site-packages\ipykernel_launcher.py [ARGUMENTS]
2024-06-10 15:35:05.029 Session state does not function when running a script without `streamlit run`
