In [2]:
from langchain_openai import ChatOpenAI
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import OpenAIEmbeddings
from langchain_community.document_loaders import PyMuPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.tools.wikipedia.tool import WikipediaQueryRun 
from langchain_community.tools.arxiv.tool import ArxivQueryRun
from langchain_classic.utilities import WikipediaAPIWrapper, ArxivAPIWrapper
from langchain_classic.chains import RetrievalQA
from langchain_core.prompts import PromptTemplate
import os
from dotenv import load_dotenv

import warnings
warnings.filterwarnings("ignore", category=UserWarning, module="pdfminer")

  from pydantic.v1.fields import FieldInfo as FieldInfoV1


In [3]:
load_dotenv()

#Provide your key directly
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY") 

# Defining model and embeddings
llm = ChatOpenAI(model="gpt-4o-mini", temperature=0.2)
embeddings = OpenAIEmbeddings()


  embeddings = OpenAIEmbeddings()


In [4]:
# Providing Path of file 
pdf_path = r"C:\Users\Admin\Downloads\AWS_Certified AI Practitioner_ Slides.pdf"  

# Loading the pdf 
loader = PyMuPDFLoader(pdf_path)
docs = loader.load()

# Splitting the documents into manageable chunks
splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=150)
chunks = splitter.split_documents(docs)

# Creating a vector store from the document chunks and rag chain
vectorstore = FAISS.from_documents(chunks, embedding=embeddings)
retriever = vectorstore.as_retriever()
rag_chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever)


# Wiki and Arxiv chains
wiki = WikipediaQueryRun(api_wrapper=WikipediaAPIWrapper())
arxiv = ArxivQueryRun(api_wrapper=ArxivAPIWrapper())

In [7]:

# Main bot function
def ask_bot(user_query):
    # Router prompt
    router_prompt = f"""
You are a router. Choose one source for this question:
- RAG for AWS/AI Practitioner PDF and AWS related questions
- WIKI for general knowledge
- ARXIV for academic/research
- LLM for anything else

Respond with exactly one word: RAG, WIKI, ARXIV, or LLM.

Question: {user_query}
"""
    # Decide route using LLM
    decision = llm.invoke(input=router_prompt).content.strip().upper()
    print(f"üîç Router chose: {decision}")

    # Call appropriate tool
    if decision == "RAG":
        return rag_chain.run(user_query)
    elif decision == "WIKI":
        return wiki.run(user_query)
    elif decision == "ARXIV":
        return arxiv.run(user_query)
    else:
        # Default to LLM
        return llm.invoke(input=user_query).content

# Example usage
query = "What is aws sagemaker?"
answer = ask_bot(query)
print(answer)


üîç Router chose: RAG


  return rag_chain.run(user_query)


Amazon SageMaker is a fully managed service provided by AWS that enables developers and data scientists to build, train, and deploy machine learning (ML) models at scale. It offers a range of tools and features for the entire machine learning workflow, including:

- **Model Deployment & Inference**: Supports both asynchronous and batch processing for making predictions.
- **Model Cards**: Documentation for ML models.
- **Model Dashboard**: A centralized view of all models.
- **Model Monitor**: Monitoring and alerting for models.
- **Model Registry**: A repository for managing ML model versions.
- **Pipelines**: Continuous integration and continuous deployment (CICD) for machine learning.
- **Role Manager**: Access control for managing permissions.
- **JumpStart**: A hub for ML models and pre-built solutions.
- **Canvas**: A no-code interface for building ML models.

SageMaker simplifies the process of developing machine learning applications by providing a comprehensive set of tools an

In [6]:
#import streamlit as st