**Installing Required Libraries**

In [1]:
! pip install langchain_openai langchain langchain_community langchain_pinecone docarray pydantic==2.7.0 python-dotenv pandas tiktoken PyPDF2 fastapi uvicorn --quiet 

**LLM Initialization**

In [2]:
import os
from dotenv import load_dotenv
from langchain_openai.chat_models import ChatOpenAI

# Load environment variables via python-dotenv before reading any keys.
load_dotenv()

# Chat model used for answer generation; the key is read from the environment.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
model = ChatOpenAI(model="gpt-3.5-turbo", openai_api_key=OPENAI_API_KEY)

**Loading the PDF** 

In [3]:
from PyPDF2 import PdfReader

# Open the PDF that serves as the knowledge base
pdf_reader = PdfReader('knowledge_base.pdf')

# Extract the text of every page, skipping pages that yield no text.
# Pages are joined with a newline so the last line of one page is not fused
# with the first line of the next (the text is later split on "\n").
page_texts = (page.extract_text() for page in pdf_reader.pages)
raw_text = '\n'.join(text for text in page_texts if text)

In [15]:
# Sanity check: display the type of the extracted text (built above as a str).
type(raw_text)

str

**Chunking Text**

In [5]:
from langchain.text_splitter import CharacterTextSplitter
from langchain.schema import Document

# Splitter settings: split on newlines, target 100-character chunks (measured
# with len) and a 20-character overlap between neighbouring chunks.
splitter_options = {
    "separator": "\n",
    "chunk_size": 100,
    "chunk_overlap": 20,
    "length_function": len,
}
text_splitter = CharacterTextSplitter(**splitter_options)

# Cut the extracted text into a list of chunk strings
texts = text_splitter.split_text(raw_text)

# Wrap every chunk string in a Document
documents = [Document(page_content=chunk) for chunk in texts]

Created a chunk of size 101, which is longer than the specified 100


In [6]:
# Peek at the first five chunks, the container type, and the total chunk count.
texts[:5], type(texts), len(texts)

(['Everyday we hear sounds from various\nsources like humans, bir ds, bells, machines,',
  'vehicles, televisions, radios etc. Sound is a\nform of ener gy which pr oduces a sensation',
  'of hearing in our ears. Ther e are also other\nforms of energy like mechanical energy, light',
  'energy, etc. W e have talked about mechanical\nenergy in the pr evious chapters. Y ou have',
  'been taught about conservation of energy,\nwhich states that we can neither create nor'],
 list,
 413)

**Creating Embeddings**

In [7]:
from langchain_openai.embeddings import OpenAIEmbeddings

# Initialize OpenAI embeddings with the library defaults. No key is passed
# here — presumably it is picked up from the OPENAI_API_KEY environment
# variable; confirm if the key is supplied another way.
embeddings = OpenAIEmbeddings()

**Setting up the Vector Store**

In [8]:
from langchain_pinecone import PineconeVectorStore

# Name of the Pinecone index handed to the vector store.
index_name = "agentic-rag"

# Initialize Pinecone vector store from the chunk Documents and the embeddings.
# NOTE(review): this looks like it embeds and uploads every chunk each time the
# cell runs, so re-running may add duplicates to the index — confirm, and
# consider connecting to the existing index instead on later runs.
pinecone = PineconeVectorStore.from_documents(
    documents, embeddings, index_name=index_name
)

  from tqdm.autonotebook import tqdm


**Chaining Components for Contextual Question Answering**

In [9]:
from langchain_core.output_parsers import StrOutputParser
from langchain.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough

# Output parser: used as the final stage of the chain defined further down.
parser = StrOutputParser()

# Prompt template with two placeholders, {context} and {question}. It also
# instructs the model to reply with a fixed fallback sentence when it cannot
# answer the question.
template = """
Answer the question based on the context below. If you can't answer the question, reply "I'm unable to resolve that with my current capabilities.".

Context: {context}

Question: {question}
"""
prompt = ChatPromptTemplate.from_template(template)

# Running record of retrieved chunk texts: one inner list per logged call.
retrieved_contexts = []


def log_context(inputs):
    """Record the text of the retrieved documents and hand ``inputs`` back.

    Args:
        inputs: Mapping whose ``'context'`` entry is an iterable of objects
            exposing a ``page_content`` attribute.

    Returns:
        The same ``inputs`` object, unmodified.
    """
    page_texts = []
    for document in inputs['context']:
        page_texts.append(document.page_content)
    retrieved_contexts.append(page_texts)
    return inputs

# Pipeline: retrieve k=5 chunks while forwarding the raw question, record the
# retrieved text, fill the prompt, call the model, and parse the output.
retriever = pinecone.as_retriever(search_kwargs={"k": 5})
chain = (
    {"context": retriever, "question": RunnablePassthrough()}
    | RunnablePassthrough(log_context)
    | prompt
    | model
    | parser
)

**Agent for Selective Retrieval**

In [10]:
import re

class DomainClassifierTool:
    """Keyword-based relevance check for a single domain.

    Attributes:
        domain: Label of the domain this classifier represents.
        keywords: Words or phrases whose presence marks a query as relevant.
    """

    def __init__(self, domain, keywords):
        self.domain = domain
        self.keywords = keywords

    def classify_query(self, query):
        """Return True when ``query`` contains any keyword as a whole word/phrase.

        Matching is case-insensitive and anchored on word boundaries, so the
        keyword "sound" matches "Sound?" but not "soundness".
        """
        lowered_query = query.lower()
        return any(
            re.search(rf'\b{re.escape(term.lower())}\b', lowered_query)
            for term in self.keywords
        )

if __name__ == "__main__":
    # Vocabulary of the "Sound" chapter; a query mentioning any of these
    # terms is treated as in-domain.
    relevant_keywords = [
        "sound",
        "wave",
        "vibration",
        "frequency",
        "amplitude",
        "wavelength",
        "pitch",
        "loudness",
        "echo",
        "reverberation",
        "sonic boom",
        "ultrasound",
        "infrasound",
        "decibel",
        "noise pollution",
        "musical instrument",
        "longitudinal wave",
        "compressions",
        "rarefactions",
        "speed of sound",
        "medium",
        "reflection of sound",
        "absorption of sound",
    ]
    # Classifier instance for the Sound chapter domain
    selective_retrieval_agent = DomainClassifierTool(
        domain="Sound Chapter (NCERT Class 9)",
        keywords=relevant_keywords,
    )

**Agent for Web Search**

In [11]:
import requests
from urllib.parse import quote_plus

class GoogleSearchClient:
    """Minimal client for the Google Custom Search JSON API.

    Attributes:
        api_key: API key sent as the ``key`` query parameter.
        search_engine_id: Value of the ``SEARCH_ENGINE_ID`` environment
            variable, sent as the ``cx`` query parameter.
        base_url: Endpoint the search requests are sent to.
    """

    def __init__(self, api_key):
        self.api_key = api_key
        self.search_engine_id = os.getenv("SEARCH_ENGINE_ID")
        self.base_url = "https://www.googleapis.com/customsearch/v1"

    def search(self, query, num_results=3, timeout=10):
        """Return the result links for ``query``.

        Args:
            query: Search text; passed as-is and URL-encoded by ``requests``.
            num_results: Number of results to request.
            timeout: Seconds to wait for the API before giving up, so a
                stalled connection cannot hang the caller indefinitely.

        Returns:
            A list of result URLs. Empty when the request fails, the response
            is not valid JSON, or the response contains no items.
        """
        # Let requests build and encode the query string instead of
        # formatting it by hand (key and cx were previously left unencoded).
        params = {
            "key": self.api_key,
            "cx": self.search_engine_id,
            "q": query,
            "num": num_results,
        }

        try:
            response = requests.get(self.base_url, params=params, timeout=timeout)
            response.raise_for_status()
            # Decoding stays inside the try: a non-JSON body raises ValueError.
            payload = response.json()
        except (requests.exceptions.RequestException, ValueError) as e:
            print(f"Error during search: {e}")
            return []

        # Skip result entries that carry no link rather than failing on them.
        return [item['link'] for item in payload.get('items', []) if 'link' in item]

class SearchManager:
    """Thin wrapper around a web-search tool that supplies a fallback message."""

    def __init__(self, web_search_tool):
        self.web_search_tool = web_search_tool

    def get_search_results(self, query):
        """Run ``query`` through the wrapped tool.

        Returns whatever the tool's ``search`` returns when that value is
        truthy; otherwise the string "No results found.".
        """
        found = self.web_search_tool.search(query)
        if not found:
            return "No results found."
        return found

# Wire the search stack together: API key -> client -> manager
api_key = os.environ.get("CUSTOM_SEARCH_API_KEY")
web_search_tool = GoogleSearchClient(api_key=api_key)
web_search_agent = SearchManager(web_search_tool=web_search_tool)

In [12]:
# Example query: smoke-test the web search agent on its own. Prints the links
# returned for the question, or the manager's fallback message if none came back.
query = "Does sound follow the same laws of reflection as light does? Explain"
print(web_search_agent.get_search_results(query))

['https://www.toppr.com/ask/question/does-sound-follow-the-same-laws-of-reflection-as-light/', 'https://byjus.com/question-answer/does-sound-follow-the-laws-of-reflection-as-light-does/', 'https://www.doubtnut.com/pcmb-questions/130851']


**Text-to-Speech Service Integration Using SarvamAI's API**

In [13]:
import base64

async def generate_speech(text: str, timeout: float = 30.0) -> str:
    """Generate speech from text using the Sarvam API and save it as a WAV file.

    The HTTP call uses the blocking ``requests`` client, so it is run in the
    event loop's default executor to keep other coroutines responsive.

    Args:
        text: Text to synthesize; sent as the single entry of ``inputs``.
        timeout: Seconds to wait for the API before ``requests`` gives up.

    Returns:
        Path of the written audio file. The name is fixed
        (``output_audio.wav``), so each call overwrites the previous result.

    Raises:
        Exception: If the API answers with a non-200 status, or with a 200
            response that carries no audio data.
    """
    # Imported locally so this function does not depend on an asyncio import
    # elsewhere in the notebook.
    import asyncio

    url = "https://api.sarvam.ai/text-to-speech"

    # Prepare the payload for the API request
    payload = {
        "inputs": [text],
        "target_language_code": "hi-IN",
        "speaker": "meera",
        "pitch": 0,
        "pace": 1.65,
        "loudness": 1.5,
        "speech_sample_rate": 8000,
        "enable_preprocessing": True,
        "model": "bulbul:v1"
    }

    headers = {
        "api-subscription-key": os.getenv("SARVAMAI_API_KEY"),
        "Content-Type": "application/json"
    }

    # Run the blocking request off the event loop, bounded by a timeout.
    loop = asyncio.get_running_loop()
    response = await loop.run_in_executor(
        None,
        lambda: requests.post(url, json=payload, headers=headers, timeout=timeout),
    )

    if response.status_code != 200:
        raise Exception(f"Error: {response.status_code}, {response.text}")

    # 'audios' may be missing or an empty list; both mean "no audio" rather
    # than an IndexError.
    audios = response.json().get('audios') or [None]
    audio_data = audios[0]
    if not audio_data:
        raise Exception("No audio data found in the response")

    file_path = 'output_audio.wav'
    # The API returns the audio base64-encoded; decode it before writing.
    with open(file_path, 'wb') as audio_file:
        audio_file.write(base64.b64decode(audio_data))
    return file_path

**FastAPI Endpoint - Navigate to http://localhost:8000/ or http://127.0.0.1:8000/ to access your application.**

In [14]:
from fastapi import FastAPI, HTTPException
from fastapi.responses import FileResponse
from fastapi.middleware.cors import CORSMiddleware  # Import CORS middleware
from pydantic import BaseModel
import uvicorn
import asyncio

app = FastAPI()

# Configure CORS middleware to allow cross-origin requests from any origin.
# Credentials are disabled: wildcard origins/methods/headers must not be
# combined with allow_credentials=True, and no endpoint visible in this
# notebook reads cookies or Authorization headers.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Define the input model for the query: the request body of the POST /ask/
# endpoint, with a single string field.
class Query(BaseModel):
    question: str  # the user's question, as passed to the classifier and the chain

# Endpoint to handle question-asking
@app.post("/ask/")
async def ask_rag(query: Query):
    """Answer a question and return the text, related links and an audio file.

    Questions the keyword classifier accepts go through the RAG chain and a
    web search; all others get a fixed "not relevant" message. In both cases
    the response text is converted to speech.

    Returns:
        dict: ``response`` and ``audio_file`` always; ``links`` only for
        questions the classifier accepted.
    """
    question = query.question

    if selective_retrieval_agent.classify_query(question):
        # Use the chain's async entry point so retrieval and the model call
        # do not block the event loop while they wait.
        response = await chain.ainvoke(question)
        response += "\n" + "In response to your query, here are the links I found:"

        # The web search agent is synchronous, so run it in the default executor.
        loop = asyncio.get_running_loop()
        links = await loop.run_in_executor(
            None, web_search_agent.get_search_results, question
        )

        # Generate audio response for the text
        audio_file_path = await generate_speech(response)

        return {
            "response": response,
            "links": links,
            "audio_file": audio_file_path,
        }

    # The query is not relevant: answer with a fixed message and no links.
    response = "The query is not relevant to Chapter 11: Sound."
    audio_file_path = await generate_speech(response)

    return {
        "response": response,
        "audio_file": audio_file_path,
    }
    
# Endpoint to serve generated audio files
@app.get("/audio/{file_name}")
def get_audio(file_name: str):
    """Serve a generated ``.wav`` file from the current working directory.

    Only bare ``.wav`` file names are accepted. Without this check any file in
    the working directory (for example a ``.env`` file) could be downloaded,
    and names containing path separators could point outside it.

    Raises:
        HTTPException: 404 if the name is rejected or the file does not exist.
    """
    # Reject anything that is not a plain file name (no directory components,
    # including Windows-style backslashes).
    is_bare_name = file_name == os.path.basename(file_name) and "\\" not in file_name
    if not is_bare_name or not file_name.lower().endswith(".wav"):
        raise HTTPException(status_code=404, detail="Audio file not found")

    file_path = os.path.join(os.getcwd(), file_name)  # Construct file path
    if os.path.isfile(file_path):
        return FileResponse(file_path)  # Serve the audio file
    raise HTTPException(status_code=404, detail="Audio file not found")  # Return 404 if not found

# Endpoint to serve the frontend HTML
@app.get("/")
def serve_frontend():
    """Return ``frontend.html`` as the response for the root URL."""
    frontend_page = FileResponse("frontend.html")
    return frontend_page

if __name__ == "__main__":
    config = uvicorn.Config(app, host="0.0.0.0", port=8000)  # Configure Uvicorn server
    server = uvicorn.Server(config)

    # get_running_loop() raises RuntimeError when no loop is running, which
    # avoids the deprecated implicit loop creation of get_event_loop().
    try:
        running_loop = asyncio.get_running_loop()
    except RuntimeError:
        # No loop is running (e.g. plain script): let Uvicorn manage its own loop.
        server.run()
    else:
        # A loop is already running (e.g. inside Jupyter): schedule the server
        # on it. Keep a reference so the task is not garbage-collected mid-run.
        server_task = running_loop.create_task(server.serve())

INFO:     Started server process [1436]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://0.0.0.0:8000 (Press CTRL+C to quit)


INFO:     127.0.0.1:61245 - "GET / HTTP/1.1" 200 OK
INFO:     127.0.0.1:61245 - "GET /favicon.ico HTTP/1.1" 404 Not Found
INFO:     127.0.0.1:61245 - "POST /ask/ HTTP/1.1" 200 OK
INFO:     127.0.0.1:61245 - "GET /audio/output_audio.wav HTTP/1.1" 200 OK
INFO:     127.0.0.1:61512 - "GET / HTTP/1.1" 200 OK
INFO:     127.0.0.1:61520 - "GET / HTTP/1.1" 200 OK
