In [2]:
import os

import google.generativeai as genai
import requests
from bs4 import BeautifulSoup

# Credentials are taken from the environment when available so real keys do
# not have to be written into the source; the placeholder strings remain as
# fallbacks, so behaviour is unchanged when the variables are unset.
genai.configure(api_key=os.environ.get("GEMINI_API_KEY", "Enter Your Gemini API Key"))
# Shared Gemini model handle used by SerpApiRAG.generate_answer.
model = genai.GenerativeModel('gemini-pro')

# API key sent with every SerpApi search request.
SERPAPI_KEY = os.environ.get("SERPAPI_KEY", "Enter Your SERP API Key")

class SerpApiRAG:
    """Small retrieval-augmented generation pipeline backed by web search.

    The pipeline has three stages: ask SerpApi for result URLs, download each
    page and reduce it to its paragraph/heading text, then ask the
    module-level Gemini ``model`` to answer the question from that text.
    """

    # Seconds to wait on any single HTTP request before giving up.
    REQUEST_TIMEOUT = 15
    # Maximum number of characters kept per page, bounding the prompt size.
    MAX_CONTENT_CHARS = 10000
    # Browser-like User-Agent sent when downloading pages (presumably so
    # sites that reject non-browser clients still respond).
    USER_AGENT = (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
    )

    def __init__(self):
        """Set the SerpApi search endpoint used by :meth:`search_web`."""
        self.search_endpoint = "https://serpapi.com/search"

    @staticmethod
    def _extract_links(results, num_results):
        """Return at most ``num_results`` non-empty ``link`` values.

        ``results`` is the decoded SerpApi JSON response. Entries under
        ``organic_results`` that have no ``link`` are skipped, so callers
        never receive ``None`` in place of a URL.
        """
        links = (entry.get("link") for entry in results.get("organic_results", []))
        return [link for link in links if link][:num_results]

    def search_web(self, query, num_results=10):
        """Search Google through SerpApi and return result URLs.

        Args:
            query: Search text.
            num_results: Maximum number of URLs to return.

        Returns:
            A list of URL strings. The list is empty when the request fails,
            the server answers with an error status, or the response cannot
            be decoded; the error is printed rather than raised.
        """
        params = {
            "api_key": SERPAPI_KEY,
            "engine": "google",
            "q": query,
            "num": num_results,
            "location": "United States",
            "google_domain": "google.com"
        }

        try:
            # Without a timeout a stalled connection would block forever.
            response = requests.get(
                self.search_endpoint,
                params=params,
                timeout=self.REQUEST_TIMEOUT,
            )
            response.raise_for_status()
            return self._extract_links(response.json(), num_results)
        except Exception as e:
            # Best-effort: a failed search yields "no results", not a crash.
            print(f"Search error: {str(e)}")
            return []

    def fetch_page_content(self, url):
        """Download ``url`` and return its readable text.

        Script, style, nav, footer and header elements are removed, then the
        text of every ``p``/``h1``/``h2``/``h3`` inside the first
        ``<article>``, else ``<main>``, else ``<body>`` is joined with
        newlines and truncated to ``MAX_CONTENT_CHARS`` characters.

        Returns:
            The extracted text, or ``""`` when the download fails, the server
            answers with an error status, or parsing raises; the error is
            printed rather than raised.
        """
        try:
            response = requests.get(
                url,
                headers={"User-Agent": self.USER_AGENT},
                timeout=self.REQUEST_TIMEOUT,
            )
            # Do not treat 4xx/5xx error pages as source material.
            response.raise_for_status()
            soup = BeautifulSoup(response.content, 'html.parser')

            for element in soup(['script', 'style', 'nav', 'footer', 'header']):
                element.decompose()

            main_content = soup.find('article') or soup.find('main') or soup.body
            if not main_content:
                return ""

            fragments = (
                node.get_text().strip()
                for node in main_content.find_all(['p', 'h1', 'h2', 'h3'])
            )
            text = '\n'.join(fragment for fragment in fragments if fragment)
            return text[:self.MAX_CONTENT_CHARS]
        except Exception as e:
            # Best-effort: an unreadable page is skipped by the caller.
            print(f"Error fetching {url}: {str(e)}")
            return ""

    def generate_answer(self, query, context_chunks):
        """Ask the Gemini model to answer ``query`` from ``context_chunks``.

        Each chunk is labelled ``Source N`` (1-based) so the model can cite
        it as ``[N]``.

        Returns:
            The ``text`` of the model response. Exceptions raised by the
            model call are not caught here.
        """
        context = "\n\n".join(
            f"Source {number}:\n{text}"
            for number, text in enumerate(context_chunks, start=1)
        )

        prompt = f"""Analyze the following information from web sources and answer the question.
        Follow these rules:
        1. Be factual and concise
        2. Acknowledge conflicting information if present
        3. Cite sources using [1], [2] numbering
        4. If information is inconsistent, say so

        Context:
        {context}

        Question: {query}

        Answer:"""

        response = model.generate_content(prompt)
        return response.text

    def rag_query(self, query, num_sources=3):
        """Run the full search, fetch and answer pipeline for ``query``.

        Args:
            query: The question to answer.
            num_sources: Maximum number of search results to read.

        Returns:
            The generated answer, or a short explanatory message when the
            search returns no URLs or none of the pages yields any text.
        """
        # Web search
        urls = self.search_web(query, num_results=num_sources)
        if not urls:
            return "No relevant information found through web search"

        # Content retrieval
        context_chunks = []
        for url in urls:
            content = self.fetch_page_content(url)
            if content:
                context_chunks.append(f"URL: {url}\nContent: {content}")

        # With nothing retrieved the model would answer without any grounding,
        # so report the failure instead of calling it.
        if not context_chunks:
            return "Could not retrieve content from any of the search results"

        # Answer generation
        return self.generate_answer(query, context_chunks)

# Usage: run the demo query only when executed directly (as a script or in a
# notebook cell), so importing this module does not trigger network and LLM
# calls. The names `rag` and `answer` are still bound at module level.
if __name__ == "__main__":
    rag = SerpApiRAG()
    answer = rag.rag_query("how many parameters are in deepseek r1")
    print("Answer:", answer)

Answer: DeepSeek R1 has up to 671 billion parameters in its flagship releases. [1]
