### **Enhancer.py**

In [1]:
import os 
import requests
from google import genai

In [2]:
from dotenv import load_dotenv
# Load variables from a .env file into the process environment before reading them.
load_dotenv()

# Credentials/configuration read from the environment. os.getenv returns None
# for a variable that is not set, so each of these may be None.
GEMINI_API_KEY = os.getenv("GEMINI_API")  # checked in Enhancer.__init__
GOOGLE_SEARCH_API_KEY = os.getenv("GOOGLE_SEARCH_API")  # sent as "key" by websearch_url
SEARCH_ENGINE_ID = os.getenv("SEARCH_ENGINE_ID")  # sent as "cx" by websearch_url

In [6]:
class Enhancer:
    """Class to enhance search queries and content using Gemini AI.

    Both public methods send a prompt to the configured Gemini model and
    return the model's text with surrounding whitespace removed. If the model
    returns no usable text, the caller's original input is returned unchanged
    so downstream steps always receive a string.
    """

    # Model used when the caller does not pick one.
    DEFAULT_MODEL = "gemini-2.0-flash"

    def __init__(self, model: str = DEFAULT_MODEL):
        """Create the Gemini client.

        Args:
            model: Name of the Gemini model used for every request.

        Raises:
            ValueError: If the Gemini API key is not configured.
        """
        if not GEMINI_API_KEY:
            raise ValueError("Gemini API key is missing! Add it to .env")

        self.client = genai.Client(api_key=GEMINI_API_KEY)
        self.model = model

    def _generate(self, prompt: str, fallback: str) -> str:
        """Send *prompt* to the model and return its stripped text, or *fallback*."""
        response = self.client.models.generate_content(
            model=self.model,
            contents=[prompt],
        )
        # response.text may be None or blank when the model yields no text
        # parts; never hand that to callers that expect a string.
        text = (response.text or "").strip()
        return text if text else fallback

    def enhance_query(self, original_query: str) -> str:
        """Return a single search query optimized from *original_query*.

        Args:
            original_query: The user's raw search query.

        Returns:
            The model's rewritten query without leading/trailing whitespace,
            or *original_query* itself when the model returns no text.
        """
        prompt = f"""
        Optimize this search query for maximum relevant web search results:
        {original_query}
        Return ONLY ONE search query.
        """

        return self._generate(prompt, fallback=original_query)

    def enhance_content(self, original_content: str) -> str:
        """Return an optimized summary of *original_content*.

        Args:
            original_content: The text to summarize.

        Returns:
            The model's summary without leading/trailing whitespace, or
            *original_content* itself when the model returns no text.
        """
        prompt = f"""
        OPTIMIZE and SUMMARIZE this content for maximum relevance and clarity:

        {original_content}
        
        DO NOT miss out on any important information.
        DO NOT add any new information.
        DO NOT change the meaning of the content.
        """

        return self._generate(prompt, fallback=original_content)

### **Url-Search.py**

In [None]:
def websearch_url(enhanced_query, num_results=3, timeout=10):
    """Return a list of result URLs using the Google Custom Search JSON API.

    Args:
        enhanced_query: The search query to send.
        num_results: How many results to request. Clamped to 1-10, the range
            the API accepts for a single request.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        A list of result links, or ``["No results found"]`` when the response
        contains no usable items (including API error responses).

    Raises:
        requests.RequestException: On network failures or timeouts.
        ValueError: If the response body is not valid JSON.
    """
    search_url = "https://www.googleapis.com/customsearch/v1"

    # Model-generated queries often carry a trailing newline; don't send it.
    query = enhanced_query.strip() if isinstance(enhanced_query, str) else enhanced_query

    params = {
        "q": query,
        "key": GOOGLE_SEARCH_API_KEY,
        "cx": SEARCH_ENGINE_ID,
        # Out-of-range values make the API reject the whole request.
        "num": max(1, min(int(num_results), 10)),
    }

    # Without a timeout a stalled connection would block forever.
    response = requests.get(search_url, params=params, timeout=timeout)
    results = response.json()

    links = [item["link"] for item in results.get("items", []) if "link" in item]
    return links if links else ["No results found"]

### **Scrapper.py**

In [3]:
import os 
import re 

import requests
from bs4 import BeautifulSoup
from firecrawl import FirecrawlApp

In [4]:
from dotenv import load_dotenv 
# Load variables from a .env file into the process environment before reading them.
load_dotenv()

# Firecrawl API key; None when the variable is unset (checked in WebScraper.__init__).
FIRECRAWL_API = os.getenv("FIRECRAWL_API")

In [5]:
class WebScraper:
    """Fetch pages through Firecrawl and reduce their HTML to plain text.

    ``scrape`` handles a single URL and ``crawl`` handles a site. Both write
    the extracted text to ``Sessions/Cleaned.txt`` (overwriting any previous
    run) and return a two-item list ``[ok, text]`` where ``text`` is ``None``
    when ``ok`` is ``False``.
    """

    # Tags whose text is kept by clean_text.
    _HEADING_TAGS = ("h1", "h2", "h3", "h4", "h5", "h6")
    _OUTPUT_FILE = "Cleaned.txt"

    def __init__(self):
        """Create the Firecrawl client and make sure the output directory exists.

        Raises:
            ValueError: If the Firecrawl API key is not configured.
        """
        if not FIRECRAWL_API:
            raise ValueError("Firecrawl API key is missing! Add it to .env")

        self.app = FirecrawlApp(api_key=FIRECRAWL_API)
        self.output_dir = "Sessions"
        os.makedirs(self.output_dir, exist_ok=True)

    def clean_text(self, content: str) -> str:
        """Extract headings, paragraphs, list items and table rows from HTML.

        Elements are emitted in document order so that headings stay next to
        the text they introduce. Elements without any text are skipped.

        Args:
            content: Raw HTML.

        Returns:
            The extracted lines joined by newlines, or
            ``"No relevant text found."`` when nothing was extracted.
        """
        soup = BeautifulSoup(content, "html.parser")
        extracted_text = []

        # A single find_all over every wanted tag walks the tree once, which
        # keeps the output in the order the content appears on the page.
        for tag in soup.find_all([*self._HEADING_TAGS, "p", "li", "tr"]):
            if tag.name == "tr":
                cells = [cell.get_text(strip=True) for cell in tag.find_all(["th", "td"])]
                if any(cells):
                    extracted_text.append(" | ".join(cells))
                continue

            text = tag.get_text(strip=True)
            if not text:
                continue

            if tag.name in self._HEADING_TAGS:
                extracted_text.append(f"\n{tag.name.upper()}: {text}")
            elif tag.name == "li":
                extracted_text.append(f"• {text}")
            else:
                extracted_text.append(text)

        return "\n".join(extracted_text) if extracted_text else "No relevant text found."

    @staticmethod
    def _extract_html(result):
        """Return the non-empty 'html' entry of a Firecrawl result, else None."""
        if not result or 'html' not in result:
            return None
        return result['html'] or None

    def _save(self, text: str) -> str:
        """Write *text* to the shared output file and return its path."""
        file_path = os.path.join(self.output_dir, self._OUTPUT_FILE)
        with open(file_path, "w", encoding="utf-8") as f:
            f.write(text)

        print(f"✅ Saved: {file_path}")
        return file_path

    def scrape(self, url: str) -> list:
        """Scrape one URL and save its cleaned text.

        Args:
            url: Page to scrape.

        Returns:
            ``[True, text]`` on success, ``[False, None]`` when Firecrawl
            returned no HTML.
        """
        print(f"\n🔍 Scraping: {url}")

        scrape_result = self.app.scrape_url(url, params={'formats': ['html']})
        html = self._extract_html(scrape_result)
        if not html:
            print("❌ No HTML content found.")
            return [False, None]

        filtered_text = self.clean_text(html)
        self._save(filtered_text)
        return [True, filtered_text]

    def crawl(self, url: str, depth: int = 1, limit: int = 100) -> list:
        """Crawl a site and save the merged cleaned text of its pages.

        Args:
            url: Start URL of the crawl.
            depth: Currently only shown in the log line.
                NOTE(review): it is not forwarded to Firecrawl; the option
                name depends on the SDK version - confirm before wiring it in.
            limit: Maximum number of pages requested from Firecrawl.

        Returns:
            ``[True, merged_text]`` when at least one page produced text,
            otherwise ``[False, None]``.
        """
        print(f"\n🔍 Crawling: {url} (Depth: {depth})")

        crawl_status = self.app.crawl_url(
            url,
            params={
                'limit': limit,
                'scrapeOptions': {'formats': ['html']}
            },
            poll_interval=30
        )

        # NOTE(review): some SDK versions appear to return the crawled
        # documents under 'data' rather than 'pages' - accept either; confirm
        # against the installed firecrawl version.
        pages = None
        if crawl_status:
            pages = crawl_status.get('pages') or crawl_status.get('data')
        if not pages:
            print("❌ No pages found!")
            return [False, None]

        all_text = []

        for i, page in enumerate(pages, start=1):
            page_url = page.get('url') or (page.get('metadata') or {}).get('sourceURL')
            print(f"📄 Scraping page {i}: {page_url}")

            # The crawl already asked for HTML; only re-scrape a page when
            # the crawl result did not include it.
            html = page.get('html')
            if not html and page_url:
                page_data = self.app.scrape_url(page_url, params={'formats': ['html']})
                html = self._extract_html(page_data)

            if html:
                all_text.append(self.clean_text(html))

        if not all_text:
            # Nothing extracted: report failure instead of saving an empty file.
            print("❌ No HTML content found.")
            return [False, None]

        merged_content = "\n\n".join(all_text)
        self._save(merged_content)
        return [True, merged_content]