In [35]:
from abc import ABC, abstractmethod
from typing import List, Dict, Any, Optional
import aiohttp
from bs4 import BeautifulSoup
import logging

logger = logging.getLogger(__name__)

class BaseScraper(ABC):
    def __init__(self, base_url: str):
        self.base_url = base_url
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
            'Accept-Language': 'en-US,en;q=0.5',
            'Connection': 'keep-alive',
        }
        self._session: Optional[aiohttp.ClientSession] = None

    async def get_session(self) -> aiohttp.ClientSession:
        """Get or create an aiohttp session."""
        if self._session is None or self._session.closed:
            timeout = aiohttp.ClientTimeout(total=60)
            self._session = aiohttp.ClientSession(headers=self.headers, timeout=timeout)
        return self._session

    async def close(self):
        """Close the session if it exists."""
        if self._session and not self._session.closed:
            await self._session.close()

    async def fetch_page(self, url: str) -> str:
        """Fetch page content asynchronously."""
        try:
            session = await self.get_session()
            async with session.get(url, allow_redirects=True, ssl=False) as response:
                print(response)
                if response.status == 200:
                    return await response.text()
                else:
                    logger.error(f"Error fetching {url}: Status {response.status}")
                    return ""
        except aiohttp.ClientError as e:
            logger.error(f"Network error fetching {url}: {str(e)}")
            return ""
        except Exception as e:
            logger.error(f"Unexpected error fetching {url}: {str(e)}")
            return ""

    def parse_html(self, html: str) -> BeautifulSoup:
        """Parse HTML content using BeautifulSoup with lxml parser."""
        return BeautifulSoup(html, 'lxml')

    async def __aenter__(self):
        """Support for async context manager."""
        await self.get_session()
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        """Cleanup for async context manager."""
        await self.close() 

In [36]:
scraper = BaseScraper("https://bitsdroid.com")
try:
    html = await scraper.fetch_page("https://bitsdroid.com")
    soup = scraper.parse_html(html)
    print(soup)
finally:
    await scraper.close()

<ClientResponse(https://bitsdroid.com) [200 OK]>
<CIMultiDictProxy('Connection': 'Keep-Alive', 'Keep-Alive': 'timeout=5, max=100', 'Content-Type': 'text/html; charset=UTF-8', 'Link': '<https://bitsdroid.com/wp-json/>; rel="https://api.w.org/"', 'Link': '<https://bitsdroid.com/wp-json/wp/v2/pages/6184>; rel="alternate"; title="JSON"; type="application/json"', 'Link': '<https://bitsdroid.com/>; rel=shortlink', 'Transfer-Encoding': 'chunked', 'Content-Encoding': 'br', 'Vary': 'Accept-Encoding', 'Date': 'Sat, 24 May 2025 17:52:03 GMT', 'Server': 'LiteSpeed', 'alt-svc': 'h3=":443"; ma=2592000, h3-29=":443"; ma=2592000, h3-Q050=":443"; ma=2592000, h3-Q046=":443"; ma=2592000, h3-Q043=":443"; ma=2592000, quic=":443"; ma=2592000; v="43,46"')>

<!DOCTYPE html>
<html lang="en-US">
<head>
<meta charset="utf-8"/>
<title>Bitsdroid | Tech News, Coding, Updates and Machine Learning </title>
<meta content="width=device-width, initial-scale=1.0" name="viewport"/>
<link href="https://bitsdroid.com/xmlrpc

In [1]:
import uuid
uuid.uuid4()

UUID('d89a7642-7638-4fff-baca-0f7e78f97019')