In [4]:
import requests
import pandas as pd
from bs4 import BeautifulSoup as bs

class CryptoScraper:
    """Scrape the leading rows of Yahoo Finance's crypto market tables.

    Every page listed in ``URLS`` is downloaded, the first ``<table>`` on the
    page is turned into one dict per row, and the combined rows are written
    to a CSV file and printed.
    """

    # Category label -> page to scrape. The label is stored in each row's
    # "Category" field.
    URLS = {
        "Most Active": "https://finance.yahoo.com/markets/crypto/most-active/",
        "Top Gainers": "https://finance.yahoo.com/markets/crypto/gainers/",
        "Top Losers":  "https://finance.yahoo.com/markets/crypto/losers/",
    }

    # Browser-like User-Agent header sent with every request.
    HEADERS = {
        "User-Agent": (
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
            "AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/120.0.0.0 Safari/537.36"
        )
    }

    def fetch_html(self, url: str) -> str:
        """Download *url* and return the response body as text.

        The request uses ``HEADERS`` and a 10 second timeout;
        ``raise_for_status()`` turns an HTTP error status into an exception.
        """
        response = requests.get(url, headers=self.HEADERS, timeout=10)
        response.raise_for_status()
        return response.text

    @staticmethod
    def _cell_texts(cells, marker_attr: str) -> list[str]:
        """Return the stripped text of *cells* without sparkline or last cell.

        Cells whose *marker_attr* attribute equals ``"sparkline"`` are
        skipped, then the final remaining cell is dropped.
        """
        kept = [cell for cell in cells if cell.get(marker_attr) != "sparkline"]
        # NOTE(review): get_text(strip=True) joins nested text nodes with no
        # separator, so multi-node cells come out concatenated (for example
        # "Volume InCurrency (24hr)"). Left as-is to keep the CSV schema
        # stable; consider a separator if downstream code parses these values.
        # NOTE(review): the trailing column is always discarded — presumably
        # it holds a non-text widget; confirm against the live page.
        return [cell.get_text(strip=True) for cell in kept][:-1]

    def parse_crypto_table(self, html: str, category: str, limit: int = 3) -> list[dict]:
        """Parse the first table in *html* into a list of row dicts.

        Args:
            html: Page markup to parse.
            category: Value stored under the ``"Category"`` key of every row.
            limit: Maximum number of body rows to return. ``None`` keeps
                every row; zero or a negative number yields no rows.

        Returns:
            One dict per row mapping column header text to cell text, plus
            ``"Category"``. An empty list when *html* is empty, when it has no
            ``<table>``, or when the table lacks a ``<thead>`` or ``<tbody>``.
        """
        # Nothing to parse or nothing requested: skip building the soup.
        if not html or (limit is not None and limit <= 0):
            return []

        soup = bs(html, "html.parser")
        table = soup.find("table")
        if table is None:
            return []

        # A table without an explicit head or body cannot be mapped to named
        # columns; treat it like a missing table instead of crashing on None.
        thead = table.find("thead")
        tbody = table.find("tbody")
        if thead is None or tbody is None:
            return []

        col_names = self._cell_texts(thead.find_all("th"), "data-testid-header")

        rows_data: list[dict] = []
        for row in tbody.find_all("tr")[:limit]:
            cell_values = self._cell_texts(row.find_all("td"), "data-testid-cell")
            # zip() stops at the shorter sequence, so surplus headers or
            # cells are silently ignored.
            data = dict(zip(col_names, cell_values))
            data["Category"] = category
            rows_data.append(data)

        return rows_data

    def main(self, limit: int = 3, output_path: str = "crypto_raw.csv") -> None:
        """Scrape every category, save the rows as CSV and print them.

        Args:
            limit: Number of rows to keep per category.
            output_path: Destination of the CSV file.
        """
        all_rows: list[dict] = []

        for category, url in self.URLS.items():
            html = self.fetch_html(url)
            all_rows.extend(self.parse_crypto_table(html, category, limit=limit))

        df = pd.DataFrame(all_rows)

        # Move "Category" to the front; skipped when no rows were scraped and
        # the frame therefore has no columns.
        if "Category" in df.columns:
            ordered_cols = ["Category"] + [c for c in df.columns if c != "Category"]
            df = df[ordered_cols]

        df.to_csv(output_path, index=False)

        # These change pandas display options for the whole process.
        pd.set_option("display.max_columns", None)
        pd.set_option("display.width", 160)

        print(df.to_string(index=False))


# Script entry point: run the full scrape when executed directly.
if __name__ == "__main__":
    CryptoScraper().main()
       


   Category          Symbol                    Name                  Price Change   Change % Market Cap  Volume Volume InCurrency (24hr) Total Volume AllCurrencies (24hr) CirculatingSupply 52 WkChange %
Most Active       UUSDT-USD         Tether USDt USD      1.00-0.00(-0.01%)  -0.00     -0.01%   185.668B 58.329B                  58.329B                           58.329B          185.632B        -0.07%
Most Active  MMPRO31258-USD        Max Property USD      0.01-0.00(-0.01%)  -0.00     -0.01%          0   45.2B                    45.2B                             45.2B                 0       -99.96%
Most Active        BBTC-USD             Bitcoin USD 89,458.34+5.80(+0.01%)  +5.80     +0.01%     1.785T 41.591B                  41.591B                           41.591B           19.958M       -10.54%
Top Gainers      HHOCAI-USD Heroes of Crypto AI USD  0.01+0.01(+7,889.33%)  +0.01 +7,889.33%          0  37,220                   37,220                            37,220                 0