In [13]:
import time
from urllib.parse import urljoin

import cloudscraper
from bs4 import BeautifulSoup

# Listing page requested by scrape_brecorder_homepage.
BASE_URL = "https://www.brecorder.com/trends/kse100"

def get_scraper():
    """Create a cloudscraper session configured with a desktop Chrome/Windows browser profile.

    Returns:
        The object produced by ``cloudscraper.create_scraper``; the rest of
        this notebook only relies on its ``get(url, timeout=...)`` method.
    """
    browser_profile = dict(browser="chrome", platform="windows", mobile=False)
    return cloudscraper.create_scraper(browser=browser_profile)

def scrape_brecorder_homepage(scraper):
    """Fetch the listing page at ``BASE_URL`` and extract its article teasers.

    Args:
        scraper: Object exposing ``get(url, timeout=...)`` that returns a
            response with ``status_code`` and ``text`` attributes (e.g. the
            session built by ``get_scraper``).

    Returns:
        list[dict]: One dict per ``article.story[data-id]`` element that has a
        linked headline, with keys ``id``, ``title``, ``url`` (the href exactly
        as it appears in the page, possibly relative) and ``published_at``
        (the ``title`` attribute of the timestamp element, or ``None`` when
        that element is absent).

    Raises:
        RuntimeError: If the listing page does not answer with HTTP 200.
    """
    print(f"Scraping homepage: {BASE_URL}")
    response = scraper.get(BASE_URL, timeout=20)

    if response.status_code != 200:
        raise RuntimeError(f"HTTP {response.status_code}")

    soup = BeautifulSoup(response.text, "html.parser")

    articles = []

    for article in soup.select("article.story[data-id]"):
        title_tag = article.select_one("h2.story__title a")
        if title_tag is None:
            continue

        href = title_tag.get("href")
        if not href:
            # An anchor without a target cannot be followed; emitting
            # url=None would crash callers that treat the URL as a string.
            continue

        time_el = article.select_one(
            ".story__time .timestamp--time"
        )

        articles.append({
            "id": article.get("data-id"),
            "title": title_tag.get_text(strip=True),
            "url": href,
            "published_at": time_el.get("title") if time_el else None
        })

    print(f"Found {len(articles)} articles.")
    return articles

In [14]:
def scrape_brecorder_article(scraper, url):
    """Download a single article page and return its paragraph text.

    Args:
        scraper: Object exposing ``get(url, timeout=...)``.
        url: Address of the article page to fetch.

    Returns:
        str | None: The non-empty ``<p>`` texts found inside
        ``div.story__content`` joined by blank lines, or ``None`` when the
        request does not return HTTP 200, the content container is missing,
        or any exception is raised along the way (the reason is printed).
    """
    print(f"Scraping article: {url}")
    try:
        page = scraper.get(url, timeout=20)
        if page.status_code != 200:
            print(f"Failed to fetch {url}: HTTP {page.status_code}")
            return None

        body = BeautifulSoup(page.text, "html.parser").select_one("div.story__content")
        if not body:
            print(f"No content found for {url}")
            return None

        # Remove unwanted elements: scripts, styles, canvases, raw-html
        # embeds and div-gpt-ad* containers (presumably ad slots).
        noise_selector = "script, style, canvas, raw-html, div[id^='div-gpt-ad']"
        for node in body.select(noise_selector):
            node.decompose()

        # Collapse each paragraph to single-spaced text and drop empty ones.
        texts = (p.get_text(" ", strip=True) for p in body.find_all("p"))
        return "\n\n".join(chunk for chunk in texts if chunk)
    except Exception as exc:
        # Best effort: one bad page must not abort the whole crawl.
        print(f"Error scraping {url}: {exc}")
        return None

In [15]:
# 1. Initialize Scraper
scraper = get_scraper()

# 2. Get Articles from Homepage
articles_data = scrape_brecorder_homepage(scraper)

# 3. Get Body for each article
for article in articles_data:
    raw_url = article['url']
    if not raw_url:
        # Nothing to follow; keep the keys present so later cells can rely on them.
        article['full_url'] = None
        article['body'] = None
        continue

    # Resolve relative links against the listing page; absolute URLs pass
    # through urljoin unchanged.
    article_url = urljoin(BASE_URL, raw_url)
    article['full_url'] = article_url

    # Scrape body (None when the article could not be fetched or parsed)
    article['body'] = scrape_brecorder_article(scraper, article_url)

    # Pause between requests to reduce the chance of being rate limited
    time.sleep(1)

# 4. Result
# Count only articles whose body was actually retrieved, so failures are visible.
scraped_count = sum(1 for item in articles_data if item['body'])
print(f"\nSuccessfully scraped {scraped_count} of {len(articles_data)} articles with details.")

# Display sample of the first article
if articles_data:
    print("-" * 40)
    print("Sample Article Data:")
    print(f"Title: {articles_data[0]['title']}")
    print(f"Date: {articles_data[0]['published_at']}")
    print(f"ID: {articles_data[0]['id']}")
    print(f"URL: {articles_data[0]['full_url']}")
    print(f"Body Preview: {articles_data[0]['body'][:200]}..." if articles_data[0]['body'] else "No Body Content")
    print("-" * 40)
    

Scraping homepage: https://www.brecorder.com/trends/kse100
Found 54 articles.
Scraping article: https://www.brecorder.com/news/40399618/kse-100-hits-new-all-time-high-on-nearly-1500-point-rally
Scraping article: https://www.brecorder.com/news/40399402/pakistan-stocks-expected-to-remain-best-performing-asset-class-in-2026-report
Scraping article: https://www.brecorder.com/news/40395913/kse-100-index-gains-800-points-amid-bullish-momentum
Scraping article: https://www.brecorder.com/news/40395543/selling-pressure-grips-bourse-kse-100-settles-nearly-1500-points-lower
Scraping article: https://www.brecorder.com/news/40395332/kse-100-sheds-over-400-points-amid-late-profit-taking
Scraping article: https://www.brecorder.com/news/40394747/buying-momentum-continues-kse-100-gains-over-1300-points
Scraping article: https://www.brecorder.com/news/40394555/buying-rally-continues-kse-100-settles-with-nearly-2200-points-gain
Scraping article: https://www.brecorder.com/news/40394335/buying-returns-kse-