# 🧼 ISB Scraper (Full Serial + First Entry Fix)
Fetches all funding programs in listing order, including the entries at the edges of the list (such as the first one).

In [None]:
# !pip install playwright beautifulsoup4 pandas
# !playwright install

In [None]:

import asyncio
from playwright.async_api import async_playwright
from bs4 import BeautifulSoup
import pandas as pd


In [None]:

async def get_all_links_ordered():
    """Collect the funding-program detail links from the ISB overview page.

    Opens the overview page in headless Chromium, scrolls down a fixed number
    of times so that entries rendered on scroll get a chance to appear, and
    then extracts the title links from the rendered HTML.

    Returns:
        list[str]: Absolute URLs in document order. Only hrefs containing
        "/foerderung/" are kept; hrefs starting with "/" are prefixed with
        "https://isb.rlp.de". Duplicates are not removed.
    """
    url = "https://isb.rlp.de/service/foerderung.html"
    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=True)
        try:
            page = await browser.new_page()
            await page.goto(url, timeout=60000)
            await page.wait_for_selector(".isb-foerderfinder-list2")  # ensure list loads

            # Scroll manually a few times (force all entries to render)
            for _ in range(15):
                await page.mouse.wheel(0, 1500)
                await page.wait_for_timeout(800)

            html = await page.content()
        finally:
            # close() is a coroutine in the async API; it must be awaited to
            # actually run, and the finally guarantees cleanup on failure too.
            await browser.close()

    # Parsing works on the captured HTML only, so the browser is not needed.
    soup = BeautifulSoup(html, "html.parser")
    anchor_tags = soup.select(".isb-foerderfinder-list2-title a")

    links = []
    for a in anchor_tags:
        href = a.get("href")
        if href and "/foerderung/" in href:
            full = "https://isb.rlp.de" + href if href.startswith("/") else href
            links.append(full)

    return links


In [None]:

async def scrape_funding_page(playwright, url):
    """Scrape one funding-program detail page into a flat dict.

    Launches a fresh headless Chromium instance, loads ``url``, waits two
    seconds, and parses the resulting HTML. Scraping is best effort: any
    error while loading or parsing is printed and the fields gathered so far
    are returned, with the remaining ones left as ``None``.

    Args:
        playwright: A started async Playwright object (as yielded by
            ``async_playwright()``), used to launch the browser.
        url: Address of the detail page to scrape.

    Returns:
        dict: Keys ``name``, ``description``, ``eligibility``, ``amount``,
        ``procedure``, ``contact`` and ``url``. Every value except ``url``
        is ``None`` when the corresponding element was not found.
    """
    data = {
        "name": None, "description": None, "eligibility": None,
        "amount": None, "procedure": None, "contact": None, "url": url
    }

    browser = await playwright.chromium.launch(headless=True)
    try:
        page = await browser.new_page()
        await page.goto(url, timeout=60000)
        await page.wait_for_timeout(2000)
        html = await page.content()
        soup = BeautifulSoup(html, "html.parser")

        name = soup.select_one("h1.col-md-10.col-sm-12.col-xs-12")
        if name:
            data["name"] = name.get_text(strip=True)

        desc = soup.select_one("div.ce-textpic.ce-center.ce-above div.ce-bodytext")
        if desc:
            data["description"] = desc.get_text(strip=True)

        # Accordion Panels
        for panel in soup.select("div.panel.panel-default"):
            title_tag = panel.select_one("h4.panel-title a")
            body_tag = panel.select_one("div.panel-collapse .ce-bodytext")
            # Skip panels lacking a title link or a body text element instead
            # of relying on a catch-all except to hide the resulting error.
            if title_tag is None or body_tag is None:
                continue

            title = title_tag.get_text(strip=True).lower()
            content = body_tag.get_text(strip=True)
            # Route the panel text by keywords in its (lower-cased) heading;
            # a later matching panel overwrites an earlier one.
            if "wer wird" in title:
                data["eligibility"] = content
            elif "wie wird" in title or "finanziert" in title:
                data["amount"] = content
            elif "beantrag" in title or "antrag" in title:
                data["procedure"] = content

        # Contact info
        contacts = []
        for div in soup.select("div.col-xs-12.col-lg-3 div.isb-contact-item"):
            lines = div.select("div.isb-contact__line .isb-contact__value")
            parts = [line.get_text(strip=True) for line in lines]
            contacts.append(" | ".join(parts))
        if contacts:
            data["contact"] = " || ".join(contacts)

    except Exception as e:
        print(f"❌ Error scraping {url}: {e}")
    finally:
        # Always release the browser, including on cancellation or interrupt.
        await browser.close()
    return data


In [None]:

async def run_full_scraper():
    """Scrape every funding program and write the results to a CSV file.

    Gathers the detail-page links, scrapes them one after another in the
    order they were found while printing progress, and saves the collected
    rows to ``funding-isb.csv`` without the index column.

    Returns:
        pandas.DataFrame: One row per scraped link.
    """
    links = await get_all_links_ordered()
    print(f"🔗 Total links found: {len(links)}")

    async with async_playwright() as p:
        all_data = []
        for i, link in enumerate(links):
            print(f"⏳ [{i+1}/{len(links)}] Scraping: {link}")
            all_data.append(await scrape_funding_page(p, link))

    df = pd.DataFrame(all_data)
    df.to_csv("funding-isb.csv", index=False)
    print("✅ Data saved to funding-isb.csv")
    return df


In [None]:

# Run it
# NOTE: top-level `await` is only valid where the interpreter supports it
# (e.g. a Jupyter/IPython cell); in a plain Python script this line is a
# syntax error and the coroutine would need to be driven by an event loop.
df = await run_full_scraper()
# Show the first rows of the scraped table as a quick sanity check.
df.head()
