In [39]:
# %%
import asyncio
from playwright.async_api import async_playwright
from bs4 import BeautifulSoup
import pandas as pd
import re

In [40]:
# Output columns, in the order they appear in the returned DataFrame.
_RESULT_COLUMNS = ["Name", "Topic", "Eligibility", "Budget", "Deadline", "Description", "URL"]

# One regex per labelled field; group(1) is the field value. Each value runs
# lazily up to the next known label (or the end of the text).
_FIELD_PATTERNS = {
    "Topic": r"Thema:\s*(.*?)\s*(Förderfähig:|Budget:|Antragsfrist:|$)",
    "Eligibility": r"Förderfähig:\s*(.*?)\s*(Budget:|Antragsfrist:|$)",
    "Budget": r"Budget:\s*(.*?)\s*(Antragsfrist:|$)",
    "Deadline": r"Antragsfrist:\s*([\d\.]+\s+[A-Za-zäöüÄÖÜ]+\s+\d{4})",
}


def _collapse_ws(text):
    """Collapse every whitespace run (newlines, NBSP, ...) to one space and trim."""
    return re.sub(r"\s+", " ", text).strip()


def _following_nodes(heading_p, heading_ids, limit):
    """Return up to `limit` siblings after `heading_p`, stopping at the next call heading."""
    nodes = []
    for index, sibling in enumerate(heading_p.next_siblings):
        # Every node counts toward the window (text nodes included). Reaching
        # another call's heading paragraph ends this call's section so that
        # neighbouring calls never leak into each other.
        if index >= limit or id(sibling) in heading_ids:
            break
        nodes.append(sibling)
    return nodes


def _parse_call_fields(full_text, title):
    """Extract the labelled fields from one call's text plus the cleaned description."""
    fields = {}
    description = full_text
    for column, pattern in _FIELD_PATTERNS.items():
        match = re.search(pattern, full_text)
        value = match.group(1).strip() if match else None
        fields[column] = value
        if value:
            # Remove exactly the matched "Label: value" span, whatever spacing
            # separated the label from its value.
            labelled = full_text[match.start():match.end(1)]
            description = description.replace(labelled, "", 1)
    # Drop only the heading occurrence of the title; later mentions in the
    # prose stay intact.
    description = description.replace(title, "", 1)
    fields["Description"] = _collapse_ws(description)
    return fields


def _first_link(nodes):
    """Return the first non-blank href found in `nodes` (document order), else None."""
    for node in nodes:
        # Text nodes have no find_all; only element nodes can contain anchors.
        if not hasattr(node, "find_all"):
            continue
        for anchor in node.find_all("a"):
            href = (anchor.get("href") or "").strip()
            if href:
                return href
    return None


async def scrape_nrweuropa(
    url="https://nrweuropa.de/cascadefunding/",
    timeout_ms=60000,
    max_siblings=5,
):
    """Scrape the "Open Call" entries from a cascade-funding listing page.

    The page is rendered with headless Chromium (Playwright) and parsed with
    BeautifulSoup. Every <strong> whose text contains "Open Call" and that
    sits inside a <p> is treated as the heading of one call. The call's text
    is the heading paragraph plus the following sibling nodes, limited to
    `max_siblings` nodes and cut off at the next call heading.

    Args:
        url: Page to load.
        timeout_ms: Navigation timeout passed to `page.goto`, in milliseconds.
        max_siblings: How many nodes after the heading paragraph are searched
            for text and for a link.

    Returns:
        pandas.DataFrame with the columns Name, Topic, Eligibility, Budget,
        Deadline, Description and URL, one row per call. Fields that cannot
        be found are None. The columns are present even when no call is found.
    """
    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=True)
        try:
            page = await browser.new_page()
            await page.goto(url, timeout=timeout_ms)
            html = await page.content()
        finally:
            # Close the browser even when navigation fails or times out.
            await browser.close()

    soup = BeautifulSoup(html, "html.parser")
    strong_tags = soup.find_all("strong", string=lambda text: text and "Open Call" in text)

    headings = [(tag, tag.find_parent("p")) for tag in strong_tags]
    # Identity-based lookup: bs4 tags compare equal by content, so two
    # different paragraphs with the same markup would otherwise collide.
    heading_ids = {id(paragraph) for _, paragraph in headings if paragraph is not None}

    funding_data = []
    for tag, parent_p in headings:
        if parent_p is None:
            continue

        title = _collapse_ws(tag.get_text(strip=True))
        following = _following_nodes(parent_p, heading_ids, max_siblings)

        # --- Combine paragraph + nearby text ---
        parts = [parent_p.get_text(" ", strip=True)]
        parts.extend(
            node.get_text(" ", strip=True)
            for node in following
            if hasattr(node, "get_text")
        )
        # Normalising whitespace keeps the single-line regexes working even
        # when the markup contains newlines or non-breaking spaces.
        full_text = _collapse_ws(" ".join(parts))

        # --- Save data ---
        funding_data.append({
            "Name": title,
            **_parse_call_fields(full_text, title),
            "URL": _first_link([parent_p, *following]),
        })

    return pd.DataFrame(funding_data, columns=_RESULT_COLUMNS)


In [41]:
# %%
# Run the scraper and preview the first rows of the result.
# The call uses top-level `await`, which works in an IPython/Jupyter kernel
# but is a syntax error in a plain Python script.
# `df` is kept as a notebook-level name because the export cell below reads it.
df = await scrape_nrweuropa()
df.head()


Unnamed: 0,Name,Topic,Eligibility,Budget,Deadline,Description,URL
0,FORTIS 1st Open Call,"Mensch-Roboter-Interaktion (HRI), multimodale ...","Konsortien aus 2-3 Organisationen: Start-ups, ...",Bis zu 250.000 Euro,4. Juni 2025,"FORTIS unterstützt Projekte, die innovative Lö...",https://fortis-project.eu/open-call-1/
1,MASTER 2nd Open Call,VR/XR,"Universitäten, Forschungseinrichtungen, Bildun...",Bis zu 100.000 Euro,12. Juni 2025,"MASTER unterstützt Projekte, die innovative XR...",https://www.master-xr.eu/open-calls/open-call-2/
2,PEDVolution Open Call,"Energienetze, Energieeffizienz","Start-ups, KMU, Mid-caps, Forschungseinrichtun...",Bis zu 50.000 Euro,30. Juni 2025,"PEDvolution unterstützt Projekte, die innovati...",https://news.pedvolution.eu/posts/open-call-fo...
3,SMURF 2nd Open Call,Fortswirtschaft & Waldbewirtschaft,"Kommunen, Verbände, Föderationen, oder Genosse...",Bis zu 60.000 Euro,2. Juli 2025,"Das SMURF-Projekt zielt darauf ab, Innovation,...",https://www.smurfproject.eu/cascade-funding-sm...
4,GUARDIANS 1st Open Call,Landwirtschaft & Digitalisierung,"KMU, Start-Up, Forschungseinrichtung oder Univ...",Bis zu 81.000 Euro,15. Juli 2025,"GUARDIANS fördert die gemeinsame Entwicklung, ...",https://guardians-project.eu/call/open-call-1-...


In [1]:
# Write the scraped table to CSV without the index column.
# NOTE(review): this cell reads `df`, which is only created by the cell that
# awaits scrape_nrweuropa(); running it on a fresh kernel raises
# NameError, so execute the notebook top to bottom.
df.to_csv("funding_nrweuropa_data.csv", index=False)

NameError: name 'df' is not defined