In [None]:
import re
import requests
import pandas as pd
from bs4 import BeautifulSoup

def scrape_rapla_calendar(user: str, file: str, week: int = None, timeout: float = 30.0):
    """
    Scrape the Rapla calendar HTML for a given user and file (course).

    The page is downloaded, flattened to plain text, and searched for entries
    of the form ``HH:MM - HH:MM <title> <weekday> DD.MM.YY``.

    Args:
        user: Value sent as the ``user`` query parameter.
        file: Value sent as the ``file`` query parameter (the course).
        week: Optional value sent as the ``week`` query parameter; the
            parameter is omitted when this is ``None``.
        timeout: Seconds to wait for the server before the request is aborted.

    Returns:
        A pandas DataFrame with the columns ``title``, ``start``, ``end``,
        ``weekday``, ``raw_date`` and ``date``. ``start`` and ``end`` are
        parsed to datetimes and ``date`` holds the calendar date of ``start``.
        If no entry is found, an empty DataFrame with the same columns is
        returned.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.RequestException: On connection problems or a timeout.
    """
    base_url = "https://rapla.dhbw-karlsruhe.de/rapla"
    params = {
        "page": "calendar",
        "user": user,
        "file": file,
    }
    # Compare against None so that an explicitly passed value is never
    # dropped just because it happens to be falsy.
    if week is not None:
        params["week"] = week

    # 1) Fetch the HTML; without a timeout the call could block forever.
    resp = requests.get(base_url, params=params, timeout=timeout)
    resp.raise_for_status()

    # 2) Flatten the document to text, one line per text node.
    text = BeautifulSoup(resp.text, "html.parser").get_text(separator="\n")

    # 3) Match: times + title + weekday + date.
    #    Example: "09:00 -14:15 Projektrealisierung ... Mo 19.05.25 ..."
    # NOTE(review): because of re.DOTALL the lazy title group may span several
    # lines and pick up surrounding text up to the next weekday/date pair.
    pattern = re.compile(
        r"(?P<start>\d{2}:\d{2})\s*-\s*(?P<end>\d{2}:\d{2})\s+"
        r"(?P<title>.*?)\s+"
        r"(?P<weekday>Mo|Di|Mi|Do|Fr|Sa|So)\s+"
        r"(?P<date>\d{2}\.\d{2}\.\d{2})",
        re.DOTALL
    )

    events = []
    for m in pattern.finditer(text):
        # Convert DD.MM.YY to ISO format; the two-digit year is taken to be 20YY.
        dd, mm, yy = m.group("date").split(".")
        iso_date = f"20{yy}-{mm}-{dd}"

        events.append({
            "title": m.group("title").strip(),
            "start": f"{iso_date} {m.group('start')}",
            "end": f"{iso_date} {m.group('end')}",
            "weekday": m.group("weekday"),
            "raw_date": m.group("date"),
        })

    # 4) Build the DataFrame. Passing the columns explicitly keeps them present
    #    when nothing matched, so the column accesses below cannot fail.
    columns = ["title", "start", "end", "weekday", "raw_date"]
    df = pd.DataFrame(events, columns=columns)

    # Parse the time columns with the exact format produced above.
    timestamp_format = "%Y-%m-%d %H:%M"
    df["start"] = pd.to_datetime(df["start"], format=timestamp_format)
    df["end"] = pd.to_datetime(df["end"], format=timestamp_format)
    df["date"] = df["start"].dt.date
    return df


# Fetch the calendar for user "brockmans" and course file "WWI22B1" (no week filter).
df = scrape_rapla_calendar(user="brockmans", file="WWI22B1")
# Preview the first rows of the scraped events.
df.head()


                                               title               start  \
0  Projektrealisierung\nerstellt am 11.04.16 10:0... 2025-05-19 09:00:00   
1  Lehrveranstaltung\n\n\nTitel:\n\n\nProjektreal... 2025-05-21 09:00:00   
2  Lehrveranstaltung\n\n\nTitel:\n\n\nAusgewählte... 2025-05-23 09:00:00   
3  Lehrveranstaltung\n\n\nTitel:\n\n\nWahlmodul (... 2025-05-23 09:00:00   
4  Betrieb und Management von IT-Infrastrukturen ... 2025-05-22 09:30:00   

                  end weekday  raw_date        date  
0 2025-05-19 14:15:00      Mo  19.05.25  2025-05-19  
1 2025-05-21 14:15:00      Mi  21.05.25  2025-05-21  
2 2025-05-23 10:30:00      Fr  23.05.25  2025-05-23  
3 2025-05-23 12:15:00      Fr  23.05.25  2025-05-23  
4 2025-05-22 12:45:00      Do  22.05.25  2025-05-22  
