## Goal: Predict Peak Times in the DHBW Cafeteria Using Calendar Data

This notebook integrates and analyzes external calendar data (e.g., public holidays, lecture schedules, exam periods) to improve the prediction of peak hours in the DHBW cafeteria. By identifying patterns in visitor behavior, we aim to support better planning for both cafeteria staff and students.


### Funktion scrape_rapla_calendar

Scrape the Rapla calendar HTML for a given user & file (course).\
Optionally filter calendar week.


In [2]:
import re
import requests
import pandas as pd
from bs4 import BeautifulSoup

def extract_fields(title_text: str) -> dict:
    fields = {
        "veranstaltungstitel": None,
        "sprache": None,
        "studiengang": None,
        "anzahl_studierende": None,
        "geplante_vorlesungsstunden": None,
        "planungsnotiz": None,
        "ressourcen": None,
        "personen": None,
    }

    if not title_text.strip().lower().startswith("lehrveranstaltung"):
        first_line = title_text.strip().split("\n")[0]
        fields["veranstaltungstitel"] = first_line.strip()
        return fields

    def extract(label):
        pattern = rf"{label}:\s*\n*(.*?)\n\n"
        match = re.search(pattern, title_text, re.DOTALL | re.IGNORECASE)
        return match.group(1).strip() if match else None

    fields["veranstaltungstitel"] = extract("Titel")
    fields["sprache"] = extract("Sprache")
    fields["studiengang"] = extract("Studiengang")
    fields["anzahl_studierende"] = extract("Anzahl Studierende")
    fields["geplante_vorlesungsstunden"] = extract("Gepl\. Vorlesungsstunden")
    fields["planungsnotiz"] = extract("Planungsnotiz")

    res_match = re.search(r"Ressourcen:\s*\n(.*?)\n\n", title_text, re.DOTALL)
    if res_match:
        ressourcen = res_match.group(1).strip().replace("\n", ",")
        fields["ressourcen"] = ressourcen

    personen_match = re.search(r"Personen:\s*\n(.*?)\n\n", title_text, re.DOTALL)
    if personen_match:
        personen_block = personen_match.group(1).strip()
        personen = list({line.strip() for line in personen_block.split("\n") if line.strip()})
        fields["personen"] = ", ".join(personen)

    return fields


def scrape_rapla_calendar(user: str, file: str, day: int = None, month: int = None, year: int = None):
    base_url = "https://rapla.dhbw-karlsruhe.de/rapla"
    params = {
        "page": "calendar",
        "user": user,
        "file": file
    }

    if day and month and year:
        params.update({
            "day": day,
            "month": month,
            "year": year
        })

    try:
        resp = requests.get(base_url, params=params)
        resp.raise_for_status()
        soup = BeautifulSoup(resp.text, "html.parser")
        text = soup.get_text(separator="\n")
    except requests.exceptions.RequestException as e:
        print(f"Error fetching data: {e}")
        return pd.DataFrame()

    pattern = re.compile(
        r"(?P<start>\d{2}:\d{2})\s*-\s*(?P<end>\d{2}:\d{2})\s+"
        r"(?P<title>.*?)(?:\n|\s)+"
        r"(?P<weekday>Mo|Di|Mi|Do|Fr|Sa|So)\s+"
        r"(?P<date>\d{2}\.\d{2}\.\d{2})",
        re.DOTALL
    )

    events = []
    for m in pattern.finditer(text):
        try:
            dd, mm, yy = m.group("date").split(".")
            iso_date = f"20{yy}-{mm}-{dd}"
            title_raw = m.group("title").strip()

            extracted = extract_fields(title_raw)

            event = {
                "title": title_raw,
                "start": f"{iso_date} {m.group('start')}",
                "end": f"{iso_date} {m.group('end')}",
                "weekday": m.group("weekday"),
                "raw_date": m.group("date"),
            }
            event.update(extracted)
            events.append(event)
        except Exception as e:
            print(f"Error processing event: {e}, text: {m.group(0)}")
            continue 

    df = pd.DataFrame(events)

    if df.empty:
        print("Warning: No events found for this period.")
        return df  

    try:
        df["start"] = pd.to_datetime(df["start"])
        df["end"] = pd.to_datetime(df["end"])
        df["date"] = df["start"].dt.date
    except KeyError as e:
        print(f"KeyError: {e}.  Columns in DataFrame: {df.columns}")
        return df

    return df

In [9]:
import datetime

def get_monday_of_week(year: int, week: int) -> datetime.date:
    """
    Returns the date of the Monday for a given year and ISO week number.
    ISO calendar: Monday as the first day of the week.
    ISO week 1: the week containing the first Thursday of the year.
    """
    return datetime.datetime.strptime(f'{year}-W{week:02d}-1', "%Y-W%W-%w").date()

def scrape_full_year(user: str, file: str, year: int = 2025):
    all_dfs = []
    max_week = 53

    for week in range(1, max_week + 1):
        try:
            monday = get_monday_of_week(year, week)
        except ValueError:
            continue

        print(f"Scraping week {week}, start date {monday}")
        df_week = scrape_rapla_calendar(
            user=user,
            file=file,
            day=monday.day,
            month=monday.month,
            year=monday.year,
        )
        all_dfs.append(df_week)

    full_df = pd.concat(all_dfs, ignore_index=True)
    return full_df


In [58]:
def download_all_calendars_with_study_programs():
    study_programs = {
        # Wirtschaftsinformatik
        "WWI22B1": "brockmans",
        "WWI23B1": "brockmans",
        "WWI24B1": "brockmans",
        "WWI22B2": "ratz",
        "WWI23B2": "ratz",
        "WWI24B2": "ratz",
        # B3, B4, B5 fehlen
        # Angewante Informatik
        "TINF22B1": "freudenmann",
        "TINF23B1": "freudenmann",
        "TINF24B1": "freudenmann",
        "TINF22B4": "eisenbiegler",
        "TINF23B4": "eisenbiegler",
        "TINF24B4": "eisenbiegler",
        "TINF22B6": "li",
        "TINF23B6": "li",
        "TINF24B6": "li",
        "TINF22B2": "ritterbusch",
        "TINF23B2": "ritterbusch",
        "TINF24B2": "ritterbusch",
        "TINF22B5": "Strand",
        "TINF23B5": "Strand",
        "TINF24B5": "Strand",
    }

    all_dataframes = []

    for course, user in study_programs.items():
        try:
            df = scrape_full_year(user=user, file=course, year=2025)
            df["Kurs"] = course
            all_dataframes.append(df)
        except Exception as e:
            print(f"Fehler bei Kurs {course} mit User {user}: {e}")

    if all_dataframes:
        final_df = pd.concat(all_dataframes, ignore_index=True)
        final_df.to_csv("all_calenders_2025.csv", index=False)
    else:
        print("No calenders found")

download_all_calendars_with_study_programs()


Scraping week 1, start date 2025-01-06
Scraping week 2, start date 2025-01-13
Scraping week 3, start date 2025-01-20
Scraping week 4, start date 2025-01-27
Scraping week 5, start date 2025-02-03
Scraping week 6, start date 2025-02-10
Scraping week 7, start date 2025-02-17
Scraping week 8, start date 2025-02-24
Scraping week 9, start date 2025-03-03
Scraping week 10, start date 2025-03-10
Scraping week 11, start date 2025-03-17
Scraping week 12, start date 2025-03-24
Scraping week 13, start date 2025-03-31
Scraping week 14, start date 2025-04-07
Scraping week 15, start date 2025-04-14
Scraping week 16, start date 2025-04-21
Scraping week 17, start date 2025-04-28
Scraping week 18, start date 2025-05-05
Scraping week 19, start date 2025-05-12
Scraping week 20, start date 2025-05-19
Scraping week 21, start date 2025-05-26
Scraping week 22, start date 2025-06-02
Scraping week 23, start date 2025-06-09
Scraping week 24, start date 2025-06-16
Scraping week 25, start date 2025-06-23
Scraping 

In [8]:
df = pd.read_csv("all_calenders_2025.csv")
df[df["Kurs"]=="WWI24B2"].tail(10)

Unnamed: 0,title,start,end,weekday,raw_date,veranstaltungstitel,sprache,studiengang,anzahl_studierende,geplante_vorlesungsstunden,planungsnotiz,ressourcen,personen,date,Kurs
687,"Teamarbeit I/II\nerstellt am 17.07.20 10:54, z...",2025-10-17 08:30:00,2025-10-17 11:45:00,Fr,17.10.25,Teamarbeit I/II,,,,,,,,2025-10-17,WWI24B2
688,Lehrveranstaltung\n\n\nTitel:\n\n\nTeamarbeit ...,2025-10-15 08:30:00,2025-10-15 11:45:00,Mi,15.10.25,Teamarbeit I/II,Deutsch,Wirtschaft/Wirtschaftsinformatik,0.0,28.0,reserviert von:,WWI24B2,"Könning, Jutta",2025-10-15,WWI24B2
689,Lehrveranstaltung\n\n\nTitel:\n\n\nKernkonzept...,2025-10-17 09:00:00,2025-10-17 12:15:00,Fr,17.10.25,Kernkonzepte und Methoden des SE,Deutsch,Wirtschaft/Wirtschaftsinformatik,1.0,28.0,reserviert von:,WWI24B2,"Schätzle, Roland",2025-10-17,WWI24B2
690,Kernkonzepte und Methoden des SE\nerstellt am ...,2025-10-22 09:00:00,2025-10-22 12:15:00,Mi,22.10.25,Kernkonzepte und Methoden des SE,,,,,,,,2025-10-22,WWI24B2
691,Lehrveranstaltung\n\n\nTitel:\n\n\nKernkonzept...,2025-10-24 09:00:00,2025-10-24 12:15:00,Fr,24.10.25,Kernkonzepte und Methoden des SE,Deutsch,Wirtschaft/Wirtschaftsinformatik,1.0,28.0,reserviert von:,WWI24B2,"Schätzle, Roland",2025-10-24,WWI24B2
692,Kernkonzepte und Methoden des SE\nerstellt am ...,2025-10-29 09:00:00,2025-10-29 12:15:00,Mi,29.10.25,Kernkonzepte und Methoden des SE,,,,,,,,2025-10-29,WWI24B2
693,Lehrveranstaltung\n\n\nTitel:\n\n\nKernkonzept...,2025-10-31 09:00:00,2025-10-31 12:15:00,Fr,31.10.25,Kernkonzepte und Methoden des SE,Deutsch,Wirtschaft/Wirtschaftsinformatik,1.0,28.0,reserviert von:,WWI24B2,"Schätzle, Roland",2025-10-31,WWI24B2
694,Kurztest SE Software Engineering II (30 min)\n...,2025-11-05 09:00:00,2025-11-05 09:30:00,Mi,05.11.25,Kurztest SE Software Engineering II (30 min),,,,,,,,2025-11-05,WWI24B2
695,Hauptklausur VWL (60 min)\nerstellt am 14.12.1...,2025-11-12 10:00:00,2025-11-12 11:00:00,Mi,12.11.25,Hauptklausur VWL (60 min),,,,,,,,2025-11-12,WWI24B2
696,Prüfung\n\n\nTitel:\n\n\nHauptklausur\n\n\n\n\...,2025-11-14 10:00:00,2025-11-14 11:00:00,Fr,14.11.25,Prüfung,,,,,,,,2025-11-14,WWI24B2
