In [1]:
text = """Make sure to choose right interpreter (e.g. venv) before running these commands.
Enter 'install' to proceed... (may open a popup in VSCode)"""
print(text)
if input(text) =='install':
    pass # 
    %pip install requests
    %pip install beautifulsoup4
    %pip install lxml
    %pip install python-dotenv

Make sure to choose right interpreter (e.g. venv) before running these commands.
Enter 'install' to proceed... (may open a popup in VSCode)


In [1]:
from dotenv import dotenv_values

config = dotenv_values(".env.local")
CLIENT_ID = config.get("CLIENT_ID")
API_KEY = config.get("API_KEY")

In [2]:
"""Raw requests to the Deutsche Bahn Timetable API."""

import requests

base_url = "https://apis.deutschebahn.com/db-api-marketplace/apis/timetables/v1/"

headers = {
    "DB-Client-Id": CLIENT_ID,
    "DB-Api-Key": API_KEY,
    "accept": "application/xml"
}

def get_station(pattern: str):
    url = base_url + f"station/{pattern}"
    response = requests.get(url, headers=headers)
    return response

def get_plan(evaNo: str, dateYYMMDD: str, hourHH: str):
    url = base_url + f"plan/{evaNo}/{dateYYMMDD}/{hourHH}"
    response = requests.get(url, headers=headers)
    return response

def get_changes(evaNo: str):
    url = base_url + f"fchg/{evaNo}"
    response = requests.get(url, headers=headers)
    return response

In [None]:
"""Wrap the raw requests with BeautifulSoup for easier parsing."""

from bs4 import BeautifulSoup

def soupify_xml(data: str):
    return BeautifulSoup(data, "xml")

def better_get_station(pattern: str) -> list:
    """Returns a list of stations matching the pattern."""
    res = get_station(pattern)
    res.raise_for_status()
    soup = soupify_xml(res.text)
    stations = soup.find_all("station")
    return [
        {
            "name": s.get("name"),
            "p (platforms)": s.get("p").split("|") if s.get("p") else [], # platforms
            "eva (EVA station number)": s.get("eva"), # EVA station number
            "ds100 (Station code)": s.get("ds100"), # Station code
            "meta (Meta information)": s.get("meta"), # Meta information
            "db (isDb?)": s.get("db"), # isDb?
        }
        for s in stations
    ]

def better_get_plan(evaNo: str, dateYYMMDD: str, hourHH: str) -> list:
    """Returns the stops for the given station EVA number, date (YYMMDD) and hour (HH)."""
    res = get_plan(evaNo, dateYYMMDD, hourHH)
    res.raise_for_status()
    soup = soupify_xml(res.text)
    
    def parse_ar(ar: BeautifulSoup):
        return {
            "pp (plannedPlatform)": ar.get("pp"), # planned platform
            "ppth (plannedPath)": ar.get("ppth").split('|'), # planned path
            "pt (plannedTime)": ar.get("pt"), # planned time
            "l (line)": ar.get("l"), # line
        }

    def parse_dp(dp: BeautifulSoup):
        return {
            "pp (plannedPlatform)": dp.get("pp"), # planned platform
            "ppth (plannedPath)": dp.get("ppth").split('|'), # planned path
            "pt (plannedTime)": dp.get("pt"), # planned time
            "l (line)": dp.get("l"), # line
        }

    def parse_tl(tl: BeautifulSoup):
        return {
            "c (category)": tl.get("c"), # e.g. ICE, RE
            "n (trainNumber)": tl.get("n"), # train number
            "f (filterFlag)": tl.get("f"), # filter flag
            "o (owner)": tl.get("o"), # owner
            "t (tripType)": tl.get("t"), # trip type {p,e,z,s,h,n}
        }

    def parse_stop(s):
        ar = parse_ar(s.ar) if s.ar else None # arrival
        dp = parse_dp(s.dp) if s.dp else None # departure
        tl = parse_tl(s.tl) if s.tl else None # track/ platform
        return {
            "id": s.get("id"),
            "ar (arrival)": ar,
            "dp (departure)": dp,
            "tl (trainInfoTuple)": tl
        }
        
    stops = soup.find_all("s")
    return [parse_stop(s) for s in stops]

def better_get_changes(evaNo: str) -> BeautifulSoup:
    res = get_changes(evaNo)
    res.raise_for_status()
    soup = soupify_xml(res.text)
    timetable = soup.timetable
    raw_m = timetable.find_all("m") # messagess
    raw_s = timetable.find_all("s") # TimetableStop
    names = set([c.name for c in timetable.children])
    
    def parse_s(raw):
        messages = raw.find_all('m')
        def parse_message(m):
            return { c.name: c.content for c in m.children }
        messages = [parse_message(m) for m in messages]
        return {
            'm': messages,
            'ar': 
        }
        
    print(s[0].prettify())
    # print(soup.timetable.find_all("m")[0].prettify())
    # return None
    # return print(soup.prettify()) # TODO

In [72]:
better_get_changes("8011160")

<s eva="8011160" id="8617305306705083970-2601171345-23">
 <m cat="Störung" from="2601171645" id="r2558183" pr="1" t="h" to="2601171900" ts="2601171712" ts-tts="26-01-17 17:12:48.129"/>
 <m cat="Information" from="2512140000" id="r2508472" pr="2" t="h" to="2612122359" ts="2512132303" ts-tts="26-01-16 23:16:04.185"/>
 <ar ct="2601180008">
  <m c="0" id="r42849891" t="f" ts="2601171447" ts-tts="26-01-17 14:47:25.233"/>
 </ar>
 <dp ct="2601180012">
  <m c="0" id="r42849891" t="f" ts="2601171447" ts-tts="26-01-17 14:47:25.233"/>
 </dp>
</s>



In [4]:
# today as YYMMDD
from datetime import datetime

SOME_DATE =  datetime.now().strftime("%y%m%d")
SOME_HOUR_HH = datetime.now().strftime("%H")

In [5]:
res = get_station("BLS") # Berlin HBF
print(res.text)

<stations>

<station p="11|12 D - G|12|13 A - D|13|14|13 D - G|13 A - C|13 C - D|11 D - G|14 A - D|14 A - C|14 C - D|14 E - F|11 C - D|13 E - F|11 E - F|11 A - D|12 A - D|14 D - G|12 C - D|12 E - F" meta="8070952|8089021|8098160" name="Berlin Hbf" eva="8011160" ds100="BLS" db="true" creationts="26-01-14 00:10:46.074"/>

</stations>



In [6]:
better_get_station("BLS")

[{'name': 'Berlin Hbf',
  'p (platforms)': ['11',
   '12 D - G',
   '12',
   '13 A - D',
   '13',
   '14',
   '13 D - G',
   '13 A - C',
   '13 C - D',
   '11 D - G',
   '14 A - D',
   '14 A - C',
   '14 C - D',
   '14 E - F',
   '11 C - D',
   '13 E - F',
   '11 E - F',
   '11 A - D',
   '12 A - D',
   '14 D - G',
   '12 C - D',
   '12 E - F'],
  'eva (EVA station number)': '8011160',
  'ds100 (Station code)': 'BLS',
  'meta (Meta information)': '8070952|8089021|8098160',
  'db (isDb?)': 'true'}]

In [7]:
stations = better_get_station("*")

In [8]:
def find_includes(key, value):
    return [s for s in stations if s.get(key) and value in s.get(key)]

def find_equals(key, value):
    return [s for s in stations if s.get(key) == value]

find_includes("name", "Karlsruhe Hbf")

[{'name': 'Karlsruhe Hbf',
  'p (platforms)': [],
  'eva (EVA station number)': '8000191',
  'ds100 (Station code)': 'RK',
  'meta (Meta information)': '150089|8079041|8089390',
  'db (isDb?)': 'true'},
 {'name': 'Karlsruhe Hbf Südausgang',
  'p (platforms)': [],
  'eva (EVA station number)': '8089390',
  'ds100 (Station code)': 'RK  S',
  'meta (Meta information)': '150089|8000191|8079041',
  'db (isDb?)': 'true'}]

In [9]:
stations[0]

{'name': 'Aach Rathaus, Dornstetten',
 'p (platforms)': [],
 'eva (EVA station number)': '815010',
 'ds100 (Station code)': 'D815010',
 'meta (Meta information)': '8001510',
 'db (isDb?)': 'false'}

In [10]:
better_get_station("8011160")


[{'name': 'Berlin Hbf',
  'p (platforms)': ['11',
   '12 D - G',
   '12',
   '13 A - D',
   '13',
   '14',
   '13 D - G',
   '13 A - C',
   '13 C - D',
   '11 D - G',
   '14 A - D',
   '14 A - C',
   '14 C - D',
   '14 E - F',
   '11 C - D',
   '13 E - F',
   '11 E - F',
   '11 A - D',
   '12 A - D',
   '14 D - G',
   '12 C - D',
   '12 E - F'],
  'eva (EVA station number)': '8011160',
  'ds100 (Station code)': 'BLS',
  'meta (Meta information)': '8070952|8089021|8098160',
  'db (isDb?)': 'true'}]

In [11]:
better_get_station("8098160")

[{'name': 'Berlin Hbf',
  'p (platforms)': ['1', '2', '3', '4', '5', '6', '7', '8'],
  'eva (EVA station number)': '8098160',
  'ds100 (Station code)': 'BL',
  'meta (Meta information)': '8011160|8070952|8089021',
  'db (isDb?)': 'true'}]

In [12]:
better_get_plan("8000191", SOME_DATE, SOME_HOUR_HH)

[{'id': '3756818115967167362-2601172348-1',
  'ar (arrival)': None,
  'dp (departure)': {'pp (plannedPlatform)': '7',
   'ppth (plannedPath)': ['Karlsruhe-Durlach',
    'Weingarten(Baden)',
    'Untergrombach',
    'Bruchsal Bildungszentrum',
    'Bruchsal',
    'Bruchsal Schloßgarten',
    'Bruchsal Stegwiesen',
    'Ubstadt Ort'],
   'pt (plannedTime)': '2601172348',
   'l (line)': 'S31'},
  'tl (trainInfoTuple)': {'c (category)': 'AVG',
   'n (trainNumber)': '85172',
   'f (filterFlag)': 'D',
   'o (owner)': 'A6S31',
   't (tripType)': 'p'}},
 {'id': '-384675878924478971-2601172312-1',
  'ar (arrival)': None,
  'dp (departure)': {'pp (plannedPlatform)': '1',
   'ppth (plannedPath)': ['Karlsruhe-Hagsfeld',
    'Blankenloch',
    'Friedrichstal(Baden)',
    'Graben-Neudorf',
    'Wiesental',
    'Waghäusel',
    'Neulußheim',
    'Hockenheim',
    'Oftersheim',
    'Schwetzingen',
    'Schwetzingen Nordstadt',
    'Schwetzingen-Hirschacker',
    'Mannheim-Rheinau',
    'Mannheim-Necka

In [13]:
res = get_plan("8011160", SOME_DATE, SOME_HOUR_HH) # Berlin HBF
res.raise_for_status()
print(res.text)


<?xml version='1.0' encoding='UTF-8'?><timetable station='Berlin Hbf'><s id="-8598548156328754795-2601172234-6"><tl f="D" t="p" o="OWRE" c="OE" n="73824"/><ar pt="2601172307" pp="14" l="RE1" fb="RE1" ppth="Erkner|Berlin Ostkreuz|Berlin Ostbahnhof|Berlin Alexanderplatz|Berlin Friedrichstraße"/><dp pt="2601172309" pp="14" l="RE1" fb="RE1" ppth="Berlin Zoologischer Garten|Berlin-Charlottenburg"/></s><s id="-1805375228145536064-2601171822-9"><tl f="F" t="p" o="80" c="ICE" n="655"/><ar pt="2601172349" pp="12" fb="ICE 655" ppth="Bonn Hbf|Köln Hbf|Wuppertal Hbf|Hagen Hbf|Bielefeld Hbf|Hannover Hbf|Magdeburg Hbf|Berlin Zoologischer Garten"/><dp pt="2601172355" pp="12" fb="ICE 655" ppth="Berlin Ostbahnhof"/></s><s id="1832688437198980020-2601172300-7"><tl f="N" t="p" o="800165" c="RE" n="3138"/><ar pt="2601172350" pp="11" l="RE2" fb="RE2" ppth="Hennigsdorf(b Berlin)|Dallgow-Döberitz|Berlin-Staaken|Berlin-Spandau|Berlin-Charlottenburg|Berlin Zoologischer Garten"/><dp pt="2601172352" pp="11" l="R

In [14]:
better_get_plan("8011160", SOME_DATE, SOME_HOUR_HH)

[{'id': '-8598548156328754795-2601172234-6',
  'ar (arrival)': {'pp (plannedPlatform)': '14',
   'ppth (plannedPath)': ['Erkner',
    'Berlin Ostkreuz',
    'Berlin Ostbahnhof',
    'Berlin Alexanderplatz',
    'Berlin Friedrichstraße'],
   'pt (plannedTime)': '2601172307',
   'l (line)': 'RE1'},
  'dp (departure)': {'pp (plannedPlatform)': '14',
   'ppth (plannedPath)': ['Berlin Zoologischer Garten',
    'Berlin-Charlottenburg'],
   'pt (plannedTime)': '2601172309',
   'l (line)': 'RE1'},
  'tl (trainInfoTuple)': {'c (category)': 'OE',
   'n (trainNumber)': '73824',
   'f (filterFlag)': 'D',
   'o (owner)': 'OWRE',
   't (tripType)': 'p'}},
 {'id': '-1805375228145536064-2601171822-9',
  'ar (arrival)': {'pp (plannedPlatform)': '12',
   'ppth (plannedPath)': ['Bonn Hbf',
    'Köln Hbf',
    'Wuppertal Hbf',
    'Hagen Hbf',
    'Bielefeld Hbf',
    'Hannover Hbf',
    'Magdeburg Hbf',
    'Berlin Zoologischer Garten'],
   'pt (plannedTime)': '2601172349',
   'l (line)': None},
  'dp (d

In [15]:
res = get_changes("8011160") # Berlin HBF
print(res.text)

<timetable station="Berlin Hbf" eva="8011160">

<s id="2952296863083283051-2601171921-9" eva="8011160">
    <ar ct="2601180033">
        <m id="r42879711" t="d" c="43" ts="2601171951" ts-tts="26-01-17 19:52:09.914"/>
        <m id="r42879713" t="d" c="43" ts="2601171951" ts-tts="26-01-17 19:52:09.914"/>
    </ar>
    <dp ct="2601180036">
        <m id="r42879711" t="d" c="43" ts="2601171951" ts-tts="26-01-17 19:52:09.914"/>
        <m id="r42879713" t="d" c="43" ts="2601171951" ts-tts="26-01-17 19:52:09.914"/>
    </dp>
</s>


<s id="-5978303486513211708-2601171809-9" eva="8011160">
    <m id="r2557924" t="h" from="2601171306" to="2601172320" cat="Information" ts="2601170132" ts-tts="26-01-17 01:32:18.041" pr="2"/>
    <m id="r2558110" t="h" from="2601171306" to="2601172320" cat="Information" ts="2601171258" ts-tts="26-01-17 12:58:07.867" pr="3"/>
    <ar ppth="Frankfurt(M) Flughafen Fernbf|Fulda|Kassel-Wilhelmshöhe|Göttingen|Hildesheim Hbf|Braunschweig Hbf|Magdeburg Hbf|Berlin Zoologi

In [16]:
better_get_changes("8011160")

<?xml version="1.0" encoding="utf-8"?>
<timetable eva="8011160" station="Berlin Hbf">
 <s eva="8011160" id="2952296863083283051-2601171921-9">
  <ar ct="2601180033">
   <m c="43" id="r42879711" t="d" ts="2601171951" ts-tts="26-01-17 19:52:09.914"/>
   <m c="43" id="r42879713" t="d" ts="2601171951" ts-tts="26-01-17 19:52:09.914"/>
  </ar>
  <dp ct="2601180036">
   <m c="43" id="r42879711" t="d" ts="2601171951" ts-tts="26-01-17 19:52:09.914"/>
   <m c="43" id="r42879713" t="d" ts="2601171951" ts-tts="26-01-17 19:52:09.914"/>
  </dp>
 </s>
 <s eva="8011160" id="-5978303486513211708-2601171809-9">
  <m cat="Information" from="2601171306" id="r2557924" pr="2" t="h" to="2601172320" ts="2601170132" ts-tts="26-01-17 01:32:18.041"/>
  <m cat="Information" from="2601171306" id="r2558110" pr="3" t="h" to="2601172320" ts="2601171258" ts-tts="26-01-17 12:58:07.867"/>
  <ar ct="2601172344" fb="ICE 792" pp="11" ppth="Frankfurt(M) Flughafen Fernbf|Fulda|Kassel-Wilhelmshöhe|Göttingen|Hildesheim Hbf|Bra

In [18]:
from datetime import datetime

def test_frequency(request, test_time: int):
    t_start = datetime.now()
    def dt():
        return (datetime.now() - t_start).seconds
    
    call_count = 0
    # for X seconds
    while dt() < test_time:
        response = request()
        response.raise_for_status()
        print(response.status_code, end=' ', flush=True)
        call_count += 1
    print(f"\nCalls in {test_time} seconds: {call_count} ({call_count / test_time} calls/second)")

print("getPlan()...")
test_frequency(lambda: get_plan("8011160", SOME_DATE, SOME_HOUR_HH), 30)

print("getStation()...")
test_frequency(lambda: get_station("BLS"), 30)

print("getChanges()...")
test_frequency(lambda: get_changes("8011160"), 30)

getPlan()...
200 200 200 200 200 200 200 200 200 200 200 200 200 200 200 200 200 200 200 200 200 200 200 200 200 200 200 200 
Calls in 30 seconds: 28 (0.9333333333333333 calls/second)
getStation()...
200 200 200 200 200 200 200 200 200 200 200 200 200 200 200 200 200 200 200 200 
Calls in 30 seconds: 20 (0.6666666666666666 calls/second)
getChanges()...
200 200 200 200 200 200 200 200 200 200 200 200 200 200 200 
Calls in 30 seconds: 15 (0.5 calls/second)
