In [13]:
# python -m pip install -r requirements.txt
# To convert this notebook to a Python script:
#  - via jupyterlab-gui
#  - via jupyter nbconvert --to script [YOUR_NOTEBOOK].ipynb

In [12]:
import json
from datetime import datetime, timezone
from pathlib import Path

import requests

## Berta Block Boulderhalle Berlin Scraper

In [4]:
"""
acronyms:

cod: status_code
ela: elapsed in microseconds (not milliseconds)
url: url
dat: date
enc: encoding
ctt: content-type
d: data

dli: datalist: [{sta: event start (UTC string per ISO 8601), end: event end (UTC string per ISO 8601)}, ...]
min: minCourseParticipantCount
max: maxCourseParticipantCount
cur: currentCourseParticipantCount
state: state

"""

pass

In [5]:
def toIsoString(dateTime):
    """Return the ISO 8601 text form of ``dateTime``.

    The formatting is delegated to the object's own ``isoformat()`` method,
    so a UTC offset appears in the result only when the object carries one.
    """
    iso_text = dateTime.isoformat()
    return iso_text

def getUtc():
    """Return the current moment as a timezone-aware UTC datetime.

    The microsecond field is zeroed so the value has whole-second precision.
    """
    now_utc = datetime.now(tz=timezone.utc)
    return now_utc.replace(microsecond=0)

def getUtcIsoString():
    """Return the current UTC time (whole seconds) as an ISO 8601 string."""
    current_utc = getUtc()
    return toIsoString(current_utc)

def unixTimestampToUTCIsoString(unixTimestamp):
    """Convert a Unix timestamp in milliseconds to an ISO 8601 string in UTC.

    Args:
        unixTimestamp: Milliseconds since the Unix epoch; anything ``int()``
            accepts (e.g. an int or a numeric string).

    Returns:
        The UTC wall-clock time as an ISO 8601 string *without* a UTC offset
        suffix (e.g. ``"2022-04-24T07:00:00"``).

    Raises:
        ValueError / TypeError: if ``unixTimestamp`` cannot be converted by ``int()``.
    """
    # int() is a safety net in case the value arrives as a string;
    # dividing by 1000 turns milliseconds into the seconds datetime expects.
    seconds = int(unixTimestamp) / 1000
    # datetime.utcfromtimestamp() is deprecated since Python 3.12. Convert via
    # an aware UTC datetime instead, then drop tzinfo so the emitted string
    # keeps the offset-free format this function has always produced.
    dt = datetime.fromtimestamp(seconds, tz=timezone.utc).replace(tzinfo=None)
    return toIsoString(dt)

def getUnixTimestamp(dt):
    """Return ``dt`` as a Unix timestamp expressed in milliseconds.

    The POSIX timestamp is truncated to whole seconds before scaling, so any
    sub-second part of ``dt`` is dropped and the result is a multiple of 1000.
    """
    whole_seconds = int(dt.timestamp())
    return whole_seconds * 1000

def getDatetimeTodayWithSpecificHour(hour):
    """Return today's UTC date at ``hour``:00:00 as an aware datetime.

    "Today" is taken from ``getUtc()``; minute and second are reset to zero.
    """
    today_utc = getUtc()
    return today_utc.replace(hour=hour, minute=0, second=0)

def parse_payload(response_payload):
    """Reduce the decoded API payload to the compact record format.

    For every entry of ``response_payload`` one dict is produced with the keys
    ``dli`` (list of ``{"sta", "end"}`` ISO strings built from the entry's
    ``dateList``), ``min``, ``max``, ``cur`` (participant counts) and ``state``.

    Returns:
        A list with one record per payload entry, in payload order.
    """
    def _slot_to_iso(slot):
        # Convert one dateList item from millisecond timestamps to ISO strings.
        return {
            "sta": unixTimestampToUTCIsoString(slot["start"]),
            "end": unixTimestampToUTCIsoString(slot["end"]),
        }

    return [
        {
            "dli": [_slot_to_iso(slot) for slot in course["dateList"]],
            "min": course["minCourseParticipantCount"],
            "max": course["maxCourseParticipantCount"],
            "cur": course["currentCourseParticipantCount"],
            "state": course["state"],
        }
        for course in response_payload
    ]

In [6]:
# (debug alternative: url = "https://jsonplaceholder.typicode.com/todos/1")

# The query window is passed as Unix timestamps in milliseconds and spans
# today from 07:00 to 22:00 UTC.
# start: e.g. 1650265200000
# end: e.g. 1650319200000
window_start = getUnixTimestamp(getDatetimeTodayWithSpecificHour(7))
window_end = getUnixTimestamp(getDatetimeTodayWithSpecificHour(22))
url = f"https://backend.dr-plano.com/courses_dates?id=114569964&start={window_start}&end={window_end}"
url

'https://backend.dr-plano.com/courses_dates?id=114569964&start=1650783600000&end=1650837600000'

In [7]:
# Send a browser-like User-Agent and explicitly ask for a JSON response.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.127 Safari/537.36",
    "accept": "application/json"
}
# requests applies no timeout by default, so an unresponsive server would block
# this cell forever. Fail with requests.exceptions.Timeout instead.
REQUEST_TIMEOUT_SECONDS = 30
response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT_SECONDS)

In [8]:
# Collect response metadata plus the reduced payload into one record
# (the short keys are explained in the acronym legend of this notebook).
parsed_result = {
    "cod": response.status_code,
    # timedelta.microseconds is only the sub-second component (0..999999) and
    # wraps for requests taking one second or longer; use the total duration.
    "ela": round(response.elapsed.total_seconds() * 1_000_000),
    "url": response.url,
    "dat": getUtcIsoString(),
    "enc": response.encoding,
    # .get() yields None (JSON null) instead of a KeyError when the server
    # sends no Content-Type header.
    "ctt": response.headers.get("Content-Type"),
    "d": parse_payload(response.json())
}

In [9]:
# Serialize the collected record to a single-line JSON document and show it.
json_string = json.dumps(obj=parsed_result)
json_string

'{"cod": 200, "ela": 195437, "url": "https://backend.dr-plano.com/courses_dates?id=114569964&start=1650783600000&end=1650837600000", "dat": "2022-04-24T07:19:28+00:00", "enc": "utf-8", "ctt": "application/json", "d": [{"dli": [{"sta": "2022-04-24T07:00:00", "end": "2022-04-24T10:00:00"}], "min": 1, "max": 40, "cur": 33, "state": "BOOKABLE"}, {"dli": [{"sta": "2022-04-24T07:30:00", "end": "2022-04-24T10:30:00"}], "min": 1, "max": 40, "cur": 19, "state": "BOOKABLE"}, {"dli": [{"sta": "2022-04-24T08:00:00", "end": "2022-04-24T11:00:00"}], "min": 1, "max": 40, "cur": 21, "state": "BOOKABLE"}, {"dli": [{"sta": "2022-04-24T08:30:00", "end": "2022-04-24T11:30:00"}], "min": 1, "max": 40, "cur": 14, "state": "BOOKABLE"}, {"dli": [{"sta": "2022-04-24T09:00:00", "end": "2022-04-24T12:00:00"}], "min": 1, "max": 40, "cur": 11, "state": "BOOKABLE"}, {"dli": [{"sta": "2022-04-24T09:30:00", "end": "2022-04-24T12:30:00"}], "min": 1, "max": 40, "cur": 3, "state": "BOOKABLE"}, {"dli": [{"sta": "2022-04-2

In [10]:
# Append the record as one line to the data file (one JSON document per line).
data_path = Path("stats/parsed.data")
# Create the target directory on first run so a fresh checkout does not fail
# with FileNotFoundError.
data_path.parent.mkdir(parents=True, exist_ok=True)
# Explicit encoding keeps the file independent of the platform default.
with data_path.open("a", encoding="utf-8") as data_file:
    data_file.write(json_string + "\n")