## Fetch incident data from the Warsaw 19115 API

In [1]:
import gzip
import json
import os
import time
from asyncio import sleep
from datetime import date, datetime, timedelta

import requests

In [2]:
# API key: taken from the UM_WARSZAWA_API_KEY environment variable so the real key
# never has to be written into the notebook. When the variable is not set, the
# placeholder below is used unchanged.
API_KEY = os.environ.get("UM_WARSZAWA_API_KEY", "XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX")  # get your own API key :)

# URL format: incidents endpoint with the API key passed as a query parameter
REQUEST_URL = "https://api.um.warszawa.pl/api/action/19115v2_incidents/?apikey=" + API_KEY

# delay between requests, in seconds
REQUEST_DELAY_SEC = 0

In [3]:
# Test request: query a single one-hour window to check that the URL, the API key
# and the payload format are accepted before running the full download.
payload = json.dumps({
    "filters": [
        {"field": "CREATE_DATE", "operator": "GEQ", "value": ["2021-11-11T10:00:00Z"]},
        {"field": "CREATE_DATE", "operator": "LEQ", "value": ["2021-11-11T11:00:00Z"]},
    ],
    "operators": ["AND"],
    "sorters": [],
    "paginator": {"resultLimit": 2000, "resultOffset": 0},
})
headers = {
    'Content-Type': 'application/json'
}

# The timeout keeps this cell from hanging forever if the server never answers.
response = requests.request(
    "POST", REQUEST_URL, headers=headers, data=payload, timeout=60)

# Stop here on an HTTP error status rather than inspecting an error body afterwards.
response.raise_for_status()


In [4]:
# Display the raw body (bytes) of the test response obtained in the previous cell.
response.content

b'    {"result":{"success":true,"result":{"totalRecords":"85","message":"OK","result":[{"status":"Zamkni\\u0119te","city":"Warszawa","categoryName":"Proces Interwencyjny","createDate":"2021-11-11 11:00:19","lon":-1,"caseId":"1-10TLHL8","number":"613079\\/21","eventName":"Uwagi dotycz\\u0105ce przystanku - rozk\\u0142ad jazdy, uszkodzenia","subcategoryName":"Komunikacja","lat":-1,"id":"1-10TL9VC","sourceType":"CALL"},{"status":"Zamkni\\u0119te","categoryName":"Informacyjne","createDate":"2021-11-11 11:01:19","lon":-1,"number":"613080\\/21","caseId":"1-10TLABJ","subcategoryName":"Inne","lat":-1,"id":"1-10TLA6U","sourceType":"CALL"},{"status":"Zamkni\\u0119te","city":"Warszawa","categoryName":"Proces Interwencyjny","district":"Ursyn\\u00f3w","createDate":"2021-11-11 11:01:43","lon":21.020491371783493,"caseId":"1-10TLHJQ","number":"613081\\/21","eventName":"Przepe\\u0142nienie\\/brak odbioru wg harmonogramu","street":"Bociania","subcategoryName":"\\u015amieci","lat":52.148446489663534,"hou

In [5]:
def fetch_file(date_start: str, date_end: str, raw_data_folder: str = "data_raw/",
               timeout: float = 60.0) -> None:
    """Download the incidents of one time window and store the raw response gzipped.

    A POST request is sent to ``REQUEST_URL`` with two ``CREATE_DATE`` filters
    (``GEQ date_start`` AND ``LEQ date_end``). The response body is written,
    unmodified, to ``<raw_data_folder>/<date_start>-<date_end>.gz``, with colons
    removed from the file name.

    Only one page is requested (``resultLimit`` 2000, ``resultOffset`` 0) and no
    further pages are fetched.
    NOTE(review): a window matching more records than that limit would
    presumably be truncated -- confirm against the API and keep windows small.

    Args:
        date_start: Value sent with the ``GEQ`` filter,
            e.g. ``"2021-11-11T10:00:00Z"``.
        date_end: Value sent with the ``LEQ`` filter, same format.
        raw_data_folder: Directory for the output file. It is created when it
            does not exist yet; a trailing slash is optional.
        timeout: Seconds passed to ``requests`` as the request timeout, so a
            stalled connection raises instead of blocking forever.

    Raises:
        requests.exceptions.HTTPError: The server answered with an error status.
        requests.exceptions.Timeout: The server did not answer within ``timeout``.
        OSError: The output directory or file could not be written.
    """
    print("Fetching:", date_start, date_end)

    query = {
        "filters": [
            {"field": "CREATE_DATE", "operator": "GEQ", "value": [date_start]},
            {"field": "CREATE_DATE", "operator": "LEQ", "value": [date_end]},
        ],
        "operators": ["AND"],
        "sorters": [],
        "paginator": {"resultLimit": 2000, "resultOffset": 0},
    }

    response = requests.request(
        "POST",
        REQUEST_URL,
        headers={'Content-Type': 'application/json'},
        data=json.dumps(query),
        timeout=timeout,
    )

    # Abort before writing anything when the server reports an error status.
    response.raise_for_status()

    # Colons are stripped because they are not valid in file names on every OS.
    filename = (date_start + "-" + date_end).replace(":", "") + ".gz"

    # Create the target directory on first use; an empty folder means "current dir".
    if raw_data_folder:
        os.makedirs(raw_data_folder, exist_ok=True)
    target_path = os.path.join(raw_data_folder, filename)

    # Compress and save the response body exactly as received.
    with gzip.open(target_path, "wb") as f:
        f.write(response.content)
    print("Saved:", filename)


In [6]:
# Smoke test: fetch and store one single one-hour window.
fetch_file(date_start="2021-11-11T10:00:00Z", date_end="2021-11-11T11:00:00Z")

Fetching: 2021-11-11T10:00:00Z 2021-11-11T11:00:00Z
Saved: 2021-11-11T100000Z-2021-11-11T110000Z.gz


In [None]:
# Loop: call fetch_file for every `interval`-long window from `start` up to `end`.
# NOTE(review): fetch_file filters with GEQ/LEQ, so consecutive windows share their
# boundary timestamp; a record created exactly on a boundary may presumably end up
# in two files -- confirm, and de-duplicate downstream if so.

TIMESTAMP_FORMAT = "%Y-%m-%dT%H:%M:%SZ"

start = datetime(2021, 1, 1, 0, 0, 0)
end = datetime(2022, 9, 24, 0, 0, 0)
interval = timedelta(hours=1)

while start < end:
    window_end = start + interval
    print(start, window_end)
    start_str = start.strftime(TIMESTAMP_FORMAT)
    end_str = window_end.strftime(TIMESTAMP_FORMAT)
    fetch_file(start_str, end_str)

    start = window_end
    # Blocking sleep: the requests above are blocking as well, and this avoids a
    # top-level `await`, which is only valid under IPython's autoawait.
    time.sleep(REQUEST_DELAY_SEC)