In [None]:
from seleniumwire import webdriver
import time
import pathlib
import os
import requests
from urllib.parse import urlparse, parse_qs
from io import BytesIO
import pandas as pd
import json


# Get metadata of all bike counting stations of a certain city

In [None]:
# public eco-counter overview page for Stuttgart (id=607)
URL_STUTTGART = "https://data.eco-counter.com/ParcPublic/?id=607#"

In [None]:
# the chromedriver binary is expected in the current working directory
current_path = pathlib.Path().resolve()
driver_path = str(current_path / "chromedriver")

# start a selenium-wire controlled Chrome session and open the city overview page
driver = webdriver.Chrome(driver_path)
driver.get(URL_STUTTGART)

# wait for the page to load completely
# NOTE(review): a fixed one-second pause is a guess, not a guarantee that
# the page has already issued all of its background requests
time.sleep(1)

In [None]:
# get all requests captured by the selenium-wire driver
# (here: the traffic caused by loading URL_STUTTGART)
all_requests = driver.requests

In [5]:
# host (netloc) of the API; captured requests are compared against it
# to pick out the one we are interested in
TARGET_URL = "www.eco-visio.net"
# placeholder for the matching captured request; stays "" until one is found
GET_REQUEST = ""

Find the GET request that the page sent to the eco-visio API; it returns the metadata of all bike counting stations in the city

In [6]:
# scan the captured requests for the first one that went to the API host
for request in all_requests:
    # only the host part of the URL is needed for the comparison
    parsed_url = urlparse(request.url)
    if parsed_url.netloc == TARGET_URL:
        GET_REQUEST = request
        print(GET_REQUEST.url)
        break
else:
    # without a match GET_REQUEST would silently stay "" and the following
    # cells would fail with a far less helpful error
    raise LookupError(
        "No captured request was sent to {!r}".format(TARGET_URL)
    )

https://www.eco-visio.net/api/aladdin/1.0.0/pbl/publicwebpageplus/607?withNull=true


Fetch metadata information of all counting stations of a certain city

In [7]:
# re-issue the captured API call ourselves; hand requests the URL string
# explicitly instead of the captured request object (which presumably only
# worked because requests converts its argument with str())
url_endpoint = GET_REQUEST.url
# timeout in seconds: without one a stalled server blocks this cell indefinitely
response = requests.get(url_endpoint, timeout=30)
response

<Response [200]>

Extract the relevant metadata of the counting stations and save it to a JSON file

In [8]:
# parse the JSON body of the response; the result is iterated station by station below
data_json = json.loads(response.content)

In [9]:
# keep only the fields we need per station and give them English names
counting_station_list = [
    {
        "id": station["idPdc"],
        "name": station["nom"],
        "latitude": station["lat"],
        "longitude": station["lon"],
        "start_time": station["debut"],
    }
    for station in data_json
]

In [10]:
# group the station records under their city name
data = {'Stuttgart': counting_station_list}

In [11]:
# persist the per-city station metadata as JSON in the working directory
with open('counting_stations_metadata.json', 'w') as outfile:
    outfile.write(json.dumps(data))

# Get data from bike counting stations

In [12]:
# load the station metadata back from disk; the context manager makes sure
# the file handle is closed again as soon as parsing is done
with open('counting_stations_metadata.json') as file:
    counting_stations_metadata = json.load(file)
# the counting station itself is selected further below via the index i


In [13]:
# display the loaded metadata: a dict keyed by city name, each holding a list of station records
counting_stations_metadata

{'Stuttgart': [{'id': 100034882,
   'name': 'König-Karls-Brücke Barometer',
   'latitude': 48.79912609299229,
   'longitude': 9.210372161982697,
   'start_time': '01/01/2014'},
  {'id': 100034894,
   'name': 'Böblinger Straße',
   'latitude': 48.74661091153218,
   'longitude': 9.135662267916839,
   'start_time': '01/01/2014'},
  {'id': 100042486,
   'name': 'Taubenheimstraße',
   'latitude': 48.804909684344075,
   'longitude': 9.226946272791574,
   'start_time': '01/01/2014'},
  {'id': 100042487,
   'name': 'Waiblinger Straße',
   'latitude': 48.80513535517377,
   'longitude': 9.225987627445804,
   'start_time': '01/01/2014'},
  {'id': 100060100,
   'name': 'Samaraweg',
   'latitude': 48.80811,
   'longitude': 9.18551,
   'start_time': '01/01/2014'},
  {'id': 100061257,
   'name': 'Waldburgstraße',
   'latitude': 48.716494,
   'longitude': 9.08652,
   'start_time': '01/01/2014'},
  {'id': 100061633,
   'name': 'Kremmlerstraße',
   'latitude': 48.739821,
   'longitude': 9.152228,
   'st

In [14]:
# position of the station to fetch within counting_stations_metadata["Stuttgart"]
i = 4

Create URL for specific bike counting station

In [15]:
# public page of the selected station, addressed by its numeric id
selected_station_id = counting_stations_metadata["Stuttgart"][i]["id"]
BASE_URL = "https://data.eco-counter.com/public2/?id=" + str(selected_station_id)

In [16]:
# selenium-wire keeps the requests captured during earlier page loads of this
# driver; clear them so that only the traffic of this station page is
# inspected (otherwise a re-run for another station can match a stale request)
del driver.requests

driver.get(BASE_URL)

# wait for the page to load completely
# NOTE(review): a fixed one-second pause does not guarantee that the data
# request has already been sent
time.sleep(1)

# get all requests made by the page
all_requests = driver.requests

# host (netloc) of the API whose request shall be picked out
TARGET_URL = "www.eco-visio.net"
# placeholder for the matching captured request; stays "" until one is found
GET_REQUEST = ""

In [17]:
# scan the captured requests for the station's data request
for request in all_requests:
    # split the url and decode its query string
    parsed_url = urlparse(request.url)
    query_dictionary = parse_qs(parsed_url.query)
    # keep the first request that goes to the API host and carries a token "t"
    if parsed_url.netloc == TARGET_URL and "t" in query_dictionary:
        GET_REQUEST = request
        print(GET_REQUEST.url)
        break
else:
    # without a match GET_REQUEST would silently stay "" and the following
    # cell would fail with an AttributeError on `.url`
    raise LookupError(
        "No captured request to {!r} carries a 't' query parameter".format(TARGET_URL)
    )

https://www.eco-visio.net/api/aladdin/1.0.0/pbl/publicwebpage/data/100060100?begin=20200621&end=20210921&step=4&domain=607&withNull=true&t=b3098cd11ab3cb7bd4e11ddf8ffdecbf88380ee9ce294faf79b86f886c38707a


In [18]:
# split the captured request URL into its components and decode the query
# string into a dict that maps each parameter name to a list of values
parsed_url = urlparse(GET_REQUEST.url)
query_dictionary = parse_qs(parsed_url.query)

In [19]:
# inspect the URL components (scheme, netloc, path, query, ...)
parsed_url

ParseResult(scheme='https', netloc='www.eco-visio.net', path='/api/aladdin/1.0.0/pbl/publicwebpage/data/100060100', params='', query='begin=20200621&end=20210921&step=4&domain=607&withNull=true&t=b3098cd11ab3cb7bd4e11ddf8ffdecbf88380ee9ce294faf79b86f886c38707a', fragment='')

In [20]:
# decode the query string again (same expression as right after urlparse above)
# and display the resulting parameter dict
query_dictionary = parse_qs(parsed_url.query)
query_dictionary

{'begin': ['20200621'],
 'end': ['20210921'],
 'step': ['4'],
 'domain': ['607'],
 'withNull': ['true'],
 't': ['b3098cd11ab3cb7bd4e11ddf8ffdecbf88380ee9ce294faf79b86f886c38707a']}

Define Query Parameters for fetching the Data

In [21]:
# BEGIN / END use the same YYYYMMDD digit layout as the captured 'begin' / 'end'
# query values; presumably first and last day to request (TODO confirm)
BEGIN = 20160101
END = 20210921
# NOTE(review): replaces the captured 'step' value; its meaning (likely the
# aggregation interval) is not documented here; confirm against the API
STEP = 2

In [22]:
# replace the captured time range and step with our own values;
# all other parameters (domain, withNull, token) stay as captured
query_dictionary.update({"begin": BEGIN, "end": END, "step": STEP})

In [23]:
# check the parameters that will be sent with the data request
query_dictionary

{'begin': 20160101,
 'end': 20210921,
 'step': 2,
 'domain': ['607'],
 'withNull': ['true'],
 't': ['b3098cd11ab3cb7bd4e11ddf8ffdecbf88380ee9ce294faf79b86f886c38707a']}

Make Request and fetch the data

In [24]:
# rebuild the endpoint without its original query string; the adjusted
# parameters are passed separately so that requests encodes them
url_endpoint = parsed_url.scheme + "://" + parsed_url.netloc + parsed_url.path
# timeout in seconds: without one a stalled server blocks this cell indefinitely
response = requests.get(url_endpoint, params=query_dictionary, timeout=30)
response

<Response [200]>

Convert fetched data stream to csv

In [25]:
# parse the JSON body of the response
data_json = json.loads(response.content)
# peek at the first record to see which fields each entry carries
data_json[0]

{'date': '2020-06-23 00:00:00', 'comptage': 0, 'timestamp': 1592870400000}

In [26]:
# field names of the first record (not used by the cells visible in this notebook)
keys = data_json[0].keys()
# one list per column of the table that is built below
date_list = []
comptage_list = []
timestamp_list = []

In [27]:
# build each column from scratch so that re-running this cell yields the same
# lists again instead of appending every record a second time
date_list = [data_object["date"] for data_object in data_json]
comptage_list = [data_object["comptage"] for data_object in data_json]
timestamp_list = [data_object["timestamp"] for data_object in data_json]

In [28]:
# column names of the resulting table, in the order the lists are zipped below
columns = ["date", "comptage", "timestamp"]

In [29]:
# combine the three lists row-wise into (date, comptage, timestamp) tuples
# and turn them into a table with the column names defined above
rows = list(zip(date_list, comptage_list, timestamp_list))
df = pd.DataFrame(rows, columns=columns)

In [30]:
# write the table to test.csv in the working directory; with pandas' default
# arguments the row index is written as the first column as well
df.to_csv("test.csv")