In [1]:
from seleniumwire import webdriver
import time
import pathlib
import os
import requests
from urllib.parse import urlparse, parse_qs
from io import BytesIO
import pandas as pd
import json


# Get metadata of all bike counting stations of a certain city

In [33]:
# Public eco-counter overview page for Stuttgart (domain id 607).
URL_STUTTGART = "https://data.eco-counter.com/ParcPublic/?id=607#"

In [35]:
# The chromedriver binary is looked up in the current working directory.
current_path = pathlib.Path().resolve()
driver_path = str(current_path / "chromedriver")

# Start a Selenium Wire controlled Chrome and open the city overview page.
driver = webdriver.Chrome(driver_path)
driver.get(URL_STUTTGART)

# wait for the page to load completely
time.sleep(1)

In [36]:
# get all requests made by the page, as captured by the Selenium Wire driver;
# they are searched in the next cell for the call to the data API
all_requests = driver.requests

In [37]:
# Host of the API call we want to pick out of the captured requests, and a
# placeholder (empty string) that is replaced once a matching request is found.
TARGET_URL, GET_REQUEST = "www.eco-visio.net", ""

Find the GET request the page made to the eco-visio API; its response contains the metadata of all bike counting stations in a city

In [38]:
# Scan the captured requests for the first one sent to the target host.
# If none matches, GET_REQUEST keeps its previous value and nothing is printed.
for request in all_requests:
    # only the host part of the URL is needed to recognise the API call;
    # the query string is not inspected here, so it is not parsed
    if urlparse(request.url).netloc == TARGET_URL:
        GET_REQUEST = request
        print(GET_REQUEST.url)
        break

https://www.eco-visio.net/api/aladdin/1.0.0/pbl/publicwebpageplus/607?withNull=true


Fetch metadata information of all counting stations of a certain city

In [39]:
# Re-issue the captured API call with `requests` to download the metadata.
# Pass the URL string explicitly: handing over the captured request object
# itself only works if it happens to stringify to its URL.
url_endpoint = GET_REQUEST.url
response = requests.get(url_endpoint)
# show the response object (status code) as cell output
response

<Response [200]>

Extract the relevant metadata of the counting stations and save it to a JSON file

In [44]:
# Decode the JSON body of the metadata response into Python objects.
data_json = json.loads(response.content)

In [45]:
# Keep only the fields needed from each station record and store them under
# more descriptive keys (one dict per counting station, input order kept).
counting_station_list = [
    {
        "id": station["idPdc"],
        "name": station["nom"],
        "latitude": station["lat"],
        "longitude": station["lon"],
        "start_time": station["debut"],
    }
    for station in data_json
]

In [46]:
# Group the station records under the name of their city.
data = {'Stuttgart': counting_station_list}

In [47]:
# Serialise the per-city metadata and write it to a JSON file.
metadata_path = 'counting_stations_metadata.json'
with open(metadata_path, mode='w') as outfile:
    outfile.write(json.dumps(data))

# Get data from bike counting stations

In [15]:
# Load the station metadata from the JSON file written earlier. The context
# manager closes the file handle again; a bare open() would leave it open
# for the lifetime of the kernel.
with open('counting_stations_metadata.json') as file:
    counting_stations_metadata = json.load(file)


In [48]:
# Display the loaded metadata to check what was read back from the file.
counting_stations_metadata

{'Stuttgart': [{'id': 100034882,
   'name': 'König-Karls-Brücke Barometer',
   'latitude': 48.79912609299229,
   'longitude': 9.210372161982697,
   'start_time': '01/01/2014'},
  {'id': 100034894,
   'name': 'Böblinger Straße',
   'latitude': 48.74661091153218,
   'longitude': 9.135662267916839,
   'start_time': '01/01/2014'},
  {'id': 100042486,
   'name': 'Taubenheimstraße',
   'latitude': 48.804909684344075,
   'longitude': 9.226946272791574,
   'start_time': '01/01/2014'},
  {'id': 100042487,
   'name': 'Waiblinger Straße',
   'latitude': 48.80513535517377,
   'longitude': 9.225987627445804,
   'start_time': '01/01/2014'},
  {'id': 100060100,
   'name': 'Samaraweg',
   'latitude': 48.80811,
   'longitude': 9.18551,
   'start_time': '01/01/2014'},
  {'id': 100061257,
   'name': 'Waldburgstraße',
   'latitude': 48.716494,
   'longitude': 9.08652,
   'start_time': '01/01/2014'},
  {'id': 100061633,
   'name': 'Kremmlerstraße',
   'latitude': 48.739821,
   'longitude': 9.152228,
   'st

In [None]:
# Position of the counting station to fetch within the "Stuttgart" list of
# counting_stations_metadata (used to build the station URL in the next cell).
i = 4

Create URL for specific bike counting station

In [49]:
# Public page of the selected counting station, addressed by its id.
selected_station = counting_stations_metadata["Stuttgart"][i]
BASE_URL = f"https://data.eco-counter.com/public2/?id={selected_station['id']}"

In [17]:
# Drop the requests captured during earlier page loads. The driver keeps
# accumulating them, and the search in the next cell stops at the first
# match, so a stale request (e.g. from a previously selected station)
# could otherwise be picked instead of the one for BASE_URL.
del driver.requests

driver.get(BASE_URL)

# wait for the page to load completely
time.sleep(1)

# get all requests made by the page
all_requests = driver.requests

# host of the API call we want to pick out of the captured requests
TARGET_URL = "www.eco-visio.net"
# placeholder until the matching request has been found
GET_REQUEST = ""

In [18]:
for request in all_requests:
    # split the URL once and decode its query string into a dict
    parsed_url = urlparse(request.url)
    query_dictionary = parse_qs(parsed_url.query)
    # skip everything that is not a call to the target host carrying a token
    is_target_host = parsed_url.netloc == TARGET_URL
    if not is_target_host or "t" not in query_dictionary:
        continue
    # first match wins: remember it, show its URL and stop searching
    GET_REQUEST = request
    print(GET_REQUEST.url)
    break

https://www.eco-visio.net/api/aladdin/1.0.0/pbl/publicwebpage/data/101034882?begin=20120701&end=20210921&step=4&domain=607&t=539b0b5e3aa991d2e60ebfc0136bb099d27a5c26b2a2a413cd0f3b77bd5b2f2e&withNull=true


In [19]:
# Split the URL of the captured request into its components ...
parsed_url = urlparse(GET_REQUEST.url)
# ... and decode the query string into a dict mapping each parameter name
# to a list of its values.
query_dictionary = parse_qs(parsed_url.query)

In [20]:
# Display the parsed URL components (scheme, host, path, query, ...).
parsed_url

ParseResult(scheme='https', netloc='www.eco-visio.net', path='/api/aladdin/1.0.0/pbl/publicwebpage/data/101034882', params='', query='begin=20120701&end=20210921&step=4&domain=607&t=539b0b5e3aa991d2e60ebfc0136bb099d27a5c26b2a2a413cd0f3b77bd5b2f2e&withNull=true', fragment='')

In [21]:
# Re-parse the query string of the captured URL (this replaces any earlier
# value of query_dictionary) and display the resulting parameters.
query_dictionary = parse_qs(parsed_url.query)
query_dictionary

{'begin': ['20120701'],
 'end': ['20210921'],
 'step': ['4'],
 'domain': ['607'],
 't': ['539b0b5e3aa991d2e60ebfc0136bb099d27a5c26b2a2a413cd0f3b77bd5b2f2e'],
 'withNull': ['true']}

Define Query Parameters for fetching the Data

In [50]:
# Values that replace the captured `begin`, `end` and `step` query parameters.
# BEGIN/END look like YYYYMMDD dates (same shape as the captured values)
# - TODO confirm against the API.
BEGIN = 2016_01_01
END = 2021_09_21
# NOTE(review): the meaning of the individual step codes is not visible
# here - TODO confirm.
STEP = 2

In [51]:
# Override the captured `begin`, `end` and `step` parameters with our own
# settings; every other entry of query_dictionary is left untouched.
query_dictionary.update(begin=BEGIN, end=END, step=STEP)

In [52]:
# Display the query parameters that will be sent with the data request.
query_dictionary

{'withNull': ['true'], 'begin': 20160101, 'end': 20210921, 'step': 2}

Make Request and fetch the data

In [53]:
# Rebuild the endpoint without its query string; the (modified) parameters
# are handed over separately so that `requests` encodes them into the URL.
url_endpoint = f"{parsed_url.scheme}://{parsed_url.netloc}{parsed_url.path}"
response = requests.get(url=url_endpoint, params=query_dictionary)
# show the response object (status code) as cell output
response

<Response [200]>

Convert fetched data stream to csv

In [54]:
# Decode the JSON body of the response and peek at its first record.
data_json = json.loads(response.content)
data_json[0]

{'token': None,
 'id_pdc_img': 0,
 'idPdc': 100034882,
 'lat': 48.79912609299229,
 'lon': 9.210372161982697,
 'nom': 'König-Karls-Brücke Barometer',
 'publicMessage': None,
 'photo': [{'lien': 'https://filer.eco-counter-tools.com/file/88/d6fbfa7f9c35d3b68cdaa2785a1df5cff4602c442b0d5983c2c5a321f7168088/Koenig_Karl_Bruecke_Radzaehlstelle_0059.JPG'},
  {'lien': 'https://filer.eco-counter-tools.com/file/a2/69adabd2683d6bafba504d778721d417cb73453f3dd06e3849bd10d1bba2fda2/Koenig_Karl_Bruecke_Radzaehlstelle_0147.JPG'},
  {'lien': 'https://filer.eco-counter-tools.com/file/45/ad6a83dcd2ec6cccb1f3cef91c205c908938a43decf9592b2292089ef728c645/Koenig_Karl_Bruecke_Radzaehlstelle_0023.JPG'}],
 'lienPublic': '100034882',
 'pratique': [{'pratique': 2, 'id': 101034882},
  {'pratique': 2, 'id': 102034882},
  {'pratique': 2, 'id': 103034882},
  {'pratique': 2, 'id': 104034882}],
 'mainPratique': 2,
 'filtre': None,
 'formule_site': None,
 'fin': None,
 'debut': '01/01/2014',
 'finPeriode': '20210921',
 'd

In [55]:
# Field names of the first record (not used further in the visible cells).
keys = data_json[0].keys()
# One list per column of the final table.
date_list, comptage_list, timestamp_list = [], [], []

In [28]:
# Pull the three fields of interest out of every record. The lists are rebuilt
# from scratch rather than appended to, so re-running this cell cannot
# duplicate rows that an earlier run already added.
# NOTE(review): assumes every record has "date", "comptage" and "timestamp"
# keys - a missing key raises KeyError.
date_list = [data_object["date"] for data_object in data_json]
comptage_list = [data_object["comptage"] for data_object in data_json]
timestamp_list = [data_object["timestamp"] for data_object in data_json]

In [29]:
# Column names of the resulting table, in the order the lists are zipped.
columns = [
    "date",
    "comptage",
    "timestamp",
]

In [30]:
# Calling DataFrame constructor after zipping
# both lists, with columns specified
df = pd.DataFrame(list(zip(date_list, comptage_list,timestamp_list)),
               columns =columns)

In [31]:
# Persist the table as a CSV file (relative path, so it lands in the
# current working directory).
df.to_csv("test.csv")