In [1]:
from seleniumwire import webdriver
import time
import pathlib
import os
import requests
from urllib.parse import urlparse, parse_qs
from io import BytesIO
import pandas as pd
import json


# Get metadata of all bike counting stations of a certain city

In [2]:
# Public Eco-Counter overview page that is opened in the browser below.
# NOTE(review): presumably `id=607` selects the Stuttgart domain — confirm.
URL_STUTTGART = "https://data.eco-counter.com/ParcPublic/?id=607#"

In [3]:
# Build the path to the chromedriver binary, which is expected to sit in the
# current working directory of the notebook kernel.
current_path = pathlib.Path().resolve()
driver_path = os.path.join(current_path, "chromedriver")

# selenium-wire's Chrome driver; the requests made by the page are read from
# `driver.requests` further down.
driver = webdriver.Chrome(driver_path)

driver.get(URL_STUTTGART)

# wait for the page to load completely
# NOTE(review): a fixed one-second sleep may be too short on a slow
# connection, in which case the API request is not captured yet.
time.sleep(1)

In [4]:
# get all requests made by the page so far, as recorded by the driver
all_requests = driver.requests

In [5]:
# Host name of the API; captured requests are compared against it via
# `urlparse(...).netloc`, so this must be the bare host without scheme or path.
TARGET_URL = "www.eco-visio.net"
# Placeholder: stays an empty string until a matching captured request is
# found and stored here.
GET_REQUEST = ""

Pick out the GET request that the page sent to the Eco-Visio API; this request returns the metadata of all bike counting stations in the city.

In [6]:
# Find the first captured request that was sent to the target host.
for request in all_requests:
    # only the host part of the URL is needed for the comparison
    parsed_url = urlparse(request.url)
    if parsed_url.netloc == TARGET_URL:
        GET_REQUEST = request
        print(GET_REQUEST.url)
        break
else:
    # The loop ended without `break`: nothing matched. Say so here instead of
    # letting the next cell fail with an unrelated-looking error.
    print("No request to " + TARGET_URL + " was captured; GET_REQUEST was not updated.")

https://www.eco-visio.net/api/aladdin/1.0.0/pbl/publicwebpageplus/607?withNull=true


Fetch metadata information of all counting stations of a certain city

In [7]:
# Re-issue the captured API call with `requests`. The URL string is passed
# explicitly instead of the captured request object, so the call does not
# depend on how that object happens to convert to a string.
url_endpoint = GET_REQUEST.url
# Without a timeout `requests` waits indefinitely if the server never answers.
response = requests.get(url_endpoint, timeout=30)
response

<Response [200]>

Extract the relevant metadata of the counting stations and save it to a JSON file.

In [8]:
# Decode the raw response body as JSON; the result is iterated below as a
# sequence of station records.
data_json = json.loads(response.content)

In [9]:
# Reduce every station record to the five fields that are kept, stored under
# more descriptive key names than the ones used by the API.
counting_station_list = [
    {
        "id": station["idPdc"],
        "name": station["nom"],
        "latitude": station["lat"],
        "longitude": station["lon"],
        "start_time": station["debut"],
    }
    for station in data_json
]

In [10]:
# Top-level mapping written to the JSON file: city name -> list of its stations.
data = {'Stuttgart': counting_station_list}

In [11]:
# Make sure the target directory exists; otherwise `open` raises
# FileNotFoundError on a fresh checkout.
os.makedirs('metadata', exist_ok=True)
# Explicit encoding so the file does not depend on the platform default.
with open('metadata/counting_stations_metadata.json', 'w', encoding='utf-8') as outfile:
    json.dump(data, outfile)

# Get data from bike counting stations

In [17]:
# Load the station metadata. The path is relative to the notebook's working
# directory, like the `metadata/` folder used when writing the JSON file; an
# absolute `/metadata/...` path points at the filesystem root and is not
# portable across machines.
df_metadata_counting_stations = pd.read_csv('metadata/counting_stations_germany_metadata.csv', index_col=0)
df_metadata_counting_stations.head()

Unnamed: 0,name,number,latitude,longitude,id_list
0,Sommerda,1,51.287374,11.060599,100055269
1,Stuttgart,1,48.78424,9.147031,100063203
2,Stuttgart,2,48.826,9.21488,100063205
3,Stuttgart,3,48.716494,9.08652,100061257
4,Stuttgart,4,48.739821,9.152228,100061633


In [96]:
# Sanity check: look up a single station by its id and show its name and number.
df_row = df_metadata_counting_stations.loc[df_metadata_counting_stations['id_list'] == 100055269]
print(df_row["name"].values[0])
# Take the first match by position. `df_row["number"][0]` would be a lookup of
# the index *label* 0, which only works when the matching row happens to carry
# that label and raises KeyError for any other station.
df_row["number"].values[0]

Sommerda


'1'

In [107]:
# get ids of all stations, in the row order of the metadata frame; later cells
# assign per-station results back to the frame as a column, so the order matters
counting_station_ids = list(df_metadata_counting_stations["id_list"])

In [109]:
# Values sent as the `begin`, `end` and `step` query parameters of the data request.
# NOTE(review): begin/end look like dates written as YYYYMMDD — confirm against the API.
BEGIN = 2016_01_01
END = 2021_09_26
# NOTE(review): the meaning of the step value is defined by the API — TODO confirm.
STEP = 2

In [110]:
# Create URL for specific bike counting station

def get_data_api_request(counting_station_id, wait_seconds=1):
    """Capture the API request that a station's public page makes for its data.

    Opens the public page of the given station in a fresh Chrome session,
    waits for it to load and scans the requests recorded by the driver for
    the first one that was sent to ``www.eco-visio.net`` and carries a ``t``
    query parameter.

    Parameters:
        counting_station_id: id of the station; it is appended to the public
            page URL after conversion with ``str``.
        wait_seconds: how long to wait after opening the page before the
            recorded requests are inspected (default: 1 second).

    Returns:
        The matching captured request object, or an empty string if the page
        made no matching request.
    """
    # define target url, which shall be filtered out
    TARGET_URL = "www.eco-visio.net"
    BASE_URL = "https://data.eco-counter.com/public2/?id=" + str(counting_station_id)

    driver = webdriver.Chrome(driver_path)
    try:
        driver.get(BASE_URL)
        # wait for page to load completely
        time.sleep(wait_seconds)

        # go through all requests made by the page
        for request in driver.requests:
            parsed_url = urlparse(request.url)
            if parsed_url.netloc != TARGET_URL:
                continue
            # the data request is recognised by the token in its query string
            if "t" in parse_qs(parsed_url.query):
                return request
        return ""
    finally:
        # Always end the whole driver session, even if loading the page
        # raised. `close()` only closes the window, and this function is
        # called once per station, so leftover sessions would pile up.
        driver.quit()

# Build the endpoint URL and the query parameters for the data request
    
def create_data_api_request(get_request, begin=None, end=None, step=None):
    """Derive the endpoint and query parameters for fetching a station's data.

    The URL of the captured request is split into its endpoint (scheme, host
    and path) and its query parameters. The ``begin``, ``end`` and ``step``
    parameters are then overwritten.

    Parameters:
        get_request: object with a ``url`` attribute holding the captured URL.
        begin, end, step: values for the query parameters of the same name.
            Each one defaults to the module-level constant ``BEGIN``, ``END``
            or ``STEP`` when left as ``None``.

    Returns:
        A tuple ``(url_endpoint, query_dictionary)``. ``url_endpoint`` is the
        URL without its query string. ``query_dictionary`` maps parameter
        names to values; parameters taken over from the captured URL are
        lists of strings, as produced by ``parse_qs``.
    """
    parsed_url = urlparse(get_request.url)
    query_dictionary = parse_qs(parsed_url.query)

    # Define Query Parameters for fetching the Data
    query_dictionary["begin"] = BEGIN if begin is None else begin
    query_dictionary["end"] = END if end is None else end
    query_dictionary["step"] = STEP if step is None else step

    url_endpoint = parsed_url.scheme + "://" + parsed_url.netloc + parsed_url.path
    return url_endpoint, query_dictionary

# Convert fetched data stream to csv

def save_data_to_csv(data_json, counting_station_id, metadata=None, output_dir="data"):
    """Write the fetched records of one counting station to a CSV file.

    From every record the ``date``, ``comptage`` and ``timestamp`` entries are
    kept. The file is named ``<name>_<number>.csv`` after the station's row in
    the metadata table and is written with ``DataFrame.to_csv`` defaults.

    Parameters:
        data_json: iterable of dict-like records, each providing the keys
            ``date``, ``comptage`` and ``timestamp``. An empty iterable
            produces a CSV file that contains only the header.
        counting_station_id: id used to find the station in the ``id_list``
            column of the metadata table.
        metadata: DataFrame with the columns ``id_list``, ``name`` and
            ``number``. Defaults to the module-level
            ``df_metadata_counting_stations``.
        output_dir: directory the file is written to (default ``"data"``);
            it is created if it does not exist.

    Raises:
        KeyError: if a record lacks one of the three keys.
        IndexError: if no metadata row matches ``counting_station_id``.
    """
    columns = ["date", "comptage", "timestamp"]
    # Plain key access on purpose: an incomplete record should fail loudly
    # instead of silently producing empty cells.
    records = [
        (entry["date"], entry["comptage"], entry["timestamp"])
        for entry in data_json
    ]
    df = pd.DataFrame(records, columns=columns)

    # create name
    if metadata is None:
        metadata = df_metadata_counting_stations
    df_row = metadata.loc[metadata['id_list'] == counting_station_id]
    if df_row.empty:
        raise IndexError(
            "no metadata row found for counting station id " + str(counting_station_id)
        )
    # str() because the columns may be parsed as numbers, and a number cannot
    # be concatenated to a string.
    file_name = str(df_row["name"].values[0]) + "_" + str(df_row["number"].values[0]) + ".csv"

    os.makedirs(output_dir, exist_ok=True)
    path = os.path.join(output_dir, file_name)

    df.to_csv(path)

In [115]:
# Fetch and store the data of every station. `found_data_list` gets exactly
# one entry per station id: 1 if its data was saved, 0 otherwise.
found_data_list = []

for counting_station_id in counting_station_ids:

    get_request = get_data_api_request(counting_station_id)
    if get_request == "":
        # the station's page made no data request that could be reused
        found_data_list.append(0)
        continue

    url_endpoint, query_dictionary = create_data_api_request(get_request)
    # Without a timeout one unresponsive station would block the whole run.
    response = requests.get(url_endpoint, params=query_dictionary, timeout=60)
    print(response)

    if not response.ok:
        # An error response is unlikely to hold the expected JSON; count the
        # station as not fetched instead of aborting the loop while parsing.
        found_data_list.append(0)
        continue

    data_json = json.loads(response.content)
    save_data_to_csv(data_json, counting_station_id)

    found_data_list.append(1)
    
    

<Response [200]>
<Response [200]>
<Response [200]>
<Response [200]>
<Response [200]>
<Response [200]>
<Response [200]>
<Response [200]>
<Response [200]>
<Response [200]>
<Response [200]>
<Response [200]>
<Response [200]>
<Response [200]>
<Response [200]>


In [116]:
# The loop above appends one flag per station id, so this is expected to equal
# len(counting_station_ids).
len(found_data_list)

56

In [117]:
# Store per station whether its data was fetched (1) or not (0). The flags are
# assigned by position, so they must be in the row order of the frame.
df_metadata_counting_stations['fetched data ?'] = found_data_list
  



In [118]:
# show the metadata table including the new 'fetched data ?' column
df_metadata_counting_stations

Unnamed: 0,name,number,latitude,longitude,id_list,fetched data ?
0,Sommerda,1,51.287374,11.060599,100055269,0
1,Stuttgart,1,48.78424,9.147031,100063203,1
2,Stuttgart,2,48.826,9.21488,100063205,0
3,Stuttgart,3,48.716494,9.08652,100061257,0
4,Stuttgart,4,48.739821,9.152228,100061633,1
5,Stuttgart,5,48.740003,9.226692,100061648,1
6,Stuttgart,6,48.811509,9.167497,100062943,1
7,Stuttgart,7,48.809354,9.105381,100062945,1
8,Stuttgart,8,48.779637,9.248607,100063204,1
9,Arnsberg,1,51.440702,7.964296,100063831,0
