# Oeffikator

## Setup
We start off with global variable and function definitions and the important imports.

In [7]:
# pylint: disable-msg=C0103,C0114,C0413,E0401
# pycodestyle: disable=ignore
import os
import sys

module_path = os.path.abspath(os.path.join(".."))
sys.path.append(module_path)

# 'noqa' ignores the E402 error of pycodestyle on the imports below
import asyncio  # noqa
import datetime  # noqa
import time  # noqa
from concurrent.futures import ThreadPoolExecutor  # noqa

import geopy.distance  # noqa
import matplotlib.colors as mcolors  # noqa
import matplotlib.pyplot as plt  # noqa
import nest_asyncio  # noqa
import numpy as np  # noqa
import pandas as pd  # noqa
from point_iterator.grid_point_iterator import GridPointIterator  # noqa
from point_iterator.triangular_iterator_interface import TriangularPointIterator  # noqa
from requesters.bvg_rest_requester import BVGRestRequester  # noqa
from requesters.oeffi_requester import OeffiRequester  # noqa

nest_asyncio.apply()

In [None]:
# Travel date used for every request: the first Monday strictly after today
# (tomorrow itself when tomorrow is a Monday). The time of day is kept as is.
DAY = datetime.datetime.today() + datetime.timedelta(days=1)
DAY += datetime.timedelta(days=(7 - DAY.weekday()) % 7)

# Segment data for the travel-time colormap: green fades out over the lower
# half of the range, red fades in over the upper half, blue stays at zero.
cdict = dict(
    red=((0.0, 0.0, 0.0), (0.5, 0.0, 0.0), (1.0, 1.0, 1.0)),
    blue=((0.0, 0.0, 0.0), (1.0, 0.0, 0.0)),
    green=((0.0, 0.0, 1.0), (0.5, 0.0, 0.0), (1.0, 0.0, 0.0)),
)
CMAP = mcolors.LinearSegmentedColormap(name="my_colormap", segmentdata=cdict, N=100)

In [None]:
def compute_distance(x1: float, y1: float, x2: float, y2: float) -> float:
    """Return the distance in kilometers between two coordinates (EPSG:4326).

    The calculation is delegated to ``geopy.distance.distance``.

    Args:
        x1 (float): latitude of point 1
        y1 (float): longitude of point 1
        x2 (float): latitude of point 2
        y2 (float): longitude of point 2

    Returns:
        float: distance between the two points in kilometers
    """
    first_point = (x1, y1)
    second_point = (x2, y2)
    separation = geopy.distance.distance(first_point, second_point)
    return separation.km


def to_BVG_sdt(x: float) -> int:
    """Transform a coordinate from decimal degrees to the BVG integer format.

    The BVG format is the coordinate multiplied by one million,
    e.g. 13.47 -> 13470000.

    Args:
        x (float): coordinate in standard format, e.g. 13.47

    Returns:
        int: coordinate in BVG standard, e.g. 13470000
    """
    # Round instead of truncating: the floating point product can land a hair
    # below the intended integer (8.2 * 1e6 == 8199999.999999999), and int()
    # alone would then be off by one. The outer int() guarantees a plain int
    # even when x is a numpy scalar.
    return int(round(x * 1_000_000))


async def start_async_process(
    current_requesters: list, origin: dict, destinations: list, starting_date: datetime.datetime
) -> list:
    """Request the journeys from an origin to a batch of destinations concurrently.

    Every destination gets its own worker thread. The requesters are used in
    round-robin order, i.e. destination ``i`` is handled by requester
    ``i % len(current_requesters)``.

    Args:
        current_requesters (list): requesters making calls to their corresponding
            APIs; each one must offer ``get_journey(origin, destination, date)``
        origin (dict): address of the origin
        destinations (list): a batch of destination coordinates, each indexable
            as ``(longitude, latitude)``
        starting_date (datetime.datetime): date of travel

    Returns:
        list: the ``get_journey`` results, in the order of ``destinations``

    Raises:
        ValueError: if there are destinations but no requester to query them
    """
    amount_of_workers = len(destinations)
    if amount_of_workers == 0:
        # ThreadPoolExecutor rejects max_workers=0, so an empty batch is
        # answered directly instead of failing.
        return []
    amount_of_requesters = len(current_requesters)
    if amount_of_requesters == 0:
        raise ValueError("at least one requester is needed to request journeys")

    async_loop = asyncio.get_event_loop()
    with ThreadPoolExecutor(max_workers=amount_of_workers) as executor:
        pending_journeys = [
            async_loop.run_in_executor(
                executor,
                current_requesters[index % amount_of_requesters].get_journey,
                origin,
                {
                    "longitude": str(destination[0]),
                    "latitude": str(destination[1]),
                    "address": "placeholder",
                },
                starting_date,
            )
            for index, destination in enumerate(destinations)
        ]
        # gather() keeps the order of its arguments, so the results line up
        # with the destinations.
        return list(await asyncio.gather(*pending_journeys))

## Crawl Data

Let's define some parameters first.

In [None]:
# The BVG REST requester is always part of the requester pool.
requester1 = BVGRestRequester()
requesters = [requester1]

# The Oeffi requester only joins the pool when an authentication key is set here.
AUTHKEY = ""
if AUTHKEY:
    requester2 = OeffiRequester(AUTHKEY)
    requesters += [requester2]

# Resolve the free-text query to the origin location of all journeys.
query = "Brandenburger Tor"
start = requester1.query_location(query)
print("We gonne use following address: " + start["address"])

In [None]:
# The results file is named after the origin coordinates in BVG integer format.
file_name = f"{to_BVG_sdt(start['longitude'])}_{to_BVG_sdt(start['latitude'])}.csv"

# Area to cover in the format (left, right, bottom, top),
# i.e. (xmin, xmax, ymin, ymax).
bounding_box = (13.2756, 13.4892, 52.4677, 52.5532)
step_size_x = 0.02
step_size_y = step_size_x / 2

# Number of points of a regular grid with the step sizes above.
grid_xs = np.arange(bounding_box[0], bounding_box[1], step_size_x)
grid_ys = np.arange(bounding_box[2], bounding_box[3], step_size_y)
length = len(grid_xs) * len(grid_ys)

print("Amount of requests: ", length)
print(f"Estimated time serial: {int(length / 38)}min")
print(f"Estimated time parallel: {int(length / 138)}min")

## Parallel computing

In [None]:
# Number of destination points requested per batch. Each batch is handed to
# start_async_process, which uses one worker thread per destination.
parallel_threads = 8
# Time budget of the time-limited crawl loop that draws its points from the
# TriangularPointIterator.
duration = 20  # minutes

In [None]:
# Initial crawl: request the journeys to all points of a GridPointIterator over
# the bounding box and collect the destinations with their arrival times.
# `i` is the label of the last row written to `df`, `destination_i` counts the
# requested destinations.
i = 0
destination_i = 0
df = pd.DataFrame(columns=["longitude", "latitude", "Time"])
start_time = datetime.datetime.now()

grid_point_iterator = GridPointIterator(bounding_box, 3)
while grid_point_iterator.has_points_remaining():
    # Take up to `parallel_threads` points for this batch; a slot is skipped
    # when the iterator reports that no points remain.
    points = []
    for _ in range(parallel_threads):
        if grid_point_iterator.has_points_remaining():
            points.append(next(grid_point_iterator))

    # Request the whole batch and block until every journey has returned.
    loop = asyncio.get_event_loop()
    journeys = asyncio.ensure_future(start_async_process(requesters, start, points, DAY))
    loop.run_until_complete(journeys)

    for journey in journeys.result():
        destination_i += 1
        i += 1
        longitude = float(journey["destination"]["longitude"])
        latitude = float(journey["destination"]["latitude"])
        df.loc[i] = [longitude, latitude, journey["arrivalTime"]]
    print(f"So far {i} points were generated of which {destination_i} are destinations.")

time_taken_total = datetime.datetime.now() - start_time
print("This took us:")
print(time_taken_total)

In [None]:
# Time-limited crawl: for `duration` minutes, keep requesting journeys to points
# drawn from a TriangularPointIterator that is fed with every coordinate
# collected in `df` so far. Besides the destinations, the stopovers of each
# journey are stored as additional rows.
# NOTE(review): presumably the iterator proposes new points from a triangulation
# of the known points - verify against TriangularPointIterator.
triangular_point_iterator = TriangularPointIterator(np.array(list(zip(df["longitude"], df["latitude"]))))

start_time = datetime.datetime.now()
round_time = datetime.datetime.now()  # reference time for the periodic save below
while datetime.datetime.now() - start_time < datetime.timedelta(minutes=duration):
    # Alternative stop criterion, kept for reference:
    # while destination_i < 48:
    # Only requesters that have not reached their request limit take part in this round.
    available_requesters = [requester for requester in requesters if not requester.has_reached_request_limit()]
    for requester in available_requesters:
        print(requester, len(requester.past_requests))

    if available_requesters:
        # Hand all coordinates known so far (destinations and stopovers) to the
        # iterator before drawing the next batch of points.
        triangular_point_iterator.points = np.array(list(zip(df["longitude"], df["latitude"])))
        points = [next(triangular_point_iterator) for _ in range(parallel_threads)]

        # Run asynchronous requests and block until the whole batch has returned
        loop = asyncio.get_event_loop()
        journeys = asyncio.ensure_future(start_async_process(available_requesters, start, points, DAY))
        loop.run_until_complete(journeys)

        for journey in journeys.result():
            destination_i += 1
            # Every stopover becomes a row of its own; `i` is the running row label.
            if journey["stopovers"] is not None:
                for stop in journey["stopovers"]:
                    i += 1
                    df.loc[i] = [stop["longitude"], stop["latitude"], stop["time"]]
            i += 1
            df.loc[i] = [
                float(journey["destination"]["longitude"]),
                float(journey["destination"]["latitude"]),
                journey["arrivalTime"],
            ]
    else:
        # No requester may be used right now: wait a moment before checking again.
        print("All requesters have reached there request threshold. Sleeping ...")
        time.sleep(2)

    # Intermediate save once more than 60 seconds have passed since the last one.
    if datetime.datetime.now() - round_time > datetime.timedelta(seconds=60):
        df = df.drop_duplicates()
        df.to_csv("results/locations/new_" + file_name, index=True, header=False)
        round_time = datetime.datetime.now()

    print(f"So far {i} points were generated of which {destination_i} are destinations.")

# Final save after the time budget is used up (row label as first column, no header).
df = df.drop_duplicates()
df.to_csv("results/locations/new_" + file_name, index=True, header=False)

time_taken_total = datetime.datetime.now() - start_time
print("This took us:")
print(time_taken_total)

## Plotting

Let's read the data first.

In [None]:
# Look the origin up again and rebuild the name of the results file from its
# coordinates in BVG integer format.
start = requester1.query_location(query)
file_name_plotting = str(to_BVG_sdt(start["longitude"])) + "_" + str(to_BVG_sdt(start["latitude"])) + ".csv"

# The file has no header row; its first column (the row label written by
# to_csv) becomes the index, the remaining columns are named X, Y and Time.
df = pd.read_csv(
    "results/locations/new_" + file_name_plotting,
    sep=",",
    index_col=0,
    names=["X", "Y", "Time"],
    na_values="None",
    # The dtype keys have to match the column names given in `names`; a key
    # that matches no column has no effect on how the data is read.
    dtype={"X": np.float32, "Y": np.float32, "Time": str},
)

Convert the time from a clock-time string (format `HHMMSS`) to the travel time in minutes relative to the reference start time.

In [None]:
# Reference departure time that is subtracted from every arrival time.
# NOTE(review): "12000" has only five digits; strptime is expected to read it
# as 12:00:00 - confirm, "120000" would be the unambiguous spelling.
start_time = datetime.datetime.strptime("12000", "%H%M%S")
# Longest trip (in minutes) that is kept for plotting.
MAX_TRIP_TIME = 120

# Remove rows without data, failed requests and malformed time stamps.
df = df.dropna()
df.drop(df[df["Time"] == "error"].index, inplace=True)
df.drop(df[~df["Time"].str.match(r"(\b\d{6}\b)", na=False)].index, inplace=True)

# Replace the column instead of writing through df.loc[:, "Time"]: an in-place
# write can keep the column's object dtype, and the `.dt` accessor below needs
# a real timedelta column.
df["Time"] = pd.to_datetime(df["Time"], format="%H%M%S")
df["Time"] = (df["Time"] - start_time).dt.total_seconds() / 60  # travel time in minutes

df.drop(df[df["Time"] < 0].index, inplace=True)  # drop all items where the time is negative
df.drop(df[df["Time"] > MAX_TRIP_TIME].index, inplace=True)  # drop all trips longer than MAX_TRIP_TIME

It's time to plot:

In [None]:
# get the start location coordinates from the file name
# Recover the start location from the file name: strip the ".csv" suffix, split
# the two integer coordinates and scale them back to degrees.
coordinate_parts = file_name_plotting[:-4].split("_")
start_location = tuple(int(part) * 10e-7 for part in coordinate_parts)

# Extent of the crawled locations as (xmin, xmax, ymin, ymax).
longitudes = df["X"]
latitudes = df["Y"]
bounding_box_locations = (min(longitudes), max(longitudes), min(latitudes), max(latitudes))

# Extent that is hard coded for the map image "map_berlin_A.png".
bounding_box_map = (13.272, 13.491, 52.456, 52.563)
berlin_map = plt.imread("maps/map_berlin_A.png")

# 32 evenly spaced color levels spanning the observed travel times.
travel_times = df["Time"]
levels = np.linspace(np.min(travel_times), np.max(travel_times), 32)

fig, ax = plt.subplots(figsize=(18, 18))
ax.set_xlim(bounding_box[0], bounding_box[1])
ax.set_ylim(bounding_box[2], bounding_box[3])
ax.tricontourf(longitudes, latitudes, travel_times, levels=levels, alpha=0.5, cmap=CMAP, antialiased=True)
# Uncomment to display the destination locations as well:
# ax.scatter(df["X"], df["Y"], alpha=0.5, color = "brown")

# The map image goes in last; "aspect=1.65" is a magic number.
ax.imshow(berlin_map, extent=bounding_box_map, aspect=1.65)
# Mark the starting position with a star.
start_x = start_location[0]
start_y = start_location[1]
ax.plot(start_x, start_y, marker="*", markersize=20, color="tab:orange")

plt.savefig(f"results/images/map_{query}.png")

In [None]:
# Save the current pyplot figure under a fixed file name.
# NOTE(review): this runs in its own cell; depending on the notebook backend the
# figure drawn in the previous cell may no longer be the current one - confirm
# that the written image is not empty.
plt.savefig("results/images/map_berlin_sWedding.png")