# Oeffikator

## Setup
We start off with the important imports, followed by global variable and function definitions.

In [None]:
import asyncio
import datetime
import geopy.distance
import json
import matplotlib.colors as mcolors
import matplotlib.pyplot as plt
import multiprocessing as mp
import nest_asyncio
import numpy as np
import os
import pandas as pd
import random
import requests
import time

from apis.BVGRestAPI import BVGRestAPI
from apis.OeffiAPI import OeffiAPI
from concurrent.futures import ThreadPoolExecutor
from point_generator.GridPointGenerator import GridPointGenerator
from point_generator.TriangularPointGenerator import TriangularPointGenerator
from scipy.spatial import Delaunay
from TimeTransformer import TimeTransformer

nest_asyncio.apply()

In [None]:
# Departure date for all journey queries: the first Monday on or after
# tomorrow (weekday() == 0 is Monday), keeping the current time of day.
DAY = datetime.datetime.today() + datetime.timedelta(days=1)
DAY += datetime.timedelta(days=(7 - DAY.weekday()) % 7)
# Segment data for the travel-time colormap, one channel at a time.
# Each anchor is an (x, y0, y1) triple as expected by LinearSegmentedColormap.
cdict = {}
# Red stays at 0 up to the midpoint and then rises to 1.
cdict["red"] = ((0.0, 0.0, 0.0), (0.5, 0.0, 0.0), (1.0, 1.0, 1.0))
# Blue is 0 over the whole range.
cdict["blue"] = ((0.0, 0.0, 0.0), (1.0, 0.0, 0.0))
# Green starts at 1 and falls to 0 at the midpoint.
cdict["green"] = ((0.0, 0.0, 1.0), (0.5, 0.0, 0.0), (1.0, 0.0, 0.0))
# Colormap quantised into 100 levels; used by the contour plot further down.
CMAP = mcolors.LinearSegmentedColormap(name="my_colormap", segmentdata=cdict, N=100)

In [None]:
def compute_distance(x1, y1, x2, y2):
    """Return the distance in kilometres between the points (x1, y1) and (x2, y2).

    The points are handed to ``geopy.distance.distance`` as two-element tuples
    and the ``km`` attribute of the result is returned.

    NOTE(review): geopy presumably interprets each tuple as
    (latitude, longitude) -- confirm that callers pass coordinates in that order.
    """
    first_point = (x1, y1)
    second_point = (x2, y2)
    separation = geopy.distance.distance(first_point, second_point)
    return separation.km


def to_BVG_sdt(x: float) -> int:
    """Scale a coordinate by one million and convert it to an integer.

    The conversion uses ``int()``, so the fractional part of the scaled value
    is truncated towards zero rather than rounded.

    Args:
        x: Coordinate value, e.g. a longitude or latitude in degrees.

    Returns:
        The coordinate multiplied by 1e6 as an ``int``.
    """
    scaled = x * 1e6
    return int(scaled)


async def start_async_process(apis, origin, destinations, starting_date):
    """Query one journey per destination concurrently in a thread pool.

    The blocking ``get_journey`` calls are dispatched to worker threads (one
    worker per destination) and distributed round-robin over ``apis``.

    Args:
        apis: Non-empty sequence of API clients exposing
            ``get_journey(origin, destination, starting_date)``.
        origin: Start location; passed through unchanged to ``get_journey``.
        destinations: Sized, indexable collection of points. Element 0 of each
            point is sent as the longitude and element 1 as the latitude
            (both converted to ``str``).
        starting_date: Departure date; passed through unchanged.

    Returns:
        A list with the ``get_journey`` results in the order of
        ``destinations``; an empty list when there are no destinations.
    """
    amount_of_workers = len(destinations)
    if amount_of_workers == 0:
        # ThreadPoolExecutor raises ValueError for max_workers=0, so an empty
        # batch is answered directly. ``len`` is used instead of truthiness
        # because ``destinations`` may be a numpy array.
        return []

    # Inside a coroutine the running loop is the one to schedule work on.
    loop = asyncio.get_running_loop()
    with ThreadPoolExecutor(max_workers=amount_of_workers) as executor:
        pending = [
            loop.run_in_executor(
                executor,
                apis[index % len(apis)].get_journey,
                origin,
                {
                    "longitude": str(point[0]),
                    "latitude": str(point[1]),
                    "address": "placeholder",
                },
                starting_date,
            )
            for index, point in enumerate(destinations)
        ]
        # gather() preserves the submission order of the futures.
        return list(await asyncio.gather(*pending))

## Crawl Data

Let's define some parameters first.

In [None]:
# Journey backends. The parallel crawl cells spread their requests over all
# entries of `apis`; the linear crawl and the location lookup use `api` only.
api = BVGRestAPI()
api2 = OeffiAPI()
apis = [api, api2]
# Free-text address of the journey origin.
query = "TU Berlin, Straße des 17 Juni"
# Resolve the address to a location record. This notebook reads its
# "address", "longitude" and "latitude" entries and passes the record as the
# origin of every journey request.
start = api.query_location(query)
print("We gonne use following address: " + start["address"])

In [None]:
# The results file is named after the scaled origin coordinates: "<longitude>_<latitude>.csv".
file_name = f"{to_BVG_sdt(start['longitude'])}_{to_BVG_sdt(start['latitude'])}.csv"
# Area to crawl, given as (xmin, xmax, ymin, ymax), i.e. (left, right, bottom, top).
bounding_box = (13.2756, 13.4892, 52.4677, 52.5532)
# Grid spacing; the y step is half the x step.
step_size_x = 0.02
step_size_y = step_size_x / 2

# Number of grid points = (steps along x) * (steps along y).
length = (
    np.arange(bounding_box[0], bounding_box[1], step_size_x).size
    * np.arange(bounding_box[2], bounding_box[3], step_size_y).size
)
print("Amount of requests: ", length)
# Rough estimates based on 38 (serial) and 138 (parallel) requests per minute.
print(f"Estimated time serial: {int(length / 38)}min")
print(f"Estimated time parallel: {int(length / 138)}min")

## Parallel computing

In [None]:
# Number of points requested from the point generators per batch; each batch
# is then fetched with one worker thread per point.
parallel_threads = 8
# Time budget of the triangular crawl loop.
duration = 1  # minutes

In [None]:
# Seed crawl: request journeys to the points produced by the GridPointGenerator
# for the bounding box and collect each destination with its arrival time.
# `i` is the row label of the last row written to `df`, `destination_i` counts
# the requested destinations. `df`, `i` and `destination_i` are reused by the
# triangular crawl in the next cell.
i = 0
destination_i = 0
df = pd.DataFrame(columns=["longitude", "latitude", "Time"])
start_time = datetime.datetime.now()


grid_point_generator = GridPointGenerator(bounding_box)
while grid_point_generator.has_points_remaining():
    # Fetch the next batch of points and query all of them concurrently.
    points = grid_point_generator.get_next_points(parallel_threads)
    loop = asyncio.get_event_loop()
    journeys = asyncio.ensure_future(start_async_process(apis, start, points, DAY))
    # Block until the whole batch has been answered.
    loop.run_until_complete(journeys)

    for journey in journeys.result():
        destination_i += 1
        i += 1
        # Only the final destination is stored here (no stopovers).
        df.loc[i] = [
            float(journey["destination"]["longitude"]),
            float(journey["destination"]["latitude"]),
            journey["arrivalTime"],
        ]
    print(f"So far {i} points were generated of which {destination_i} are destinations.")


# Report the wall-clock duration of the seed crawl.
time_taken_total = datetime.datetime.now() - start_time
print("This took us:")
print(time_taken_total)

In [None]:
# Refinement crawl: for `duration` minutes, keep asking the
# TriangularPointGenerator for new points based on the coordinates collected
# so far, query them concurrently and append destinations and stopovers to `df`.
# Continues the counters `i` / `destination_i` and the frame `df` of the grid crawl.
triangular_point_generator = TriangularPointGenerator()
# Checkpoints are written into "results/"; create it up front so that a
# missing directory cannot abort the crawl at the first save.
os.makedirs("results", exist_ok=True)

start_time = datetime.datetime.now()
round_time = datetime.datetime.now()
while datetime.datetime.now() - start_time < datetime.timedelta(minutes=duration):
    # Only use backends that still have request budget left.
    available_apis = [candidate for candidate in apis if not candidate.has_reached_request_limit()]
    # A dedicated loop name is used on purpose: `api` is the notebook-global
    # client that other cells call and must not be rebound here.
    for available_api in available_apis:
        print(available_api, len(available_api.past_requests))

    if available_apis:
        points = triangular_point_generator.get_next_points(
            np.array(list(zip(df["longitude"], df["latitude"]))), parallel_threads
        )

        # Run asynchronous requests
        loop = asyncio.get_event_loop()
        journeys = asyncio.ensure_future(start_async_process(available_apis, start, points, DAY))
        loop.run_until_complete(journeys)

        for journey in journeys.result():
            destination_i += 1
            # Stopovers are optional; every stopover becomes a data point of its own.
            if journey["stopovers"] is not None:
                for stop in journey["stopovers"]:
                    i += 1
                    df.loc[i] = [stop["longitude"], stop["latitude"], stop["time"]]
            i += 1
            df.loc[i] = [
                float(journey["destination"]["longitude"]),
                float(journey["destination"]["latitude"]),
                journey["arrivalTime"],
            ]
    else:
        print("All apis have reached there request threshold. Sleeping ...")
        time.sleep(2)

    # Checkpoint the collected data roughly once per minute.
    if datetime.datetime.now() - round_time > datetime.timedelta(seconds=60):
        df = df.drop_duplicates()
        df.to_csv("results/new_" + file_name, index=True, header=False)
        round_time = datetime.datetime.now()

    print(f"So far {i} points were generated of which {destination_i} are destinations.")

# Final save after the time budget is used up.
df = df.drop_duplicates()
df.to_csv("results/new_" + file_name, index=True, header=False)

time_taken_total = datetime.datetime.now() - start_time
print("This took us:")
print(time_taken_total)

Running the requests asynchronously speeds up the crawl to more than 200 requests per minute.

## Linear Computing

And now let's get the points.

In [None]:
# Serial crawl: walk a regular grid over the bounding box and request one
# journey per grid point with the single `api` client. Destinations and their
# stopovers are collected in `df` and checkpointed to "results/<file_name>".
i = 0
destination_i = 0
df = pd.DataFrame(columns=["X", "Y", "Time"])
# Make sure the output directory exists before the first checkpoint.
os.makedirs("results", exist_ok=True)
# Doubles as the checkpoint timer: it is reset after every intermediate save.
start_time = datetime.datetime.now()
for x in np.arange(bounding_box[0], bounding_box[1], step_size_x):
    for y in np.arange(bounding_box[2], bounding_box[3], step_size_y):
        destination_i += 1
        destination = {"longitude": str(x), "latitude": str(y), "address": "placeholder"}
        print(destination_i / length * 100, "% Fortschritt, es fehlen", length - destination_i)
        print("Current Destination: " + str(x) + ", " + str(y))
        # NOTE: errors raised by get_journey are not handled here; they
        # propagate and abort the crawl.
        journey = api.get_journey(start, destination, DAY)
        # Stopovers may be None (the parallel crawl guards against this as
        # well); treat that like "no stopovers".
        for stop in journey["stopovers"] or []:
            i += 1
            df.loc[i] = [stop["longitude"], stop["latitude"], stop["time"]]
        i += 1
        df.loc[i] = [
            journey["destination"]["longitude"],
            journey["destination"]["latitude"],
            journey["arrivalTime"],
        ]

        # Checkpoint roughly every 32 seconds.
        if datetime.datetime.now() - start_time > datetime.timedelta(seconds=32):
            df = df.drop_duplicates()
            df.to_csv("results/" + file_name, index=True, header=False)
            start_time = datetime.datetime.now()

# Final save once the whole grid has been visited.
df = df.drop_duplicates()
df.to_csv("results/" + file_name, index=True, header=False)

And one alternative method of computing the map.

(Different Method to generate directions https://stackoverflow.com/questions/57539749/find-out-centre-of-the-most-dense-region-in-a-scatter-plot )

In [None]:
# Evaluate a gaussian kde on a regular grid of nbins x nbins over data extents
# k = kde.gaussian_kde(data.T)
# xi, yi = np.mgrid[x.min():x.max():nbins*1j, y.min():y.max():nbins*1j]
# zi = k(np.vstack([xi.flatten(), yi.flatten()]))

Time to get the data.

## Plotting

Let's read data first.

In [None]:
# Resolve the origin again and derive the name of its results file,
# "<longitude>_<latitude>.csv" with both coordinates scaled by to_BVG_sdt.
start = api.query_location(query)
file_name_plottting = f"{to_BVG_sdt(start['longitude'])}_{to_BVG_sdt(start['latitude'])}.csv"
# The file has no header row: column 0 is the row index, followed by the
# coordinates and the time. The literal "None" is read as a missing value.
df = pd.read_csv(
    f"results/new_{file_name_plottting}",
    names=["X", "Y", "Time"],
    index_col=0,
    dtype={"X": np.float32, "Y": np.float32, "Time": str},
    na_values="None",
    sep=",",
)

And then transform it with some custom pipelines.

In [None]:
# Run the raw results through the project's TimeTransformer. The plotting cell
# below reads the "X", "Y" and "Time" columns of the result `dfn` and uses
# "Time" as numeric contour values.
time_transformer = TimeTransformer()
dfn = time_transformer.transform(df)

It's time to plot:

In [None]:
# get the start location coordinates from the file name
# Recover the start coordinates from the file name "<lon * 1e6>_<lat * 1e6>.csv":
# strip the ".csv" suffix, split at "_" and undo the scaling.
start_location = tuple(int(coordinate) * 1e-6 for coordinate in file_name_plottting[:-4].split("_"))
# Extent of the collected data points (currently informational only).
bounding_box_locations = (min(dfn["X"]), max(dfn["X"]), min(dfn["Y"]), max(dfn["Y"]))
# Extent of the background image; hard coded for the map "map_berlin_A.png".
bounding_box_map = (13.272, 13.491, 52.456, 52.563)
# Named `map_image` rather than `map` so the builtin `map` is not shadowed
# for the rest of the notebook session.
map_image = plt.imread("maps/map_berlin_A.png")
# 32 evenly spaced colour levels between the smallest and largest travel time.
levels = np.linspace(np.min(dfn["Time"]), np.max(dfn["Time"]), 32)

fig, ax = plt.subplots(figsize=(18, 18))
# Restrict the view to the crawled area.
ax.set_xlim(bounding_box[0], bounding_box[1])
ax.set_ylim(bounding_box[2], bounding_box[3])
# Semi-transparent filled contours of the travel time over the triangulated points.
ax.tricontourf(dfn["X"], dfn["Y"], dfn["Time"], levels=levels, alpha=0.5, cmap=CMAP, antialiased=True)
# Displaying destination locations
# ax.scatter(dfn["X"], dfn["Y"], alpha=0.5, color = "brown")

# "aspect=1.65" as a magic number
ax.imshow(map_image, extent=bounding_box_map, aspect=1.65)
# plot the starting position
ax.plot(start_location[0], start_location[1], marker="*", markersize=20, color="tab:orange")

# Create the output directory on demand so that savefig cannot fail on a fresh checkout.
os.makedirs("results/images", exist_ok=True)
plt.savefig("results/images/map_Berlin_BranderburgerTor.png")

In [None]:
# Save the current pyplot figure under a second file name.
# NOTE(review): this runs in its own cell; with the inline notebook backend the
# figure of the previous cell is presumably already closed, in which case an
# empty image would be written -- confirm.
plt.savefig("results/images/map_berlin_sWedding.png")

## Creating a map
This code is copied from the following website: http://bigmap.osmz.ru/

In [None]:
#!/usr/bin/env python
# Generated by BigMap 2. Permalink: http://bigmap.osmz.ru/bigmap.php?xmin=17592&xmax=17611&ymin=10740&ymax=10755&zoom=15&scale=256&tiles=osm-de
#
# Downloads all tiles of the configured tile range, pastes them into one large
# image, stamps the attribution into the lower left corner and saves the result.

import datetime
import io
import os
import random
import re
import time
import urllib.request

from PIL import Image, ImageDraw

# ^^^^^^ install "python-pillow" package | pip install Pillow | easy_install Pillow

(zoom, xmin, ymin, xmax, ymax) = (15, 17592, 10740, 17611, 10755)
layers = ["https://{abc}.tile.openstreetmap.de/tiles/osmde/!z/!x/!y.png"]
attribution = "Map data (c) OpenStreetMap, Tiles (c) OSM DE"
xsize = xmax - xmin + 1
ysize = ymax - ymin + 1
tilesize = 256
# Matches a "{abc}"-style placeholder; one of its characters is picked at
# random per request. Compiled once instead of once per tile.
subdomain_pattern = re.compile(r"\{([a-z0-9]+)\}")
# Upper bound in seconds for a single tile request, so that a stalled
# connection cannot block the download forever.
request_timeout = 30

resultImage = Image.new("RGBA", (xsize * tilesize, ysize * tilesize), (0, 0, 0, 0))
counter = 0
for x in range(xmin, xmax + 1):
    for y in range(ymin, ymax + 1):
        for layer in layers:
            url = layer.replace("!x", str(x)).replace("!y", str(y)).replace("!z", str(zoom))
            match = subdomain_pattern.search(url)
            if match:
                url = url.replace(match.group(0), random.choice(match.group(1)))
            print(url, "... ")
            try:
                req = urllib.request.Request(url, headers={"User-Agent": "BigMap/2.0"})
                # The context manager closes the HTTP response after reading.
                with urllib.request.urlopen(req, timeout=request_timeout) as response:
                    tile = response.read()
            except Exception as e:
                # Best effort: a tile that cannot be fetched stays transparent.
                print("Error", e)
                continue
            image = Image.open(io.BytesIO(tile))
            resultImage.paste(image, ((x - xmin) * tilesize, (y - ymin) * tilesize), image.convert("RGBA"))
            # Pause for two seconds after every ten downloaded tiles.
            counter += 1
            if counter == 10:
                time.sleep(2)
                counter = 0

draw = ImageDraw.Draw(resultImage)
draw.text((5, ysize * tilesize - 15), attribution, (0, 0, 0))
del draw

# `now` is only referenced by the commented-out file name template below.
now = datetime.datetime.now()
outputFileName = "maps/map_Berlin_A_high.png"  # % (zoom, now.year % 100, now.month, now.day, now.hour, now.minute)
# Make sure the target directory exists before writing the stitched image.
os.makedirs(os.path.dirname(outputFileName), exist_ok=True)
resultImage.save(outputFileName)