# Agencies

### Downloads + Imports

In [None]:
# https://www.vbb.de/media/download/2029
from io import BytesIO
from zipfile import ZipFile
from urllib.request import urlopen

resp = urlopen("https://www.vbb.de/media/download/2029")
%time zipfile = ZipFile(BytesIO(resp.read()))
zipfile.namelist()

In [None]:
import pandas as pd
import numpy as np
import folium
import holoviews as hv
import holoviews.operation.datashader as hd
# Set the `cmap` attribute of holoviews' datashader `shade` operation to a
# light-to-dark blue colour list (presumably the default for later shade calls).
hd.shade.cmap=["lightblue", "darkblue"]
# Load the holoviews plotting extensions named here.
hv.extension("bokeh", "matplotlib") 
import datashader as ds
import datashader.transfer_functions as tf

### Setup Plotting

In [None]:
import matplotlib.pyplot as plt
import matplotlib.ticker as mtick
import seaborn as sns
sns.set_style(
    style='darkgrid', 
    rc={'axes.facecolor': '.9', 'grid.color': '.8'}
)
sns.set_palette(palette='deep')
sns_c = sns.color_palette(palette='deep')
%matplotlib inline
from pandas.plotting import register_matplotlib_converters
register_matplotlib_converters()

plt.rcParams['figure.figsize'] = [10, 6]
plt.rcParams['figure.dpi'] = 100

In [None]:
from bokeh.plotting import figure, output_notebook, show, reset_output
from bokeh.tile_providers import OSM, get_provider

# Configure bokeh to send its output to the notebook.
output_notebook()

#### Read and format data


In [None]:
%time agencies = pd.read_csv(zipfile.open('agency.txt'))
agencies.tail()
agencies.info()

In [None]:
# Display the first rows of the agencies table.
agencies.head()

In [None]:
%time routes = pd.read_csv(zipfile.open('routes.txt'))
routes.tail()

routes = routes.join(agencies[['agency_id','agency_name']].set_index('agency_id'), on='agency_id')
routes.head()

In [None]:
# Number of routes per agency name; show the first entries of the count table.
routes['agency_name'].value_counts().head()

In [None]:
# Human-readable labels for the numeric route_type codes.
rename = {
    2: "Intercity Rail Service",
    100: "Railway Service",
    109: "Suburban Railway",
    400: "Urban Railway Service",
    700: "Bus Service",
    900: "Tram Service",
    1000: "Water Transport Service",
}
# Assign the result back instead of calling replace(inplace=True) on the
# column selection: the in-place form acts on a temporary Series, which
# triggers a chained-assignment warning in recent pandas and leaves `routes`
# unchanged once Copy-on-Write is enabled. Codes missing from `rename` are
# kept as they are.
routes['route_type'] = routes['route_type'].replace(rename)
routes.head()

In [None]:
# Number of routes per (route type, agency) pair.
routes_sorted = (
    routes.groupby(['route_type', 'agency_name'])
    .size()
    .reset_index(name="count")
)
# Total number of routes of each agency across all route types. The string
# alias 'sum' is used because passing np.sum to transform is deprecated.
routes_sorted['agency_total'] = (
    routes_sorted.groupby('agency_name')['count'].transform('sum')
)
# Lump every agency with fewer than 40 routes in total into one 'Other' bucket.
routes_sorted.loc[routes_sorted['agency_total'] < 40, 'agency_name'] = 'Other'
routes_sorted = (
    routes_sorted
    .sort_values(['agency_total', 'agency_name', 'count'], ascending=False)
    .drop('agency_total', axis=1)
)

# Re-aggregate so that all 'Other' rows of a route type collapse into one row.
t = (
    routes_sorted.groupby(['route_type', 'agency_name'])
    .aggregate({'count': 'sum'})
    .reset_index()
)
# `ac` is the number of routes per route type; `share` is each agency's
# fraction of that total.
t = t.assign(
    ac=lambda x: x.groupby('route_type')['count'].transform('sum'),
    share=lambda x: x['count'].div(x['ac']),
)
# Wide table: one row per route type, one column per agency, values = share.
t = t.pivot(index='route_type', columns='agency_name', values='share')
# Move the 'Other' bucket to the last column. It only exists when at least one
# agency fell below the threshold, so guard against a KeyError.
if 'Other' in t.columns:
    t['Other'] = t.pop('Other')
# Agencies that do not serve a route type have a share of zero there.
t = t.fillna(0.0)

In [None]:
# Heatmap of the share table `t` (rows: route types, columns: agencies).
fig, ax = plt.subplots(figsize=(15, 6))
cmap = sns.light_palette(sns_c[0])


def fmt(y, _):
    """Format a colorbar tick value as a whole-number percentage."""
    return f'{y :0.0%}'


# Cell annotations use two decimals; the colorbar ticks use `fmt`.
heatmap_options = dict(
    vmin=0.0,
    vmax=1.0,
    cmap="YlGnBu",
    linewidths=0.1,
    linecolor='black',
    annot=True,
    fmt='0.2%',
    cbar_kws={'format': mtick.FuncFormatter(fmt)},
)
sns.heatmap(data=t, ax=ax, **heatmap_options)
ax.set(title='Agency Type Share per Route Type');