In [None]:
# @title Install dependencies
!pip install jupyterlab-widgets geopandas geoplot
!pip install matplotlib --upgrade

In [None]:
# @title Select the whl file for the wawbus package
from IPython.display import display
from ipywidgets import FileUpload

# Single-file picker restricted to wheel archives; the chosen file is read
# back from `upload.value` by the following cells.
upload = FileUpload(multiple=False, accept='.whl')
display(upload)

In [None]:
# @title upload whl file
# Persist every uploaded file into the working directory so pip can install it.
# The shape of `upload.value` depends on the ipywidgets major version:
#   * 7.x: dict mapping filename -> {'content': bytes, ...}
#   * 8.x: tuple of dicts, each with 'name' and 'content' entries
if isinstance(upload.value, dict):
  uploaded_files = [(name, entry['content']) for name, entry in upload.value.items()]
else:
  uploaded_files = [(entry['name'], entry['content']) for entry in upload.value]

for uploaded_filename, content in uploaded_files:
  # Binary mode: the content is the raw bytes of the wheel archive.
  with open(uploaded_filename, "wb") as f:
    f.write(content)

In [None]:
# @title Install the wawbus package
!pip install {list(upload.value.keys())[0]}

In [None]:
from wawbus import WawBus
from wawbus.constants import M_TO_KM, CRS

import numpy as np
import pandas as pd
import geopandas as gpd
import geoplot.crs as gcrs
import matplotlib.pyplot as plt
import matplotlib.ticker as mticker
import matplotlib as mpl
import geoplot as gplt
from shapely.geometry import Point, LineString

from ipywidgets import Dropdown, Text, VBox, IntSlider

In [None]:
# @title Main api configuration
# Three inputs read by the following cells: which prefetched dataset to load
# ('None' is a sentinel for "no dataset"), an optional API key, and how many
# additional positions to collect.
dataset = Dropdown(
    description='Prefetched dataset',
    options=['20240207-66k', '20240208-140k', '20240209-1M', 'None'],
)
api_key = Text(placeholder='Enter your API key', description='API key (optional)')
collection = IntSlider(
    min=0,
    step=1,
    description='How many data points to collect. If zero, only the prefetched dataset will be used',
)
config_form = VBox([dataset, api_key, collection])
display(config_form)

In [None]:
# The dropdown can only hold strings, so its 'None' entry is translated into
# a real None before being handed to WawBus.
selected_dataset = None if dataset.value == 'None' else dataset.value
api = WawBus(apikey=api_key.value, dataset=selected_dataset)

In [None]:
# Collection is optional: a slider value of zero skips it entirely.
requested_points = collection.value
if requested_points > 0:
    api.collect_positions(requested_points)

In [None]:
# Preview the dataset held by the API client.
# NOTE(review): presumably a pandas DataFrame, so head() shows its first rows — confirm.
api.dataset.head()

In [None]:
# Store the result of wawbus' `calculate_late` in `df`; the cells below filter
# and plot it, relying on columns such as 'dist', 'Lon', 'Lat', 'Time',
# 'Lines' and 'trasa'.
# NOTE(review): the exact lateness criterion is defined inside wawbus — confirm there.
df = api.calculate_late()
df.head()

In [None]:
# @title remove entries where distance to stop is > 0.8km as it is unlikely to be a valid match

# Largest bus-to-stop distance still accepted as a plausible match
# (0.8 km according to the cell title).
MAX_STOP_DIST = 0.8

labels = ['keeping', 'removed']

# A single mask drives both the chart and the filter, so the pie always shows
# exactly what the filter does. Rows with a missing distance fail the
# comparison and are therefore reported (and dropped) as 'removed'.
keep_mask = df['dist'] <= MAX_STOP_DIST

# Name each row's category before counting. value_counts() orders its result
# by frequency, so attaching labels by position would swap them whenever most
# rows are removed, and would not fit at all when only one category occurs.
match_counts = (
    keep_mask.map({True: labels[0], False: labels[1]})
    .value_counts()
    .reindex(labels)   # stable wedge order: 'keeping' first, then 'removed'
    .dropna()          # omit a category that does not occur
)
match_counts.plot.pie(autopct='%1.1f%%')
plt.ylabel('')
plt.title('pie graph of invalid stop matches')
plt.show()

df = df[keep_mask]

In [None]:
# Attach a point geometry built from each row's Lon/Lat pair so the rows can
# take part in spatial operations.
bus_points = gpd.points_from_xy(df.Lon, df.Lat, crs=CRS)
gdf = gpd.GeoDataFrame(df, geometry=bus_points)
gdf.head()

In [None]:
# District outlines, fetched as GeoJSON from the wawbus-data repository.
DISTRICTS_GEOJSON_URL = 'https://raw.githubusercontent.com/C10udburst/wawbus-data/master/warszawa-dzielnice.geojson'
wawmap = gpd.read_file(DISTRICTS_GEOJSON_URL)

In [None]:
# Tag every row with the name of each shape it intersects. The left join keeps
# rows that match no shape (their 'name' is then missing), and the helper
# column 'index_right' added by sjoin is discarded.
district_shapes = wawmap[['name', 'geometry']]
late_map = (
    gpd.sjoin(left_df=gdf, right_df=district_shapes, how="left", predicate="intersects")
    .drop(columns=['index_right'])
)
late_map.head()

In [None]:
# @title Interactive map of late buses and their respective stops
def make_line(row):
  """Build the segment joining the two coordinate pairs stored in `row`.

  The first endpoint is (row['Lon'], row['Lat']) and the second is
  (row['dlug_geo'], row['szer_geo']) — presumably the bus position and the
  matched stop, going by the cell title.

  Returns a shapely LineString.
  """
  return LineString([
      Point(row['Lon'], row['Lat']),
      Point(row['dlug_geo'], row['szer_geo'])
  ])

# Only the first 300 rows are drawn, so take that sample *before* building the
# geometries: the row-wise apply then touches 300 rows instead of the whole
# dataset. The copy keeps `df` itself free of the columns added below.
df1 = df.head(300).copy()
df1['Time'] = df1['Time'].astype('str')
df1['czas'] = df1['czas'].astype('str')
df1['geometry'] = df1.apply(make_line, axis='columns')

gpd.GeoDataFrame(df1, geometry='geometry', crs=CRS)[['geometry','Lines', 'trasa', 'nr_zespolu', 'dist']].explore()

In [None]:
# @title late bus heatmap

# we filter to only use entries within Warsaw
# NOTE(review): dropna() discards rows with a missing value in *any* column,
# not only those without a matched 'name' — confirm this is intended.
late_warsaw = late_map.dropna().geometry

# Raise the resolution for this figure only. rc_context restores the previous
# 'figure.dpi' on exit — even if plotting raises — instead of leaving it at
# 200 or overwriting it with a hard-coded value.
with mpl.rc_context({'figure.dpi': 200}):
    # Layers, bottom to top: web map tiles, district outlines, density estimate.
    webmap = gplt.webmap(late_warsaw, projection=gcrs.WebMercator())
    ax = gplt.polyplot(wawmap, projection=gcrs.WebMercator(), zorder=1, ax=webmap)
    heatmap = gplt.kdeplot(late_warsaw, cmap='Reds', shade=True, ax=ax, projection=gcrs.WebMercator(), alpha=0.4)
    plt.title("late bus heatmap")
    plt.show()

In [None]:
# @title late bus by district
# Count the remaining rows per 'name' after discarding incomplete rows
# (which includes rows that matched no shape in the spatial join).
complete_rows = late_map.dropna()
complete_rows.groupby('name')['dist'].count()

In [None]:
# @title late per district pie plot
# Same per-name counts as a pie chart, with the entry named 'Warszawa' left out.
complete_rows = late_map.dropna()
district_rows = complete_rows.loc[complete_rows.name != 'Warszawa']
late_per_district = district_rows.groupby('name')['dist'].count()

late_per_district.plot.pie(autopct='%1.1f%%')
plt.ylabel('')
plt.title("late per district pie plot")
plt.show()

In [None]:
# @title late instances per hour and minute top 20 entries

# Group rows by the (hour, minute) of their 'Time' value and keep the 20
# most frequent combinations.
hour_key = df.Time.map(lambda ts: ts.hour)
minute_key = df.Time.map(lambda ts: ts.minute)
per_minute_counts = df.groupby(by=[hour_key, minute_key])['dist'].count()
busiest_minutes = per_minute_counts.sort_values(ascending=False).head(20)

ax = busiest_minutes.plot.barh()
# Print each bar's count next to it.
ax.bar_label(ax.containers[0])

ax.set_ylabel("(hour, minute)")
ax.set_title("late instances by hour and minute")
plt.show()

In [None]:
# @title late instances by line number top 17 entries

# Rows per value of 'Lines', largest first, limited to the 17 biggest groups.
per_line_counts = df.groupby('Lines')['dist'].count()
top_lines = per_line_counts.sort_values(ascending=False).head(17)

ax = top_lines.plot.barh()
# Print each bar's count next to it.
ax.bar_label(ax.containers[0])

ax.set_title("late instances by line number top 17 entries")
plt.show()

In [None]:
# @title late instances by route number top 17 entries

# Rows per value of 'trasa', largest first, limited to the 17 biggest groups.
per_route_counts = df.groupby('trasa')['dist'].count()
top_routes = per_route_counts.sort_values(ascending=False).head(17)

ax = top_routes.plot.barh()
# Print each bar's count next to it.
ax.bar_label(ax.containers[0])

ax.set_title("late instances by route number top 17 entries")
plt.show()