In [None]:
# @title Install dependencies
!pip install jupyterlab-widgets geopandas geoplot
!pip install matplotlib --upgrade

In [None]:
# @title Select the whl file for the wawbus package
from IPython.display import display
from ipywidgets import FileUpload

# Single-file picker restricted to .whl files; later cells read the chosen
# file back through `upload.value`.
upload = FileUpload(multiple=False, accept='.whl')
display(upload)

In [None]:
# @title upload whl file
# Write every uploaded file into the current working directory.
# ipywidgets 7 exposes `upload.value` as {filename: {'content': bytes, ...}},
# ipywidgets >= 8 as a tuple of {'name': ..., 'content': memoryview, ...};
# normalise both shapes to (filename, content) pairs.
uploaded = upload.value
if isinstance(uploaded, dict):
  uploaded_files = [(name, entry['content']) for name, entry in uploaded.items()]
else:
  uploaded_files = [(entry['name'], entry['content']) for entry in uploaded]

for uploaded_filename, content in uploaded_files:
  # plain binary write; the file is never read back through this handle
  with open(uploaded_filename, "wb") as f:
    f.write(content)

In [None]:
# @title Install the wawbus package
!pip install {list(upload.value.keys())[0]}

In [None]:
from wawbus import WawBus
from wawbus.constants import MAX_SPEED, CRS

import numpy as np
import pandas as pd
import geopandas as gpd
import geoplot.crs as gcrs
import matplotlib.pyplot as plt
import matplotlib.ticker as mticker
import matplotlib as mpl
import geoplot as gplt

from ipywidgets import Dropdown, Text, VBox, IntSlider

In [None]:
# @title Main api configuration
# Widget descriptions are cut off at a fixed default width; 'initial' lets
# each label take the width of its full text so the instructions stay readable.
full_label = {'description_width': 'initial'}

# 'None' (the string) means "do not load a prefetched dataset".
dataset = Dropdown(
    options=['20240207-66k', '20240208-140k', '20240209-1M', 'None'],
    description='Prefetched dataset',
    style=full_label,
)
api_key = Text(
    description='API key (optional)',
    placeholder='Enter your API key',
    style=full_label,
)
collection = IntSlider(
    description='How many data points to collect. If zero, only the prefetched dataset will be used',
    min=0,
    step=1,
    style=full_label,
    layout={'width': 'auto'},  # give the slider room next to its long label
)
display(VBox([dataset, api_key, collection]))

In [None]:
# The dropdown offers the string 'None' as its "no dataset" choice; turn it
# into a real None before handing it to WawBus.
prefetched_dataset = None if dataset.value == 'None' else dataset.value
api = WawBus(apikey=api_key.value, dataset=prefetched_dataset)

In [None]:
# Call collect_positions only when the slider asks for at least one data point.
requested_points = collection.value
if requested_points > 0:
    api.collect_positions(requested_points)

In [None]:
# Preview the head of the dataset currently held by the WawBus client.
api.dataset.head()

In [None]:
# Result of calculate_speed(); the cells below read its Lon, Lat and Speed columns.
df = api.calculate_speed()
df.head()

In [None]:
# Build point geometries from the Lon/Lat columns (in the package's CRS) and
# attach them to df as the geometry of a GeoDataFrame.
bus_points = gpd.points_from_xy(df.Lon, df.Lat, crs=CRS)
gdf = gpd.GeoDataFrame(df, geometry=bus_points)
gdf.head()

In [None]:
# Keep only the rows whose Speed exceeds 50.
speeding = gdf.loc[gdf['Speed'] > 50]

In [None]:
# @title pie graph of invalid speed (>MAX_SPEED)
labels = ['valid speed', 'invalid speed']

# Label each reading *before* counting. value_counts() orders its result by
# frequency, so attaching a fixed label list afterwards would swap the labels
# whenever invalid readings outnumber valid ones, and would fail outright
# when only one of the two classes is present.
speed_validity = (speeding['Speed'] > MAX_SPEED).map({False: labels[0], True: labels[1]})
speed_validity.value_counts().plot.pie(autopct='%1.1f%%')
plt.ylabel('')
plt.title('pie graph of invalid speed (>MAX_SPEED)')
plt.show()

In [None]:
# @title removal of invalid speed entries
# Readings above MAX_SPEED are treated as invalid and discarded.
speeding = speeding.loc[speeding['Speed'] <= MAX_SPEED]

In [None]:
# @title 3250 entries of >50km/h interactive map
def _marker_style(feature):
    """Scale the marker radius linearly with the feature's Speed property."""
    return {"radius": feature["properties"]["Speed"] / 5 - 4}


# Only the first 3250 rows and the columns relevant to the map are shown.
map_columns = ['Speed', 'geometry', 'VehicleNumber', 'Lines']
speeding.head(3250)[map_columns].explore(
    column='Speed',
    style_kwds={"style_function": _marker_style},
)

In [None]:
# Polygons used for the spatial joins below (their 'name' and 'geometry'
# columns are read); presumably Warsaw district boundaries, judging by the
# file name - verify against the data repository.
wawmap = gpd.read_file('https://raw.githubusercontent.com/C10udburst/wawbus-data/master/warszawa-dzielnice.geojson')

In [None]:
# Tag every speeding reading with the name of each polygon it intersects.
# With a left join, readings that match no polygon are kept with a missing
# name. The helper column 'index_right' added by sjoin is not needed.
district_shapes = wawmap[['name', 'geometry']]
speed_map = (
    gpd.sjoin(speeding, district_shapes, how="left", predicate="intersects")
    .drop(columns=['index_right'])
)
speed_map.head()

In [None]:
# @title >50km/h heatmap

# we filter to only use entries within Warsaw: rows for which the spatial
# join found no polygon have a missing 'name'. Restricting dropna to that
# column avoids discarding rows that merely have a NaN in an unrelated column.
speeding_warsaw = speed_map.dropna(subset=['name']).geometry

# rc_context raises the dpi for this figure only and restores the previous
# setting on exit, even if plotting raises (instead of hard-coding 100).
with mpl.rc_context({'figure.dpi': 200}):
    webmap = gplt.webmap(speeding_warsaw, projection=gcrs.WebMercator())
    ax = gplt.polyplot(wawmap, projection=gcrs.WebMercator(), zorder=1, ax=webmap)
    heatmap = gplt.kdeplot(speeding_warsaw, cmap='Reds', shade=True, ax=ax, projection=gcrs.WebMercator(), alpha=0.4)
    plt.title(">50km/h heatmap")
    plt.show()

In [None]:
# @title >50 km/h per district counts
# Only a missing district name should exclude a reading; a bare dropna()
# would also drop rows that have a NaN in any other column.
speed_map.dropna(subset=['name']).groupby('name')['Speed'].count()

In [None]:
# @title >50 km/h per district pie plot
# The 'Warszawa' entry is excluded so the slices are the remaining named areas.
# Only a missing name disqualifies a reading (a bare dropna() would also drop
# rows with a NaN in any other column).
district_rows = speed_map[speed_map.name != 'Warszawa'].dropna(subset=['name'])
district_rows.groupby('name')['Speed'].count().plot.pie(autopct='%1.1f%%')
plt.title(">50 km/h per district pie plot")
plt.ylabel('')
plt.show()

In [None]:
# @title speed histogram for >50km/h
# Speeds of the readings that matched a polygon (i.e. have a name).
warsaw_speeds = speed_map.dropna(subset=['name'])['Speed']
mean_speed = warsaw_speeds.mean()

ax = warsaw_speeds.plot.hist(bins=20)
# Mark the mean of the Speed column. The previous `plt.avxline` was a typo
# for `axvline`, so the AttributeError handler always fired and the line was
# never drawn; the mean was also taken over the whole frame, not over Speed.
ax.axvline(mean_speed, color='black', linestyle='dashed', linewidth=1, label=f'mean = {mean_speed:.1f}')
ax.legend()
plt.title("speed histogram for >50km/h")
plt.show()

In [None]:
# @title >50km/h instances per hour and minute top 20 entries

# Group the readings by the (hour, minute) of their Time value, count them,
# and keep the 20 most frequent combinations.
hour_of_day = speeding.Time.map(lambda stamp: stamp.hour)
minute_of_hour = speeding.Time.map(lambda stamp: stamp.minute)
counts_by_minute = speeding.groupby(by=[hour_of_day, minute_of_hour])['Speed'].count()
busiest_minutes = counts_by_minute.sort_values(ascending=False).head(20)

ax = busiest_minutes.plot.barh()
# annotate every bar with its count
ax.bar_label(ax.containers[0])

plt.ylabel("(hour, minute)")
plt.title(">50km/h instances by hour and minute")
plt.show()

In [None]:
# @title >50km/h instances by line number top 15 entries

# Count the readings per value of 'Lines' and keep the 15 largest counts.
counts_by_line = speeding.groupby('Lines')['Speed'].count()
busiest_lines = counts_by_line.sort_values(ascending=False).head(15)

ax = busiest_lines.plot.barh()
# annotate every bar with its count
ax.bar_label(ax.containers[0])

plt.title(">50km/h instances by line number top 15 entries")
plt.show()

In [None]:
# @title percentage of buses >50km/h by district

# Tag every reading (not only the speeding ones) with the polygons it
# intersects and keep those that matched at least one (non-missing name).
all_map = gpd.sjoin(left_df=gdf, right_df=wawmap[['name', 'geometry']], how="left", predicate="intersects")
all_map = all_map.drop(columns=['index_right']).dropna(subset=['name'])

# Readings above MAX_SPEED are treated as invalid elsewhere in this notebook,
# so they are excluded from both numerator and denominator here as well.
valid_map = all_map[all_map.Speed <= MAX_SPEED]

# A percentage of readings is a ratio of *counts*; dividing sums of the speed
# values (as before) weights every reading by how fast the bus was going.
total_per_district = valid_map.groupby('name')['Speed'].count()
speeding_per_district = (
    valid_map[valid_map.Speed > 50]
    .groupby('name')['Speed']
    .count()
    # districts without any speeding reading get 0 % instead of NaN
    .reindex(total_per_district.index, fill_value=0)
)
df2 = 100 * speeding_per_district / total_per_district

ax = df2.sort_values().plot.barh()

ax.bar_label(ax.containers[0], fmt='%.1f%%')

plt.title("percentage of buses >50km/h by district")
plt.show()