# Zurich Bike Data

How many people cycle on an average weekday in Zurich?

In [None]:
# Fetch the data if you don't have it yet: uncomment the line below to
# download the 2016 CSV file that this notebook reads.
#!wget https://data.stadt-zuerich.ch/storage/f/verkehrszaehlungen_werte_fussgaenger_velo/2016_verkehrszaehlungen_werte_fussgaenger_velo.csv

In [None]:
%matplotlib inline

In [None]:
import numpy as np
import pandas as pd

Read in the 2016 data. This dataset contains the number of bikes and pedestrians
passing each counting station, recorded every 15 minutes.

Tell the CSV reader that the `Datum` column contains dates, and use it as the index.

In [None]:
# Load the 2016 counts: `Datum` is parsed as datetimes and becomes the row index.
df = pd.read_csv(
    filepath_or_buffer="2016_verkehrszaehlungen_werte_fussgaenger_velo.csv",
    index_col='Datum',
    parse_dates=['Datum'],
)

In [None]:
# show the first five rows of the table
df.head(n=5)

In [None]:
# Five randomly chosen entries. The fixed `random_state` makes the same five
# rows come up on every run, so the displayed output is reproducible after
# a kernel restart.
df.sample(5, random_state=0)

In [None]:
# The timestamps in the index are not unique: for each moment in time there
# are several entries. The first number of the shape is the row count.
rows_feb15 = df.loc["2016-02-15 13:45:00"]
rows_feb15.shape

In [None]:
# same check for a second timestamp
rows_feb07 = df.loc['2016-02-07 19:45:00']
rows_feb07.shape

In [None]:
# Count the rows per counting station: the result lists every station in
# Zurich that appears in the data, together with the number of observations
# it has made.
df['Standort'].value_counts()

# Focus on Y2G13124879

Not a gravitational wave, just one random counting station in Zurich.

This counter is on [Militaerbruecke](https://www.google.com/maps/place/Milit%C3%A4rbr%C3%BCcke,+8004+Z%C3%BCrich,+Switzerland/@47.3750396,8.5320315,17z/data=!4m5!3m4!1s0x47900a05078b48d5:0x63d0455461c631e7!8m2!3d47.3750563!4d8.534021). [Inward direction](https://www.google.com/maps/place/Langstrasse,+Z%C3%BCrich,+Switzerland/@47.3784256,8.5196336,15z/data=!3m1!4b1!4m5!3m4!1s0x47900a181f1fbc37:0x4ded91a7167d938a!8m2!3d47.3787733!4d8.5273363) [Outward direction](https://www.google.com/maps/place/Bahnhofstrasse,+8001+Z%C3%BCrich,+Switzerland/@47.3715832,8.5304294,15z/data=!3m1!4b1!4m5!3m4!1s0x47900a0a4578c013:0x96371feb65728644!8m2!3d47.3722923!4d8.539831)

In [None]:
# Keep only the rows recorded by station Y2G13124879.
# Future idea: find out the name and location of each station
loc_Y2 = df.loc[df['Standort'] == 'Y2G13124879']

In [None]:
# Resample from one entry per 15 minutes to one entry per day by summing.
# Only the numeric columns are summed: recent pandas releases no longer drop
# non-numeric columns silently, so the string `Standort` column would
# otherwise be carried into the daily table and break the numeric
# aggregations further down.
loc_Y2_daily = loc_Y2.select_dtypes(include='number').resample('1D').sum()
loc_Y2_daily.head()

In [None]:
# Drop the columns we aren't really interested in: the `Fuss_*` (pedestrian)
# counts and the object id.
# The id column is matched by substring because its header as read from the
# CSV carries extra, partly invisible characters (the "really evil" column
# name), which are easy to get wrong in a string literal.
# The list is built from the columns that are actually present and the result
# is rebound instead of using `inplace=True`, so re-running this cell is a
# harmless no-op rather than a KeyError.
unwanted = [col for col in loc_Y2_daily.columns
            if 'ObjectID' in col or col in ('Fuss_in', 'Fuss_out')]
loc_Y2_daily = loc_Y2_daily.drop(labels=unwanted, axis=1)

In [None]:
# line chart of the daily totals, one line per remaining column
loc_Y2_daily.plot.line(title='riders per day')

In [None]:
# the date index exposes the `day` (day-of-month) part of each entry
daily_index = loc_Y2_daily.index
daily_index.day

In [None]:
# the index also knows the position of each day within its week
# (0 is Monday, 6 is Sunday)
loc_Y2_daily.index.dayofweek

In [None]:
# store the day of the week explicitly as its own column so that we can
# group on it
loc_Y2_daily['weekday'] = loc_Y2_daily.index.weekday

In [None]:
# Start by collecting the daily rows that share the same weekday.
# groupby() only defines the groups (one per distinct value in the named
# column); nothing is computed until an aggregation is requested.
grouped = loc_Y2_daily.groupby(by='weekday')

In [None]:
# You now need to specify how to combine all the rows that share the same
# value in `weekday`: mean, standard deviation, sum, etc.
# The aggregation is named by string ('mean') rather than passed as a numpy
# function: handing numpy callables such as `np.mean` to groupby aggregation
# is deprecated in recent pandas, and the string selects the same built-in
# groupby mean.
average = grouped.aggregate('mean')
average

In [None]:
# Several aggregations can be computed in one go by passing a list.
# 'mean' and 'std' are given by name: passing `np.mean` / `np.std` is
# deprecated in recent pandas, and the names keep pandas' own groupby
# implementations (its sample standard deviation for 'std').
# `np.count_nonzero` has no string alias, so it is still passed as a function.
grouped.aggregate(['mean', 'std', np.count_nonzero])

In [None]:
# A dict computes a different aggregation for each column.
# The aggregations are named by string because passing `np.mean` / `np.std`
# to groupby aggregation is deprecated in recent pandas.
grouped.aggregate({'Velo_in': 'mean', 'Velo_out': 'std'})

In [None]:
# Can you remember which number corresponds to which day? I can't.
# Replace the weekday numbers (0 = Monday ... 6 = Sunday) with names.
# The mapping is done by value rather than by position, so each row gets the
# right label even if a weekday is missing from the data, and re-running the
# cell leaves the already-renamed index untouched.
weekday_names = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
average = average.rename(index=dict(enumerate(weekday_names)))
# finally the plot we wanted to make
average.plot(kind='bar', title="average riders per weekday")