# Explore

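Contents:
- Explore the data set: load the preprocessed bike counts, count the counting stations, and plot weekly totals per station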

## Notebook parameters

In [None]:
# Relative path to the preprocessed bike-count data; it is read below with pd.read_feather.
zhbikes_data = '../data/preprocessed/zhbikes.feather'

## Read the preprocessed data

In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

%matplotlib inline

In [None]:
# Load the Feather file named in the parameters cell; the later cells all work from this `df`.
df = pd.read_feather(zhbikes_data)

In [None]:
# Peek at the first rows to see which columns the preprocessed data provides.
df.head()

## Question: How many stations do we find in the data?

In [None]:
# Distinct values of the `counting_station` column; reused below to build the per-station weekly tables.
stations = df['counting_station'].unique()

In [None]:
# The unique values form a one-dimensional array, so the single entry of `shape` is the number of stations.
stations.shape

## Let's plot the data for each station, binning on a weekly basis

In [None]:
def sum_by_week(station, data=None):
    """Aggregate the counts of one counting station into weekly totals.

    Parameters
    ----------
    station
        Value of the ``counting_station`` column to select.
    data : pandas.DataFrame, optional
        Frame holding the ``counting_station``, ``datetime``, ``velo_in`` and
        ``velo_out`` columns. When omitted, the notebook-level ``df`` is used,
        which keeps existing ``sum_by_week(station)`` calls working.

    Returns
    -------
    pandas.DataFrame
        One row per ``'1W'`` resampling bin with the columns ``datetime``,
        ``velo_in`` (weekly sum), ``velo_out`` (weekly sum, negated),
        ``velo_all`` (weekly sum of both directions, not negated) and
        ``counting_station``.
    """
    if data is None:
        # Fall back to the frame loaded at the top of the notebook.
        data = df

    station_rows = data.loc[data['counting_station'] == station]
    weekly = (
        station_rows
        .set_index('datetime')
        .resample('1W')
        .agg({'velo_in': 'sum', 'velo_out': 'sum'})
        .reset_index()
    )

    # Compute the total before flipping the sign of the outbound counts,
    # otherwise the two directions would cancel each other out.
    weekly['velo_all'] = weekly['velo_out'] + weekly['velo_in']
    # Outbound counts are stored as negative values so that they can be drawn
    # below the zero line, mirroring the inbound counts.
    weekly['velo_out'] = -weekly['velo_out']
    weekly['counting_station'] = station
    return weekly

In [None]:
# Build one weekly table per station, then stack them into a single long
# frame with a fresh 0..n-1 index.
dataframes_weekly = list(map(sum_by_week, stations))
df_weekly = pd.concat(dataframes_weekly, ignore_index=True)

In [None]:
# One panel per counting station, four panels per row. Each panel shows the
# weekly inbound counts and the negated weekly outbound counts over time.
grid = sns.FacetGrid(df_weekly, col="counting_station", hue="counting_station", palette="tab20c",
                     col_wrap=4, height=3, aspect=3)
grid.map(plt.plot, "datetime", "velo_in")
grid.map(plt.plot, "datetime", "velo_out")
# Dotted zero line separating the two directions.
grid.map(plt.axhline, y=0, ls=":", c=".5")
# FacetGrid.map relabels the axes after every call, which would leave the
# y-axis titled "velo_out" even though both series are drawn; label it explicitly.
grid.set_axis_labels("datetime", "velo_in (+) / velo_out (-)")

plt.show()

In [None]:
# Enlarge the figure so the weekly totals of all stations fit in one chart.
# NOTE(review): sns.set changes plotting defaults globally, so plot cells
# re-run after this one will presumably pick up these settings too - confirm.
sns.set(rc={"figure.figsize": (15, 10)})
sns.lineplot(
    data=df_weekly,
    x="datetime",
    y="velo_all",
    hue="counting_station",
)

plt.show()