# Average number of bikes by windowing

This notebook calculates the average number of bikes at a given time by looking at all the moments that fall within a given interval around that time. This performs slightly better than `challenge_2016_hourly_average` because it avoids the "step" effect - for example, 14h59 is in the 14h00-15h00 interval but sits on its edge; it "feels" more right to look at the 14h29-15h29 interval. However, this method is "lazier" and makes all its calculations online, whereas `challenge_2016_hourly_average` does its calculations offline.

In [2]:
import datetime as dt
import pandas as pd


def get_station_updates(city, station):
    """Load the update history of one station.

    Reads ``challenge_data/part_1/<city>/stations/<station>.csv`` and returns
    it as a DataFrame whose ``moment`` column has been converted with
    ``pd.to_datetime``.
    """
    csv_path = 'challenge_data/part_1/{}/stations/{}.csv'.format(city, station)
    updates = pd.read_csv(csv_path)
    return updates.assign(moment=pd.to_datetime(updates['moment']))


def add(t, delta):
    """Shift a time of day by ``delta``, wrapping around midnight.

    Parameters
    ----------
    t : datetime.time
        Time of day to shift.
    delta : datetime.timedelta
        Offset to apply; may be negative.

    Returns
    -------
    datetime.time
        The shifted time of day (the date part is discarded, so 23:50 + 20
        minutes gives 00:10 and 00:10 - 20 minutes gives 23:50).
    """
    # Anchor on a mid-range date: anchoring on date(1, 1, 1) (datetime.min)
    # makes any subtraction that crosses midnight raise OverflowError.
    anchor = dt.datetime.combine(dt.date(2000, 1, 1), t)
    return (anchor + delta).time()

# Half-width of the averaging window used by window_mean_bikes: updates whose
# time of day is strictly within 20 minutes of the target time are averaged.
d = dt.timedelta(minutes=20)

def window_mean_bikes(city, station, timestamps, delta=None):
    """Average the observed bikes of a station around each requested moment.

    For every timestamp, the station's updates whose position in the week
    (weekday + time of day) lies strictly within ``delta`` of the timestamp's
    own position are selected, and the mean of their ``bikes`` values is taken.

    The distance is measured on a circular week, so a window that straddles
    midnight (e.g. Monday 00:10 with a 20 minute half-width) also picks up the
    updates from just before midnight on the previous weekday instead of
    failing or coming back empty.

    Parameters
    ----------
    city, station : str
        Forwarded to ``get_station_updates`` to load the station history.
    timestamps : iterable of datetime-like
        Moments to compute a mean for; each must expose ``weekday()``,
        ``hour``, ``minute``, ``second`` and ``microsecond``.
    delta : datetime.timedelta, optional
        Half-width of the window. Defaults to the module-level ``d``.

    Returns
    -------
    dict
        Maps each timestamp to the mean number of bikes in its window (NaN
        when no update falls inside the window).
    """
    if delta is None:
        delta = d

    df = get_station_updates(city, station)
    when = df['moment'].dt

    # Position of every update inside its week, in integer microseconds.
    # Cast to int64 before scaling: the accessor may return 32-bit integers,
    # which cannot hold a week expressed in microseconds.
    seconds_into_week = ((when.weekday * 24 + when.hour) * 60 + when.minute) * 60 + when.second
    row_pos = seconds_into_week.astype('int64') * 10**6 + when.microsecond

    week = 7 * 24 * 3600 * 10**6
    half_width = delta // dt.timedelta(microseconds=1)
    bikes = df['bikes']

    def mean_around(t):
        t_pos = (((t.weekday() * 24 + t.hour) * 60 + t.minute) * 60 + t.second) * 10**6 + t.microsecond
        # Forward distance from t to each update on the circular week; an
        # update slightly *before* t shows up as a gap slightly below `week`.
        gap = (row_pos - t_pos) % week
        in_window = (gap < half_width) | (gap > week - half_width)
        return bikes[in_window].mean()

    return {t: mean_around(t) for t in timestamps}


# Load the rows to predict; the CSV index holds the moment of each prediction.
to_predict_df = pd.read_csv('challenge_data/part_1/test-blank.csv', index_col=0)
to_predict_df['moment'] = pd.to_datetime(to_predict_df.index)

# Sample station, kept so that `city`, `station` and `timestamps` stay
# available for ad-hoc inspection; the computation below does not use them.
city = 'toulouse'
station = '00003-pomme'
timestamps = to_predict_df[to_predict_df['station'] == station]['moment'].tolist()

# means[city][station][moment] -> windowed mean number of bikes.
# Each station is queried with its *own* moments to predict, rather than with
# the moments of the sample station above, so every (city, station, moment)
# row of to_predict_df is guaranteed to have an entry.
means = {
    city_name: {
        station_name: window_mean_bikes(
            city_name,
            station_name,
            station_rows['moment'].tolist()
        )
        for station_name, station_rows in rows.groupby('station')
    }
    for city_name, rows in to_predict_df.groupby('city')
}

In [3]:
def _predicted_bikes(row):
    """Return the windowed mean computed for this row's city, station and moment."""
    return means[row['city']][row['station']][row['moment']]


# Fill in the predictions, remove the helper column and write the submission.
to_predict_df['bikes'] = to_predict_df.apply(_predicted_bikes, axis=1).tolist()
del to_predict_df['moment']
to_predict_df.to_csv('submission.csv')