## Example pull, for December 2016

In [1]:
import pandas as pd
import requests

In [2]:
%matplotlib inline

In [7]:
from zipfile import ZipFile

In [4]:
# Download the December 2016 trip archive. The timeout keeps a stalled
# connection from hanging the kernel, and raise_for_status() fails fast on an
# HTTP error instead of passing an error body on to the zip-parsing step.
r = requests.get(
    "https://s3.amazonaws.com/tripdata/201612-citibike-tripdata.zip",
    timeout=60,
)
r.raise_for_status()

In [5]:
import io

# Open the downloaded archive straight from an in-memory buffer of the
# response body.
myzip = ZipFile(file=io.BytesIO(r.content))

In [9]:
# Parse the archive's first member as CSV into a DataFrame.
with myzip.open(myzip.namelist()[0]) as fp:
    data = pd.read_csv(filepath_or_buffer=fp)

In [10]:
# Preview the first five rows to check the column layout.
data.head(n=5)

Unnamed: 0,Trip Duration,Start Time,Stop Time,Start Station ID,Start Station Name,Start Station Latitude,Start Station Longitude,End Station ID,End Station Name,End Station Latitude,End Station Longitude,Bike ID,User Type,Birth Year,Gender
0,528,2016-12-01 00:00:04,2016-12-01 00:08:52,499,Broadway & W 60 St,40.769155,-73.981918,228,E 48 St & 3 Ave,40.754601,-73.971879,26931,Subscriber,1964.0,1
1,218,2016-12-01 00:00:28,2016-12-01 00:04:06,3418,Plaza St West & Flatbush Ave,40.675021,-73.971115,3358,Garfield Pl & 8 Ave,40.671198,-73.974841,27122,Subscriber,1955.0,1
2,399,2016-12-01 00:00:39,2016-12-01 00:07:19,297,E 15 St & 3 Ave,40.734232,-73.986923,345,W 13 St & 6 Ave,40.736494,-73.997044,19352,Subscriber,1985.0,1
3,254,2016-12-01 00:00:44,2016-12-01 00:04:59,405,Washington St & Gansevoort St,40.739323,-74.008119,358,Christopher St & Greenwich St,40.732916,-74.007114,20015,Subscriber,1982.0,1
4,1805,2016-12-01 00:00:54,2016-12-01 00:31:00,279,Peck Slip & Front St,40.707873,-74.00167,279,Peck Slip & Front St,40.707873,-74.00167,23148,Subscriber,1989.0,1


In [11]:
# Parse the start timestamps into datetimes so they can be bucketed by hour.
data['Start Time'] = data['Start Time'].pipe(pd.to_datetime)

In [12]:
# Count trips per (hour, start station).
# pd.Grouper(freq='H') replaces pd.TimeGrouper, which was deprecated and later
# removed from pandas. After count(), every remaining column holds the number
# of non-null values in that hour/station bucket.
accum_start = (data
    .set_index('Start Time')
    .groupby([pd.Grouper(freq='H'), 'Start Station Name'])
    .count()
    .reset_index())

In [13]:
# Keep only the bucket keys plus one count column, relabelled as the number of
# outbound trips.
accum_start = (
    accum_start
    .loc[:, ['Start Time', 'Start Station Name', 'Trip Duration']]
    .rename(columns={'Trip Duration': 'Outbounds'})
)

In [14]:
# Expand each station's counts to a dense hourly series running from
# 2016-12-01 00:00 through 2017-01-01 00:00 inclusive; hours with no trips
# are filled with 0.
# The hourly index is identical for every station, so it is built once with
# pd.date_range. The pd.DatetimeIndex(start=..., end=..., freq=...)
# constructor form was deprecated and later removed from pandas.
december_hours = pd.date_range(start='2016-12-01', end='2017-01-01', freq='H')

accum_totals = accum_start.groupby('Start Station Name').apply(
    lambda df: df.set_index('Start Time')\
        .reindex(december_hours, fill_value=0)\
        .drop('Start Station Name', axis='columns')\
        .reset_index()
).reset_index().drop('level_1', axis='columns').rename(columns={'index': 'Time'})

In [15]:
# One row of identifying details (latitude, longitude, ID) per start station,
# indexed by station name.
# De-duplicate on the name itself: drop_duplicates() on a name-indexed frame
# compares only the latitude/longitude/ID columns, so a station whose recorded
# details vary between trips would keep several rows and multiply the hourly
# rows when this table is joined on 'Start Station Name'. The first occurrence
# of each name is kept.
identifying_information = (
    data[
        ['Start Station Name', 'Start Station Latitude', 'Start Station Longitude', 'Start Station ID']
    ]
    .drop_duplicates(subset='Start Station Name')
    .set_index('Start Station Name')
)

In [16]:
# Attach each station's latitude, longitude and ID to its hourly rows by
# looking the station name up in the index of identifying_information.
accum_totals = accum_totals.join(
    other=identifying_information,
    on='Start Station Name',
    how='left',
)

Now to add in ending information.

In [17]:
# Parse the stop timestamps into datetimes so they can be bucketed by hour.
data['Stop Time'] = data['Stop Time'].pipe(pd.to_datetime)

In [18]:
# Count trips per (hour, end station).
# pd.Grouper(freq='H') replaces pd.TimeGrouper, which was deprecated and later
# removed from pandas. After count(), every remaining column holds the number
# of non-null values in that hour/station bucket.
accum_end = (data
    .set_index('Stop Time')
    .groupby([pd.Grouper(freq='H'), 'End Station Name'])
    .count()
    .reset_index())

In [19]:
# Keep only the bucket keys plus one count column, relabelled as the number of
# inbound trips.
accum_end = (
    accum_end
    .loc[:, ['Stop Time', 'End Station Name', 'Trip Duration']]
    .rename(columns={'Trip Duration': 'Inbounds'})
)

In [20]:
# Expand each end station's counts to a dense hourly series running from
# 2016-12-01 00:00 through 2017-01-01 00:00 inclusive; hours with no arrivals
# are filled with 0.
# The hourly index is identical for every station, so it is built once with
# pd.date_range. The pd.DatetimeIndex(start=..., end=..., freq=...)
# constructor form was deprecated and later removed from pandas.
december_hours = pd.date_range(start='2016-12-01', end='2017-01-01', freq='H')

accum_end = accum_end.groupby('End Station Name').apply(
    lambda df: df.set_index('Stop Time')\
        .reindex(december_hours, fill_value=0)\
        .drop('End Station Name', axis='columns')\
        .reset_index()
).reset_index().drop('level_1', axis='columns').rename(columns={'index': 'Time'})

In [21]:
# Preview the first five rows of the dense hourly inbound counts.
accum_end.head(n=5)

Unnamed: 0,End Station Name,Time,Inbounds
0,1 Ave & E 16 St,2016-12-01 00:00:00,2
1,1 Ave & E 16 St,2016-12-01 01:00:00,1
2,1 Ave & E 16 St,2016-12-01 02:00:00,0
3,1 Ave & E 16 St,2016-12-01 03:00:00,1
4,1 Ave & E 16 St,2016-12-01 04:00:00,0


In [22]:
# Combine outbound and inbound counts by matching station name and hour.
# The outer join keeps rows present on only one side, leaving NaN in the
# columns that come from the other side.
accum_totals = accum_totals.merge(
    accum_end,
    how='outer',
    left_on=['Start Station Name', 'Time'],
    right_on=['End Station Name', 'Time'],
)

In [23]:
# Discard every row that has a missing value in any column.
accum_totals = accum_totals.dropna(axis='index', how='any')

In [24]:
# Cast the outbound counts to integers.
accum_totals = accum_totals.astype({'Outbounds': int})

In [26]:
# Cast the station IDs to integers.
accum_totals = accum_totals.astype({'Start Station ID': int})

## All together now...