### Import library and data

In [1]:
import matplotlib.pyplot as plt
import csv, datetime
import numpy as np
import pandas as pd
import os
import ipywidgets as widgets
from ipywidgets import interact, interact_manual
import time

# open the file
df = pd.read_csv("ubike.csv", encoding = "ISO-8859-1" )
df.head()

Unnamed: 0,time,id,station,address,latitude,longitude,location,status,lot,bike,empty,weather,temp,pressure,humidity,wind
0,2015/9/7 12:21,2,MRT S.Y.S Memorial Hall Stataion(Exit 2.),"Sec,4. Zhongxiao E.Rd/GuangFu S. Rd",25.041,121.556945,Daan Dist.,1,48,9,36,No Rain,299.15,1009,78,3.6
1,2015/9/7 13:21,2,MRT S.Y.S Memorial Hall Stataion(Exit 2.),"Sec,4. Zhongxiao E.Rd/GuangFu S. Rd",25.041,121.556945,Daan Dist.,1,48,4,41,No Rain,300.15,1008,78,3.1
2,2015/9/7 14:21,2,MRT S.Y.S Memorial Hall Stataion(Exit 2.),"Sec,4. Zhongxiao E.Rd/GuangFu S. Rd",25.041,121.556945,Daan Dist.,1,48,5,40,No Rain,300.15,1007,74,4.6
3,2015/9/7 15:21,2,MRT S.Y.S Memorial Hall Stataion(Exit 2.),"Sec,4. Zhongxiao E.Rd/GuangFu S. Rd",25.041,121.556945,Daan Dist.,1,48,16,29,No Rain,300.15,1007,78,3.1
4,2015/9/7 16:21,2,MRT S.Y.S Memorial Hall Stataion(Exit 2.),"Sec,4. Zhongxiao E.Rd/GuangFu S. Rd",25.041,121.556945,Daan Dist.,1,48,22,23,Rain,299.7,1007,78,4.1


### Drop the columns that we are not going to use and add a column for the upcoming process

In [2]:
df.drop(columns = ['address', 'latitude', 'longitude', 'location', 'weather', 'temp', 'pressure', 'humidity', 'wind'], inplace=True)
df.head()

Unnamed: 0,time,id,station,status,lot,bike,empty
0,2015/9/7 12:21,2,MRT S.Y.S Memorial Hall Stataion(Exit 2.),1,48,9,36
1,2015/9/7 13:21,2,MRT S.Y.S Memorial Hall Stataion(Exit 2.),1,48,4,41
2,2015/9/7 14:21,2,MRT S.Y.S Memorial Hall Stataion(Exit 2.),1,48,5,40
3,2015/9/7 15:21,2,MRT S.Y.S Memorial Hall Stataion(Exit 2.),1,48,16,29
4,2015/9/7 16:21,2,MRT S.Y.S Memorial Hall Stataion(Exit 2.),1,48,22,23


### Add weekday and o'clock in the dataframe

In [3]:
%%time
weekday = []
oclock  = []

for i in range(len(df)):
    
    time = datetime.datetime.strptime(df.iloc[i][0], "%Y/%m/%d %H:%M")
    day  = datetime.date.isoweekday(time)
    
    weekday.append(day)
    oclock.append(time.hour)

Wall time: 2.55 s


In [4]:
df['weekday']  = weekday
df['o\'clock'] = oclock
df.head()

Unnamed: 0,time,id,station,status,lot,bike,empty,weekday,o'clock
0,2015/9/7 12:21,2,MRT S.Y.S Memorial Hall Stataion(Exit 2.),1,48,9,36,1,12
1,2015/9/7 13:21,2,MRT S.Y.S Memorial Hall Stataion(Exit 2.),1,48,4,41,1,13
2,2015/9/7 14:21,2,MRT S.Y.S Memorial Hall Stataion(Exit 2.),1,48,5,40,1,14
3,2015/9/7 15:21,2,MRT S.Y.S Memorial Hall Stataion(Exit 2.),1,48,16,29,1,15
4,2015/9/7 16:21,2,MRT S.Y.S Memorial Hall Stataion(Exit 2.),1,48,22,23,1,16


In [5]:
df = df.sort_values('id')
df.reset_index(drop=True, inplace=True)
df.head()

Unnamed: 0,time,id,station,status,lot,bike,empty,weekday,o'clock
0,2015/9/7 12:21,2,MRT S.Y.S Memorial Hall Stataion(Exit 2.),1,48,9,36,1,12
1,2015/9/27 23:34,2,MRT S.Y.S Memorial Hall Stataion(Exit 2.),1,48,19,29,7,23
2,2015/9/28 00:34,2,MRT S.Y.S Memorial Hall Stataion(Exit 2.),0,48,0,0,1,0
3,2015/9/28 01:34,2,MRT S.Y.S Memorial Hall Stataion(Exit 2.),0,48,0,0,1,1
4,2015/9/28 02:34,2,MRT S.Y.S Memorial Hall Stataion(Exit 2.),0,48,0,0,1,2


### Record the start and end index of each station

In [6]:
id_dict = {}

for i in range(len(df)):
    ID = df.iloc[i][1]
    
    if ID not in id_dict:
        start = i
        id_dict[ID] = [start, -1]
    else:
        end = i
        id_dict[ID] = [start, end]
    
    if i == len(df)-1:
        end = i
        id_dict[ID] = [start, end]

### Create a list of all stations

In [7]:
stations = list(df.station.unique())

### Make a dictionary to pair up each station with it's ID

In [8]:
stations_id = {}

for s in stations:
    station_series = df.loc[df['station'] == s]['id'].reset_index(drop=True)
    stations_id[s] = station_series[0]

### Main function

In [9]:
@interact_manual

def compare_2(station=stations):
    bike = {}        # dict contains the number of bike available in each hour
    local = {}       # dict contains the capacity of bike of a specific station in each hour
    total = {}       # dict contains the capacity of bike of all station in each hour
    count = {}       # dict contains 'each hour' is form by how many rows
    
    clock_group = list(df.groupby('o\'clock').sum().index)
    for i in clock_group:
        total[i] = df.groupby('o\'clock').sum()['bike'][i]

    # processing the data
    for i in range(len(df)):
        hour = df.iloc[i][8]    
        
        if df.iloc[i][2] == station and df.iloc[i][3] != 0:            
            if hour not in bike:
                bike[hour] = int(df.iloc[i][5])
                local[hour] = int(df.iloc[i][5] + df.iloc[i][6])
                count[hour] = 1
            else:
                bike[hour] += int(df.iloc[i][5])
                local[hour] += int(df.iloc[i][5] + df.iloc[i][6])
                count[hour] += 1       

                
    # preparing for plotting
    time_seq = bike.keys()
    time_seq = sorted(time_seq)
    avg  = []
    lot  = []
    rate = []    # number of bikes of the station divided by the number of total bikes available in each hour

    for k in time_seq:
        i = float( bike[k]  / count[k])     # average bikes available of a specific station in each hour
        j = float( local[k] / count[k])     # average capacity of a specific station in each hour
        avg.append(i)
        lot.append(j)
        rate.append(bike[k]/total[k])    # rate of bikes available of a specific station to capacity of bike of all station in each hour
   
    df_dum = pd.DataFrame(list(zip(avg, lot, rate)), columns =['avg', 'lot', 'rate'])



    # plotting the data in avg and lot
    fig, ax = plt.subplots()
    
    plot_title = str('Information of the bikes at ' + station + ' station.')
    df_dum[['avg', 'lot']].plot(xticks=np.arange(0, 24, step=1), kind='line', stacked=False, ax=ax, grid=False, figsize=(20,12))
    ax.set_title(plot_title,fontsize= 20)
    
    ax2 = ax.twinx()
    ax2.plot(ax.get_xticks(), df_dum['rate'], linestyle='-', marker='o', linewidth=2, color = 'g', label = 'rate')
    lines, labels = ax.get_legend_handles_labels()
    lines2, labels2 = ax2.get_legend_handles_labels()
    ax.legend(lines + lines2, ['average', 'capacity'] + labels2, bbox_to_anchor=(1.03, 1), loc='upper left', prop={'size': 20})
    ax.yaxis.set_ticks_position("right")
    ax2.yaxis.set_ticks_position("left")

interactive(children=(Dropdown(description='station', options=('MRT S.Y.S Memorial Hall Stataion(Exit 2.)', 'M…

In [10]:
@interact_manual

def compare_faster(station=stations):
    bike = {}        # dict contains the number of bike available in each hour
    local = {}       # dict contains the capacity of bike of a specific station in each hour
    total = {}       # dict contains the capacity of bike of all station in each hour
    count = {}       # dict contains 'each hour' is form by how many rows
    ID = stations_id[station]
    index = id_dict[ID]
    start_index = index[0]
    end_index   = index[1]
    
    
    clock_group = list(df.groupby('o\'clock').sum().index)
    for i in clock_group:
        total[i] = df.groupby('o\'clock').sum()['bike'][i]

    # processing the data
    for i in range(start_index, end_index):
        hour = df.iloc[i][8]    
        
        if df.iloc[i][3] != 0:            
            if hour not in bike:
                bike[hour] = int(df.iloc[i][5])
                local[hour] = int(df.iloc[i][5] + df.iloc[i][6])
                count[hour] = 1
            else:
                bike[hour] += int(df.iloc[i][5])
                local[hour] += int(df.iloc[i][5] + df.iloc[i][6])
                count[hour] += 1
                
    # preparing for plotting
    time_seq = bike.keys()
    time_seq = sorted(time_seq)
    avg  = []
    lot  = []
    rate = []    # number of bikes of the station divided by the number of total bikes available in each hour

    for k in time_seq:
        i = float( bike[k]  / count[k])     # average bikes available of a specific station in each hour
        j = float( local[k] / count[k])     # average capacity of a specific station in each hour
        r = float( total[k] / count[k] )    # average capacity of bike of all station in each hour
        avg.append(i)
        lot.append(j)
        rate.append(i/r)    # rate of bikes available of a specific station to capacity of bike of all station in each hour
   
    df_dum = pd.DataFrame(list(zip(avg, lot, rate)), columns =['avg', 'lot', 'rate'])



    # plotting the data in avg and lot
    fig, ax = plt.subplots()
    
    plot_title = str('Information of the bikes at ' + station + ' station.')
    df_dum[['avg', 'lot']].plot(xticks=np.arange(0, 24, step=1), kind='line', stacked=False, ax=ax, grid=False, figsize=(20,12))
    ax.set_title(plot_title,fontsize= 20)
    
    ax2 = ax.twinx()
    ax2.plot(ax.get_xticks(), df_dum['rate'], linestyle='-', marker='o', linewidth=2, color = 'g', label = 'rate')
    lines, labels = ax.get_legend_handles_labels()
    lines2, labels2 = ax2.get_legend_handles_labels()
    ax.legend(lines + lines2, ['average', 'capacity'] + labels2, bbox_to_anchor=(1.03, 1), loc='upper left', prop={'size': 20})
    ax.yaxis.set_ticks_position("right")
    ax2.yaxis.set_ticks_position("left")

interactive(children=(Dropdown(description='station', options=('MRT S.Y.S Memorial Hall Stataion(Exit 2.)', 'M…

## Everything ends here, below is just some raw draft.