# Delta imcoming / outgoing bikes

In [313]:
#general imports
import pandas as pd
import numpy as np

#statsmodels for regression
import statsmodels.api as sm
import statsmodels.formula.api as smf
from statsmodels.iolib.summary2 import summary_col

#scipy for testing
from scipy import stats

#for visualization
import matplotlib.pyplot as plt
import seaborn as sns

from datetime import datetime, timedelta

In [314]:
df_bikes = pd.read_csv('../data/philadelphia_2017.csv')
df_weather = pd.read_csv('../data/weather_hourly_philadelphia.csv')

def get_datetime(date_string):
    if date_string.__contains__("/"):
        new_string = ""
        running_string = ""
        for char in date_string:
            if char == "/" or  char == ":" or char == " ":
                if len(running_string) == 1:
                    running_string = "0" + running_string
                new_string += running_string + char
                running_string = ""
            else:
                running_string += char
        new_string += running_string
        if len(new_string) > 16:
            return datetime.strptime(new_string, '%m/%d/%Y %H:%M:%S')
        else:
            return datetime.strptime(new_string, '%m/%d/%Y %H:%M')
    else:
        return datetime.fromisoformat(date_string)

def compare_datetime(start, end):
    start_datetime = get_datetime(start)
    end_datetime = get_datetime(end)
    duration_of_travel = end_datetime - start_datetime
    seconds = duration_of_travel.total_seconds()
    return seconds/60

ride_lengths = []
df_bikes["ride_duration_minutes"] = df_bikes.apply(lambda x: (compare_datetime(x["start_time"],x["end_time"])), axis=1)

for label, content in df_bikes.iterrows():
    ride_lengths.append(content["ride_duration_minutes"])

ride_lengths_np = np.array(ride_lengths)
iqr = stats.iqr(ride_lengths_np)
iqr

q1,q3 = np.percentile(ride_lengths_np, [25,95])

lower_range = q1 - (1.5*iqr)
upper_range = q3 + (1.5*iqr)

#A ride, can't be shorter than 0 Minutes obviously
if lower_range < 0:
    lower_range = 0

df_bikes.drop(df_bikes[ (df_bikes.ride_duration_minutes > upper_range) | (df_bikes.ride_duration_minutes < lower_range) ].index , inplace=True)

Rounding times to minutes

In [316]:
s = 0
z = 1
for x in range(0,788906):
    if x not in df_bikes.index:
        s = s + 1
        if s == 1000*z:
            print(x/788906)
            z = z + 1
        else:
            pass
    else:
        df_bikes['start_time_rounded'][x] = get_datetime(df_bikes["start_time"][x]).replace(minute=0)
        df_bikes['end_time_rounded'][x] = get_datetime(df_bikes["end_time"][x]).replace(minute=0)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_bikes['start_time_rounded'][x] = get_datetime(df_bikes["start_time"][x]).replace(minute=0)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_bikes['end_time_rounded'][x] = get_datetime(df_bikes["end_time"][x]).replace(minute=0)


0.05804874091463368
0.08382874512299311
0.13764757778493256
0.16717707813098137
0.19686249058823233
0.2343067488395322
0.27538261846151507
0.30219823401013557
0.34052472664677413
0.3716501078708997
0.40765566493346483
0.43515450509946685
0.46883278869725925
0.5060704317117629
0.5477230493873795
0.579788973591277
0.6180913822432584
0.6582850174799025
0.6859739943668828
0.7313152644294758
0.7698521750373302
0.8191191345990524
0.8593343186640741
0.9027361941726898
0.9612095737641747


In [317]:
df_bikes_delta_outgoing_support = df_bikes.groupby(['start_station_name','start_time_rounded']).count().reset_index()
df_bikes_delta_incoming_support = df_bikes.groupby(['end_station_name','end_time_rounded']).count().reset_index()

In [318]:
df_bikes_delta_outgoing_support = df_bikes_delta_outgoing_support.sort_values(['start_station_name', 'start_time_rounded'])
df_bikes_delta_incoming_support = df_bikes_delta_incoming_support.sort_values(['end_station_name', 'end_time_rounded'])

In [319]:
beginning = df_bikes_delta_outgoing_support["start_time_rounded"][0]
print(beginning)

2017-01-01 00:00:00


In [320]:
df_stations = df_bikes_delta_outgoing_support['start_station_name'].unique()

In [321]:
df_stations

array(['10th & Chestnut', '10th & Federal', '11th & Pine, Kahn Park',
       '11th & Poplar, John F. Street Community Center', '11th & Reed',
       '11th & South', '11th & Washington', '12th & Callowhill',
       '12th & Filbert', '12th & Passyunk', '13th & Locust',
       '13th & Montgomery', '15th & Castle, DiSilvestro Playground',
       '15th & Market', '15th & South', '15th & Spruce', '17th & Girard',
       '17th & Pine', '17th & Spring Garden',
       '18th & Fernon, Aquinas Center', '18th & JFK',
       '18th & Washington, Chew Playground', '19th & Girard, PTTI',
       '19th & Lombard', '19th & Market', '20th & Fairmount',
       '20th & Market', '20th & Sansom', '21st & Catharine',
       '21st & Winter, Franklin Institute', '22nd & Cecil B. Moore',
       '22nd & Federal', '23rd & Fairmount', '23rd & Market',
       '23rd & South', '24th & Cecil B. Moore, Cecil B. Moore Library',
       '24th & Race SRT', '24th & Sansom', '25th & Locust',
       '27th & Girard', '27th & Mas

In [322]:
df_bikes_delta_o_support = df_bikes_delta_outgoing_support[['start_station_name','start_time_rounded','start_time']].copy()
df_bikes_o_delta = df_bikes_delta_o_support.rename(columns = {'start_station_name': 'Station Name', 'start_time_rounded': 'Time', 'start_time': 'Outgoing bikes'}, inplace = False)

In [323]:
df_bikes_delta_i_support = df_bikes_delta_incoming_support[['end_station_name','end_time_rounded','end_time']].copy()
df_bikes_i_delta = df_bikes_delta_i_support.rename(columns = {'end_station_name': 'Station Name', 'end_time_rounded': 'Time', 'end_time': 'Incoming bikes'}, inplace = False)

In [324]:
df_bikes_o_delta.head(10)

Unnamed: 0,Station Name,Time,Outgoing bikes
0,10th & Chestnut,2017-01-01 00:00:00,1
1,10th & Chestnut,2017-01-01 10:00:00,1
2,10th & Chestnut,2017-01-01 15:00:00,2
3,10th & Chestnut,2017-01-01 16:00:00,2
4,10th & Chestnut,2017-01-01 17:00:00,1
5,10th & Chestnut,2017-01-01 18:00:00,1
6,10th & Chestnut,2017-01-02 00:00:00,1
7,10th & Chestnut,2017-01-02 08:00:00,2
8,10th & Chestnut,2017-01-02 09:00:00,1
9,10th & Chestnut,2017-01-02 18:00:00,1


In [327]:
for m in range(0, df_stations.size):
    for n in range(0 , 8759):
        df_bikes_o_delta_support_new = {'Station Name': df_stations[m], 'Time': beginning + timedelta(hours=n), 'Outgoing bikes': 0}
        df_bikes_o_delta = df_bikes_o_delta.append(df_bikes_o_delta_support_new, ignore_index=True)
    print(df_stations[m])

10th & Chestnut
10th & Federal
11th & Pine, Kahn Park
11th & Poplar, John F. Street Community Center
11th & Reed
11th & South
11th & Washington
12th & Callowhill
12th & Filbert
12th & Passyunk
13th & Locust
13th & Montgomery
15th & Castle, DiSilvestro Playground
15th & Market
15th & South
15th & Spruce
17th & Girard
17th & Pine
17th & Spring Garden
18th & Fernon, Aquinas Center
18th & JFK
18th & Washington, Chew Playground
19th & Girard, PTTI
19th & Lombard
19th & Market
20th & Fairmount
20th & Market
20th & Sansom
21st & Catharine
21st & Winter, Franklin Institute
22nd & Cecil B. Moore
22nd & Federal
23rd & Fairmount
23rd & Market
23rd & South
24th & Cecil B. Moore, Cecil B. Moore Library
24th & Race SRT
24th & Sansom
25th & Locust
27th & Girard
27th & Master, Athletic Recreation Center
27th & South
29th & Dauphin
29th & Diamond
2nd & Germantown
2nd & Market
2nd & Race
2nd & South
3083
30th Street Station East
31st & Girard
32nd & Baring, Drexel Park
33rd & Dauphin
33rd & Diamond
33rd

In [328]:
df_bikes_o_delta.head(10)

Unnamed: 0,Station Name,Time,Outgoing bikes
0,10th & Chestnut,2017-01-01 00:00:00,1
1,10th & Chestnut,2017-01-01 10:00:00,1
2,10th & Chestnut,2017-01-01 15:00:00,2
3,10th & Chestnut,2017-01-01 16:00:00,2
4,10th & Chestnut,2017-01-01 17:00:00,1
5,10th & Chestnut,2017-01-01 18:00:00,1
6,10th & Chestnut,2017-01-02 00:00:00,1
7,10th & Chestnut,2017-01-02 08:00:00,2
8,10th & Chestnut,2017-01-02 09:00:00,1
9,10th & Chestnut,2017-01-02 18:00:00,1


In [366]:
df_bikes_i_delta = df_bikes_o_delta

In [335]:
df_stations.size

126

In [367]:
for m in range(0, df_stations.size):
        df_bikes_i_delta_support_new = {'Station Name': df_stations[m], 'Time': beginning + timedelta(hours=8759), 'Outgoing bikes': 0}
        df_bikes_i_delta = df_bikes_i_delta.append(df_bikes_i_delta_support_new, ignore_index=True)

In [368]:
df_bikes_i_delta.tail(20)

Unnamed: 0,Station Name,Time,Outgoing bikes
1454973,"Parkside & Belmont, Case Building",2017-12-31 23:00:00,0
1454974,Parkside & Girard,2017-12-31 23:00:00,0
1454975,Penn Treaty Park,2017-12-31 23:00:00,0
1454976,Pennsylvania & Fairmount Perelman Building,2017-12-31 23:00:00,0
1454977,Pennsylvania Convention Center,2017-12-31 23:00:00,0
1454978,Philadelphia Museum of Art,2017-12-31 23:00:00,0
1454979,Philadelphia Zoo,2017-12-31 23:00:00,0
1454980,Point Breeze & Tasker,2017-12-31 23:00:00,0
1454981,Race Street Pier,2017-12-31 23:00:00,0
1454982,Rittenhouse Square,2017-12-31 23:00:00,0


In [369]:
df_bikes_i_delta = df_bikes_i_delta.drop_duplicates(subset=['Station Name', 'Time'], keep='last', ignore_index=False)

In [370]:
df_bikes_i_delta = df_bikes_i_delta.rename(columns = {'Outgoing bikes': 'Incoming bikes'})

In [371]:
df_bikes_i_delta

Unnamed: 0,Station Name,Time,Incoming bikes
351233,10th & Chestnut,2017-01-01 00:00:00,0
351234,10th & Chestnut,2017-01-01 01:00:00,0
351235,10th & Chestnut,2017-01-01 02:00:00,0
351236,10th & Chestnut,2017-01-01 03:00:00,0
351237,10th & Chestnut,2017-01-01 04:00:00,0
...,...,...,...
1454988,"The Children's Hospital of Philadelphia, Osler...",2017-12-31 23:00:00,0
1454989,"Thompson & Palmer, Adaire School",2017-12-31 23:00:00,0
1454990,University City Station,2017-12-31 23:00:00,0
1454991,Virtual Station,2017-12-31 23:00:00,0


In [372]:
df_bikes_delta_i_support = df_bikes_delta_incoming_support[['end_station_name','end_time_rounded','end_time']].copy()
df_bikes_delta_i_support = df_bikes_delta_i_support.rename(columns = {'end_station_name': 'Station Name', 'end_time_rounded': 'Time', 'end_time': 'Incoming bikes'}, inplace = False)
df_bikes_i_delta = df_bikes_i_delta.append(df_bikes_delta_i_support, ignore_index=True)

In [374]:
df_bikes_i_delta = df_bikes_i_delta.drop_duplicates(subset=['Station Name', 'Time'], keep='last', ignore_index=False)

In [375]:
df_bikes_i_delta

Unnamed: 0,Station Name,Time,Incoming bikes
0,10th & Chestnut,2017-01-01 00:00:00,0
1,10th & Chestnut,2017-01-01 01:00:00,0
2,10th & Chestnut,2017-01-01 02:00:00,0
3,10th & Chestnut,2017-01-01 03:00:00,0
4,10th & Chestnut,2017-01-01 04:00:00,0
...,...,...,...
1446668,"Welcome Park, NPS",2017-12-28 18:00:00,1
1446669,"Welcome Park, NPS",2017-12-29 14:00:00,1
1446670,"Welcome Park, NPS",2017-12-29 15:00:00,1
1446671,"Welcome Park, NPS",2017-12-29 22:00:00,1


In [383]:
df_bikes_i_delta = df_bikes_i_delta.sort_values(['Station Name', 'Time'])
df_bikes_i_delta['Order'] = np.arange(len(df_bikes_i_delta))
df_bikes_i_delta = df_bikes_i_delta.set_index('Order')

In [388]:
df_bikes_i_delta

Unnamed: 0_level_0,Station Name,Time,Incoming bikes
Order,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,10th & Chestnut,2017-01-01 00:00:00,0
1,10th & Chestnut,2017-01-01 01:00:00,0
2,10th & Chestnut,2017-01-01 02:00:00,0
3,10th & Chestnut,2017-01-01 03:00:00,0
4,10th & Chestnut,2017-01-01 04:00:00,0
...,...,...,...
1103755,"Welcome Park, NPS",2017-12-31 19:00:00,0
1103756,"Welcome Park, NPS",2017-12-31 20:00:00,0
1103757,"Welcome Park, NPS",2017-12-31 21:00:00,0
1103758,"Welcome Park, NPS",2017-12-31 22:00:00,0


In [386]:
df_bikes_o_delta = df_bikes_o_delta.drop_duplicates(subset=['Station Name', 'Time'], keep='first', ignore_index=False)
df_bikes_o_delta = df_bikes_o_delta.sort_values(['Station Name', 'Time'])
df_bikes_o_delta['Order'] = np.arange(len(df_bikes_o_delta))
df_bikes_o_delta = df_bikes_o_delta.set_index('Order')

In [389]:
for m in range(0, df_stations.size):
        df_bikes_o_delta_support_new = {'Station Name': df_stations[m], 'Time': beginning + timedelta(hours=8759), 'Outgoing bikes': 0}
        df_bikes_o_delta = df_bikes_o_delta.append(df_bikes_o_delta_support_new, ignore_index=True)

In [391]:
df_bikes_o_delta = df_bikes_o_delta.drop_duplicates(subset=['Station Name', 'Time'], keep='first', ignore_index=False)
df_bikes_o_delta = df_bikes_o_delta.sort_values(['Station Name', 'Time'])
df_bikes_o_delta['Order'] = np.arange(len(df_bikes_o_delta))
df_bikes_o_delta = df_bikes_o_delta.set_index('Order')

In [392]:
df_bikes_o_delta

Unnamed: 0_level_0,Station Name,Time,Outgoing bikes
Order,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,10th & Chestnut,2017-01-01 00:00:00,1
1,10th & Chestnut,2017-01-01 01:00:00,0
2,10th & Chestnut,2017-01-01 02:00:00,0
3,10th & Chestnut,2017-01-01 03:00:00,0
4,10th & Chestnut,2017-01-01 04:00:00,0
...,...,...,...
1103755,"Welcome Park, NPS",2017-12-31 19:00:00,0
1103756,"Welcome Park, NPS",2017-12-31 20:00:00,0
1103757,"Welcome Park, NPS",2017-12-31 21:00:00,0
1103758,"Welcome Park, NPS",2017-12-31 22:00:00,0


In [395]:
df_bikes_delta_final = df_bikes_o_delta

In [396]:
df_bikes_delta_final['Incoming bikes'] = df_bikes_i_delta['Incoming bikes']

In [398]:
df_bikes_delta_final.head(50)

Unnamed: 0_level_0,Station Name,Time,Outgoing bikes,Incoming bikes
Order,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,10th & Chestnut,2017-01-01 00:00:00,1,0
1,10th & Chestnut,2017-01-01 01:00:00,0,0
2,10th & Chestnut,2017-01-01 02:00:00,0,0
3,10th & Chestnut,2017-01-01 03:00:00,0,0
4,10th & Chestnut,2017-01-01 04:00:00,0,0
5,10th & Chestnut,2017-01-01 05:00:00,0,1
6,10th & Chestnut,2017-01-01 06:00:00,0,1
7,10th & Chestnut,2017-01-01 07:00:00,0,0
8,10th & Chestnut,2017-01-01 08:00:00,0,0
9,10th & Chestnut,2017-01-01 09:00:00,0,0


In [399]:
df_bikes_delta_final['Delta'] = df_bikes_delta_final['Incoming bikes'] - df_bikes_delta_final['Outgoing bikes']

In [401]:
df_bikes_delta_final.head(20)

Unnamed: 0_level_0,Station Name,Time,Outgoing bikes,Incoming bikes,Delta
Order,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0,10th & Chestnut,2017-01-01 00:00:00,1,0,-1
1,10th & Chestnut,2017-01-01 01:00:00,0,0,0
2,10th & Chestnut,2017-01-01 02:00:00,0,0,0
3,10th & Chestnut,2017-01-01 03:00:00,0,0,0
4,10th & Chestnut,2017-01-01 04:00:00,0,0,0
5,10th & Chestnut,2017-01-01 05:00:00,0,1,1
6,10th & Chestnut,2017-01-01 06:00:00,0,1,1
7,10th & Chestnut,2017-01-01 07:00:00,0,0,0
8,10th & Chestnut,2017-01-01 08:00:00,0,0,0
9,10th & Chestnut,2017-01-01 09:00:00,0,0,0


In [402]:
df_bikes_delta_final_year = df_bikes_delta_final.groupby(['Station Name']).sum()

In [403]:
df_bikes_delta_final_year

Unnamed: 0_level_0,Outgoing bikes,Incoming bikes,Delta
Station Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
10th & Chestnut,8304,8546,242
10th & Federal,4622,4848,226
"11th & Pine, Kahn Park",11808,12244,436
"11th & Poplar, John F. Street Community Center",715,750,35
11th & Reed,7737,8137,400
...,...,...,...
"The Children's Hospital of Philadelphia, Osler Circle",4014,3777,-237
"Thompson & Palmer, Adaire School",1440,1436,-4
University City Station,15801,14433,-1368
Virtual Station,102,2280,2178


In [404]:
df_bikes_delta_final_year.sort_values(['Delta'])

Unnamed: 0_level_0,Outgoing bikes,Incoming bikes,Delta
Station Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
20th & Fairmount,9601,7573,-2028
17th & Spring Garden,9759,7994,-1765
Dornsife Center,5331,3571,-1760
27th & Girard,5488,3985,-1503
23rd & Fairmount,8967,7480,-1487
...,...,...,...
Pennsylvania Convention Center,6470,7472,1002
Moyamensing & Tasker,6365,7520,1155
4th & Bainbridge,12489,14119,1630
Virtual Station,102,2280,2178
