In [1]:
import pandas as pd
import numpy as np
import geopandas as gpd
import folium
import datetime

In [2]:
# Preview an empty base map centred on the coordinates used for every map
# in this notebook.
folium.Map(
    location=[40.71958611647166, -74.0431174635887],
    zoom_start=12,
    tiles='cartodbpositron',
)

In [3]:
# Importing data
# The CSV location defaults to the original author's path, but it can be
# overridden with the NYC_DATA_PATH environment variable so the notebook runs
# on other machines without editing this cell.
import os

nyc_csv_path = os.environ.get(
    'NYC_DATA_PATH',
    '/Users/santiagotoso/GoogleDrive/Master/Python/Bikes Data/data/nyc_data.csv',
)
nyc = pd.read_csv(nyc_csv_path)
nyc.head(2)

Unnamed: 0,tripduration,starttime,stoptime,start station id,start station name,start station latitude,start station longitude,end station id,end station name,end station latitude,end station longitude,bikeid,usertype,birth year,gender
0,311,2019-05-01 00:07:36.4670,2019-05-01 00:12:47.6350,3185,City Hall,40.717733,-74.043845,3279,Dixon Mills,40.72163,-74.049968,29224,Subscriber,1988,1
1,538,2019-05-01 00:08:29.2840,2019-05-01 00:17:27.9410,3681,Grand St,40.715178,-74.037683,3203,Hamilton Park,40.727596,-74.044247,26170,Subscriber,1974,1


In [4]:
# Inspect the Python type of the first 'starttime' value as loaded from the CSV.
type(nyc.at[0, 'starttime'])

str

In [5]:
# Setting the right format for starttime and stoptime.
# Drop the fractional seconds by cutting each string at the decimal point.
# Slicing off a fixed number of characters would silently corrupt any value
# whose fractional part is missing or has a different length.
for time_col in ['starttime', 'stoptime']:
    whole_seconds = nyc[time_col].str.split('.').str[0]
    nyc[time_col] = pd.to_datetime(whole_seconds)

# Define the starttime as index
nyc = nyc.set_index('starttime')
# Constant label column: the pivot tables use it as their `columns` key, so
# the trip counts end up in a single column named 'station'.
nyc['type'] = 'station'
nyc.head(1)

Unnamed: 0_level_0,tripduration,stoptime,start station id,start station name,start station latitude,start station longitude,end station id,end station name,end station latitude,end station longitude,bikeid,usertype,birth year,gender,type
starttime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2019-05-01 00:07:36,311,2019-05-01 00:12:47,3185,City Hall,40.717733,-74.043845,3279,Dixon Mills,40.72163,-74.049968,29224,Subscriber,1988,1,station


In [6]:
# Constant 'type' column; the pivot tables below use it as their `columns`
# key, so the aggregated counts land in one column called 'station'.
nyc['type'] = 'station'
nyc.head(1)

Unnamed: 0_level_0,tripduration,stoptime,start station id,start station name,start station latitude,start station longitude,end station id,end station name,end station latitude,end station longitude,bikeid,usertype,birth year,gender,type
starttime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2019-05-01 00:07:36,311,2019-05-01 00:12:47,3185,City Hall,40.717733,-74.043845,3279,Dixon Mills,40.72163,-74.049968,29224,Subscriber,1988,1,station


# TimestampedGeoJson

In [7]:
# Count the trips leaving each start station, split by the hour of the day
# in which they started. 'type' is constant, so the pivot produces a single
# count column named 'station'.
start = (
    nyc.pivot_table(
        values='tripduration',
        index=['start station id',
               'start station latitude',
               'start station longitude',
               nyc.index.hour],
        columns='type',
        aggfunc='count',
    )
    .reset_index()
)

start.head()

type,start station id,start station latitude,start station longitude,starttime,station
0,3183,40.716247,-74.033459,0,8
1,3183,40.716247,-74.033459,1,5
2,3183,40.716247,-74.033459,2,4
3,3183,40.716247,-74.033459,5,2
4,3183,40.716247,-74.033459,6,3


In [8]:
# Turn the hourly totals into a per-day average.
# Count the calendar days spanned by the data. The largest day-of-month value
# only equals that number when the data covers exactly one month starting on
# the 1st.
first_day = nyc.index.min().normalize()
last_day = nyc.index.max().normalize()
days = (last_day - first_day).days + 1
start['station'] = start['station']/days

In [9]:
# Positional rename; 'count' now holds the average number of trips per day.
start.columns = ['station_id', 'lat', 'lon', 'hour', 'count']
# Station-hours averaging fewer than one trip a day get the grey colour,
# all others the green one.
start['fillColor'] = start['count'].lt(1).map({True: '#586065', False: '#53c688'})
start.head(1)

Unnamed: 0,station_id,lat,lon,hour,count,fillColor
0,3183,40.716247,-74.033459,0,0.258065,#586065


In [10]:
import datetime
def create_geojson_features(df):
    """Build one GeoJSON point Feature per row of ``df``.

    Each row must provide the columns 'lon', 'lat', 'hour', 'count' and
    'fillColor'. The hour is rendered as a timestamp string counted in hours
    from the Unix epoch (hour 0 -> '1970-01-01 00:00:00'), and the circle
    radius is the row's count plus 5.

    Returns the features as a list of dicts, in row order.
    """
    def to_feature(row):
        timestamp = pd.to_datetime(row['hour'], unit='h')
        marker_style = {
            'fillColor': row['fillColor'],
            'fillOpacity': 0.8,
            'stroke': 'true',
            'radius': row['count'] + 5,
        }
        return {
            'type': 'Feature',
            'geometry': {
                'type': 'Point',
                # GeoJSON positions are ordered longitude, latitude
                'coordinates': [row['lon'], row['lat']],
            },
            'properties': {
                'time': str(timestamp),
                'style': {'color': ''},
                'icon': 'circle',
                'iconstyle': marker_style,
            },
        }

    return [to_feature(row) for _, row in df.iterrows()]

# Build the features for the start-station table and show the first one.
start_geojson = create_geojson_features(start)
start_geojson[0]

{'type': 'Feature',
 'geometry': {'type': 'Point', 'coordinates': [-74.0334588, 40.7162469]},
 'properties': {'time': '1970-01-01 00:00:00',
  'style': {'color': ''},
  'icon': 'circle',
  'iconstyle': {'fillColor': '#586065',
   'fillOpacity': 0.8,
   'stroke': 'true',
   'radius': 5.258064516129032}}}

In [11]:
from folium.plugins import TimestampedGeoJson

nyc_map = folium.Map(location = [40.71958611647166, -74.0431174635887],
                    tiles = "CartoDB Positron",
                    zoom_start = 14)

# TimestampedGeoJson documents its data argument as GeoJSON (file, dict or
# str). Wrap the bare list of features in a FeatureCollection dict so the
# layer receives valid GeoJSON rather than a plain Python list.
TimestampedGeoJson({'type': 'FeatureCollection', 'features': start_geojson},
                  period = 'PT1H',   # ISO 8601 duration: one hour
                  duration = 'PT1M', # ISO 8601 duration: one minute
                  transition_time = 1000,
                  auto_play = True).add_to(nyc_map)
nyc_map

# DualMap

In [12]:
# Same aggregation as for the start stations, but keyed on arrival time:
# re-index the trips by 'stoptime' and count them per end station and hour.
nyc1 = nyc.reset_index().set_index('stoptime')
end = (
    nyc1.pivot_table(
        values='tripduration',
        index=['end station id',
               'end station latitude',
               'end station longitude',
               nyc1.index.hour],
        columns='type',
        aggfunc='count',
    )
    .reset_index()
)

# Average per day; `days` is defined in an earlier cell.
end['station'] = end['station']/days

end.columns = ['station_id', 'lat', 'lon', 'hour', 'count']
# Station-hours averaging fewer than one trip a day get the grey colour,
# all others the red one.
end['fillColor'] = end['count'].lt(1).map({True: '#586065', False: '#e64c4e'})

In [13]:
# Build the features for the end-station table.
end_geojson = create_geojson_features(end)

In [14]:
from folium.plugins import DualMap

dualmap = DualMap(location = [40.71958611647166, -74.0431174635887],
                 tiles = 'cartodbpositron',
                 zoom_start = 14)

# Start-station features go on map m1 and end-station features on map m2,
# with identical animation settings. TimestampedGeoJson documents its data
# argument as GeoJSON (file, dict or str), so each bare feature list is
# wrapped in a FeatureCollection dict.
for features, target_map in [(start_geojson, dualmap.m1),
                             (end_geojson, dualmap.m2)]:
    TimestampedGeoJson({'type': 'FeatureCollection', 'features': features},
                      period = 'PT1H',
                      duration = 'PT1M',
                      transition_time = 250,
                      auto_play = True).add_to(target_map)

dualmap

# AntPath

In [15]:
# Number of trips for every (start station, end station) pair, with the
# busiest routes first.
df = (
    nyc.pivot_table(
        values='tripduration',
        index=['start station name', 'end station name'],
        columns=['type'],
        aggfunc='count',
    )
    .reset_index()
    .sort_values(by='station', ascending=False)
)
df.head()

type,start station name,end station name,station
683,Hamilton Park,Grove St PATH,823
637,Grove St PATH,Hamilton Park,597
149,Brunswick & 6th,Grove St PATH,458
959,Jersey & 6th St,Grove St PATH,432
187,Brunswick St,Grove St PATH,396


In [16]:
# Names of the stations that appear as origin or destination in the ten
# busiest routes.
mask1 = df["start station name"].head(10)
mask2 = df["end station name"].head(10)
# pd.concat replaces Series.append, which was deprecated in pandas 1.4 and
# removed in pandas 2.0.
mask = pd.concat([mask1, mask2])
mask = mask.unique()
mask

array(['Hamilton Park', 'Grove St PATH', 'Brunswick & 6th',
       'Jersey & 6th St', 'Brunswick St', 'Dixon Mills',
       'Marin Light Rail'], dtype=object)

In [17]:
import cufflinks

# Keep only the routes whose two endpoints both belong to the top stations,
# then arrange the trip counts as a start-station x end-station matrix.
starts_in_top = df['start station name'].isin(mask)
ends_in_top = df['end station name'].isin(mask)
top_stations = df.loc[starts_in_top & ends_in_top, :].reset_index()

to_plot = top_stations.pivot_table(
    values='station',
    index='start station name',
    columns='end station name',
    aggfunc='mean',
)

to_plot.iplot(kind='heatmap', colorscale='Blues', xTitle='Start Station')

In [18]:
# Get the peak hours.
# Each trip is labelled by the hour in which it started. Bins are closed on
# the left: [0,6) am_valley, [6,10) am_peak, [10,16) mid_valley,
# [16,20) pm_peak, [20,24) pm_valley.
period_edges = [0, 6, 10, 16, 20, 24]
period_names = ['am_valley', 'am_peak', 'mid_valley', 'pm_peak', 'pm_valley']
nyc['time_of_day'] = pd.cut(nyc.index.hour,
                            bins=period_edges,
                            labels=period_names,
                            right=False)

am = nyc[nyc['time_of_day'] == 'am_peak']
pm = nyc[nyc['time_of_day'] == 'pm_peak']

In [19]:
# Take the station 'Grove St PATH' and look at its commute patterns:
# morning-peak trips that end there and evening-peak trips that start there,
# restricted to the top stations collected in `mask`.
am_into_hub = (am['end station name'] == 'Grove St PATH') & am['start station name'].isin(mask)
pm_out_of_hub = (pm['start station name'] == 'Grove St PATH') & pm['end station name'].isin(mask)

to_st_path = am.loc[am_into_hub, :]
from_st_path = pm.loc[pm_out_of_hub, :]

to_st_path.head(1)

Unnamed: 0_level_0,tripduration,stoptime,start station id,start station name,start station latitude,start station longitude,end station id,end station name,end station latitude,end station longitude,bikeid,usertype,birth year,gender,type,time_of_day
starttime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
2019-05-01 06:01:06,220,2019-05-01 06:04:46,3269,Brunswick & 6th,40.726012,-74.050389,3186,Grove St PATH,40.719586,-74.043117,26307,Subscriber,1989,2,station,am_peak


In [20]:
# Collapse the morning trips into one row per route (start and end station
# with their coordinates), counting the trips and listing the busiest first.
to_st_path = (
    to_st_path.pivot_table(
        values='tripduration',
        index=['start station name',
               'start station latitude',
               'start station longitude',
               'end station name',
               'end station latitude',
               'end station longitude'],
        columns=['type'],
        aggfunc='count',
    )
    .reset_index()
    .sort_values(by='station', ascending=False)
)

to_st_path

type,start station name,start station latitude,start station longitude,end station name,end station latitude,end station longitude,station
4,Hamilton Park,40.727596,-74.044247,Grove St PATH,40.719586,-74.043117,628
0,Brunswick & 6th,40.726012,-74.050389,Grove St PATH,40.719586,-74.043117,358
5,Jersey & 6th St,40.725289,-74.045572,Grove St PATH,40.719586,-74.043117,288
2,Dixon Mills,40.72163,-74.049968,Grove St PATH,40.719586,-74.043117,287
1,Brunswick St,40.724176,-74.050656,Grove St PATH,40.719586,-74.043117,275
6,Marin Light Rail,40.714584,-74.042817,Grove St PATH,40.719586,-74.043117,188
3,Grove St PATH,40.719586,-74.043117,Grove St PATH,40.719586,-74.043117,11


In [21]:
# Collapse the evening trips into one row per route (start and end station
# with their coordinates), counting the trips and listing the busiest first.
from_st_path = (
    from_st_path.pivot_table(
        values='tripduration',
        index=['start station name',
               'start station latitude',
               'start station longitude',
               'end station name',
               'end station latitude',
               'end station longitude'],
        columns=['type'],
        aggfunc='count',
    )
    .reset_index()
    .sort_values(by='station', ascending=False)
)

from_st_path

type,start station name,start station latitude,start station longitude,end station name,end station latitude,end station longitude,station
4,Grove St PATH,40.719586,-74.043117,Hamilton Park,40.727596,-74.044247,365
0,Grove St PATH,40.719586,-74.043117,Brunswick & 6th,40.726012,-74.050389,265
2,Grove St PATH,40.719586,-74.043117,Dixon Mills,40.72163,-74.049968,212
6,Grove St PATH,40.719586,-74.043117,Marin Light Rail,40.714584,-74.042817,205
1,Grove St PATH,40.719586,-74.043117,Brunswick St,40.724176,-74.050656,162
5,Grove St PATH,40.719586,-74.043117,Jersey & 6th St,40.725289,-74.045572,124
3,Grove St PATH,40.719586,-74.043117,Grove St PATH,40.719586,-74.043117,17


In [22]:
# Line weight: proportional to the trip count, scaled so that the busiest
# route of each table gets 10.
#   e.g. leaving Grove St PATH: 365 trips -> 10, 265 trips -> 265*10/365 = 7.26
#
# Delay: a straight line delay = a*count + b that decreases with the trip
# count, passing through (max count, 100) and (min count, 800):
#   a = (800 - 100) / (min - max)
#   b = 100 - max*a
# min and max are taken over every row of each table, which includes the
# Grove St PATH -> Grove St PATH row (11 and 17 trips in the tables above).
to_counts = to_st_path['station']
from_counts = from_st_path['station']

to_st_path['weight'] = to_counts*10/to_counts.max()
from_st_path['weight'] = from_counts*10/from_counts.max()

# Slope and intercept for the trips towards the station
a_to_st_path = (800-100)/(to_counts.min() - to_counts.max())
b_to_st_path = 100 - to_counts.max()*a_to_st_path

# Slope and intercept for the trips away from the station
a_from_st_path = (800-100)/(from_counts.min() - from_counts.max())
b_from_st_path = 100 - from_counts.max()*a_from_st_path

to_st_path['delay'] = a_to_st_path*to_counts + b_to_st_path
from_st_path['delay'] = a_from_st_path*from_counts + b_from_st_path

In [23]:
from folium.plugins import AntPath, DualMap

# Centre both maps on the end station of the morning routes.
dualmap = DualMap(location = [to_st_path.loc[0, 'end station latitude'], to_st_path.loc[0, 'end station longitude']],
                         tiles='cartodbpositron',
                         zoom_start=15)


def _add_ant_paths(routes, target_map):
    """Draw one AntPath per row of ``routes`` on ``target_map``.

    Each row supplies the start/end coordinates plus the 'weight' and 'delay'
    values computed for that route. An explicit loop is used instead of
    DataFrame.apply because the calls are made only for their side effect;
    apply would also build a throwaway Series of AntPath objects.
    """
    for _, row in routes.iterrows():
        AntPath([(row['start station latitude'],
                  row['start station longitude']),
                 (row['end station latitude'],
                  row['end station longitude'])],
                color='blue',
                weight = row['weight'],
                delay = row['delay']).add_to(target_map)


# Morning trips towards the station on m1, evening trips away from it on m2.
_add_ant_paths(to_st_path, dualmap.m1)
_add_ant_paths(from_st_path, dualmap.m2)

dualmap

# Heatmap

In [24]:
df_hour_list = []
hours = pd.Series(nyc.index.hour.unique().sort_values())

station_cols = ['start station latitude', 'start station longitude']

# Largest number of trips that start at one station within one hour of the
# day; used to scale the heat-map weights into the (0, 1] range.
max_trips = nyc.groupby([nyc.index.hour] + station_cols).size().max()


def create_list(hour):
    """Append the heat-map points for ``hour`` to ``df_hour_list``.

    Each point is [latitude, longitude, weight], where weight is the number
    of trips that started at that station during ``hour`` divided by
    ``max_trips``. Grouping by both coordinate columns and calling ``sum()``
    leaves no column to aggregate, so it would only de-duplicate the stations
    and drop the trip counts; ``size()`` keeps them.
    """
    trips = (nyc.loc[nyc.index.hour == hour, station_cols]
                .groupby(station_cols)
                .size()
                .reset_index(name='weight'))
    trips['weight'] = trips['weight'] / max_trips
    df_hour_list.append(trips.values.tolist())


# One entry per hour, in ascending hour order.
for hour in hours:
    create_list(hour)

In [27]:
# Animated heat map of the trip start locations: df_hour_list holds one list
# of points per hour, and HeatMapWithTime plays them back as time steps.

from folium.plugins import HeatMapWithTime

map_time = folium.Map(
    location=[40.71958611647166, -74.0431174635887],
    tiles="CartoDB Positron",
    zoom_start=14,
)

# Colour stops of the heat gradient
heat_gradient = {0.2: '#FBD973', 0.4: '#fa782f', 0.75: '#F16578', 1: '#782890'}

# Add trip events to the map
HeatMapWithTime(
    df_hour_list,
    gradient=heat_gradient,
    max_opacity=0.5,
    auto_play=True,
).add_to(map_time)

map_time