# Plot the stations

## Install Libraries

In [14]:
import pandas as pd
import numpy as np
import geopandas as gpd
import mpl_toolkits as mpl
import pyproj as proj
from shapely import geometry
import folium

%matplotlib inline

## Read in turnstile data with location info

In [2]:
turns = pd.read_pickle('data/turns_data_locations.pkl')

In [3]:
turns.head()

Unnamed: 0,c/a,unit,scp,station,linename,division,date,time,desc,entries,...,GTFS Stop ID,Division,Line,Stop Name,Borough,Daytime Routes,Structure,GTFS Latitude,GTFS Longitude,station_id
0,N220,R155,01-00-02,KINGSBRIDGE RD,BD,IND,03/26/2016,05:00:00,REGULAR,669738,...,D04,IND,Concourse,Kingsbridge Rd,Bx,B D,Subway,40.866978,-73.893509,Kingsbridge RdConcourse
1,N220,R155,01-00-02,KINGSBRIDGE RD,BD,IND,03/26/2016,09:00:00,REGULAR,669845,...,D04,IND,Concourse,Kingsbridge Rd,Bx,B D,Subway,40.866978,-73.893509,Kingsbridge RdConcourse
2,N220,R155,01-00-02,KINGSBRIDGE RD,BD,IND,03/26/2016,13:00:00,REGULAR,669956,...,D04,IND,Concourse,Kingsbridge Rd,Bx,B D,Subway,40.866978,-73.893509,Kingsbridge RdConcourse
3,N220,R155,01-00-02,KINGSBRIDGE RD,BD,IND,03/26/2016,17:00:00,REGULAR,670074,...,D04,IND,Concourse,Kingsbridge Rd,Bx,B D,Subway,40.866978,-73.893509,Kingsbridge RdConcourse
4,N220,R155,01-00-02,KINGSBRIDGE RD,BD,IND,03/26/2016,21:00:00,REGULAR,670139,...,D04,IND,Concourse,Kingsbridge Rd,Bx,B D,Subway,40.866978,-73.893509,Kingsbridge RdConcourse


In [4]:
turns.columns

Index(['c/a', 'unit', 'scp', 'station', 'linename', 'division', 'date', 'time',
       'desc', 'entries', 'exits', 'datetime', 'datetime_clean', 'year',
       'weekday', 'group', 'group_id', 'station_line', 'entries_diff',
       'exit_diff', 'first_row_group', 'entries_outlier', 'exit_outlier',
       'week', 'hour', 'timegroup', 'timegroupstr', 'wkdaynbr', 'Remote',
       'Booth', 'stop_id.2013', 'Station.2013', 'Line.2013', 'Division.2013',
       'Station ID', 'Complex ID', 'GTFS Stop ID', 'Division', 'Line',
       'Stop Name', 'Borough', 'Daytime Routes', 'Structure', 'GTFS Latitude',
       'GTFS Longitude', 'station_id'],
      dtype='object')

In [5]:
# We can drop some of these columns
to_drop = ['Remote', 'Booth', 'stop_id.2013', 'Station.2013', 'Line.2013', 'Division.2013', 'Station ID',
          'Complex ID', 'Division', 'Line', 'Daytime Routes', 'Structure']

turns.drop(to_drop, axis=1, inplace=True)

## Transform Location Data
### Project lat/long points from geo coordinates to x,y values

In [6]:
# Set projections
inProj = proj.Proj(init='epsg:4326') # assuming you're using WGS84 geographic
outProj = proj.Proj(init='epsg:27700') # use a locally appropriate projected CRS

In [7]:
def convert_to_point(lon_array, lat_array, inProj=inProj, outProj=outProj):
    """
    For a pair of geographic coordinate, project to x,y coordinates and return for use with spatial functions
    """
    points = []
    
    for lon, lat in zip(lon_array, lat_array):
        
        # Cast the geographic coordinate pair to the projected system
        x, y = proj.transform(inProj, outProj, lon, lat)
        pnt = geometry.Point(x, y)
        points.append(pnt)
    
    return points

Because I don't need to run the above function on every single row in my turnstile dataframe (many rows for a single coordinate pair) I'll find the distinct lat/long pairs and convert them to points then merge back to full dataset

In [8]:
# Build array of distinct lat/longs to convert to points
lat_longs = turns[['GTFS Latitude','GTFS Longitude']].drop_duplicates(keep='first').dropna(how='any')
lat_longs.reset_index(drop=True, inplace=True)

longs = np.array(lat_longs['GTFS Longitude'])
lats = np.array(lat_longs['GTFS Latitude'])

In [9]:
lat_longs['points'] = convert_to_point(longs, lats)

In [10]:
lat_longs.head()

Unnamed: 0,GTFS Latitude,GTFS Longitude,points
0,40.866978,-73.893509,POINT (-5388659.980590556 2328013.221235313)
1,40.751776,-73.976848,POINT (-5410795.920293068 2330219.44090841)
2,40.844434,-73.914685,POINT (-5393248.653874857 2329004.474736666)
3,40.700377,-73.950234,POINT (-5417217.488291109 2323777.891172175)
4,40.680829,-73.975098,POINT (-5421573.959626293 2325407.545724754)


Now I can merge the projected points back to my full dataframe

In [11]:
turns_pts = pd.merge(turns, lat_longs, how='left', on=['GTFS Latitude', 'GTFS Longitude'])
turns_pts.head()

Unnamed: 0,c/a,unit,scp,station,linename,division,date,time,desc,entries,...,timegroup,timegroupstr,wkdaynbr,GTFS Stop ID,Stop Name,Borough,GTFS Latitude,GTFS Longitude,station_id,points
0,N220,R155,01-00-02,KINGSBRIDGE RD,BD,IND,03/26/2016,05:00:00,REGULAR,669738,...,1,2 am - 6 am,6,D04,Kingsbridge Rd,Bx,40.866978,-73.893509,Kingsbridge RdConcourse,POINT (-5388659.980590556 2328013.221235313)
1,N220,R155,01-00-02,KINGSBRIDGE RD,BD,IND,03/26/2016,09:00:00,REGULAR,669845,...,2,6 am - 10 am,6,D04,Kingsbridge Rd,Bx,40.866978,-73.893509,Kingsbridge RdConcourse,POINT (-5388659.980590556 2328013.221235313)
2,N220,R155,01-00-02,KINGSBRIDGE RD,BD,IND,03/26/2016,13:00:00,REGULAR,669956,...,3,10 am - 2pm,6,D04,Kingsbridge Rd,Bx,40.866978,-73.893509,Kingsbridge RdConcourse,POINT (-5388659.980590556 2328013.221235313)
3,N220,R155,01-00-02,KINGSBRIDGE RD,BD,IND,03/26/2016,17:00:00,REGULAR,670074,...,4,2 pm - 6 pm,6,D04,Kingsbridge Rd,Bx,40.866978,-73.893509,Kingsbridge RdConcourse,POINT (-5388659.980590556 2328013.221235313)
4,N220,R155,01-00-02,KINGSBRIDGE RD,BD,IND,03/26/2016,21:00:00,REGULAR,670139,...,5,6 pm - 10 pm,6,D04,Kingsbridge Rd,Bx,40.866978,-73.893509,Kingsbridge RdConcourse,POINT (-5388659.980590556 2328013.221235313)


In [15]:
# Write out to CSV for Tableau testing 
turns_pts.to_csv('zipfile_test/turns_locations.csv', index=False)

Find points within certain radius

In [None]:
#central_pt = turns_pts.loc[0, 'points']
#print(central_pt)

## Try plotting points on map

In [16]:
#New York City Coordinates
NYC_COORD = [40.7128, -74.0059]

# Build map 
map_nyc = folium.Map(location=NYC_COORD, zoom_start=12, 
tiles='cartodbpositron', width=640, height=480)