# Plot the stations

## Install Libraries

In [16]:
import pandas as pd
import numpy as np
import geopandas as gpd
import mpl_toolkits as mpl
import pyproj as proj
from shapely import geometry


%matplotlib inline

## Read in turnstile data with location info

In [56]:
turns = pd.read_pickle('data/turns_data_locations.pkl')

In [3]:
turns.head()

Unnamed: 0,c/a,unit,scp,station,linename,division,date,time,desc,entries,...,Complex ID,GTFS Stop ID,Division,Line,Stop Name,Borough,Daytime Routes,Structure,GTFS Latitude,GTFS Longitude
0,N519,R461,00-03-01,B'WAY-LAFAYETTE,BDFQ6,IND,03/25/2017,05:00:00,REGULAR,17310033,...,619.0,D21,IND,6th Av - Culver,Broadway-Lafayette St,M,B D F M,Subway,40.725297,-73.996204
1,N519,R461,00-03-01,B'WAY-LAFAYETTE,BDFQ6,IND,03/25/2017,09:00:00,REGULAR,17310048,...,619.0,D21,IND,6th Av - Culver,Broadway-Lafayette St,M,B D F M,Subway,40.725297,-73.996204
2,N519,R461,00-03-01,B'WAY-LAFAYETTE,BDFQ6,IND,03/25/2017,13:00:00,REGULAR,17310132,...,619.0,D21,IND,6th Av - Culver,Broadway-Lafayette St,M,B D F M,Subway,40.725297,-73.996204
3,N519,R461,00-03-01,B'WAY-LAFAYETTE,BDFQ6,IND,03/25/2017,17:00:00,REGULAR,17310413,...,619.0,D21,IND,6th Av - Culver,Broadway-Lafayette St,M,B D F M,Subway,40.725297,-73.996204
4,N519,R461,00-03-01,B'WAY-LAFAYETTE,BDFQ6,IND,03/25/2017,21:00:00,REGULAR,17310743,...,619.0,D21,IND,6th Av - Culver,Broadway-Lafayette St,M,B D F M,Subway,40.725297,-73.996204


In [45]:
turns.columns

Index(['c/a', 'unit', 'scp', 'station', 'linename', 'division', 'date', 'time',
       'desc', 'entries', 'exits', 'datetime', 'datetime_clean', 'year',
       'weekday', 'group', 'group_id', 'station_id', 'entries_diff',
       'exit_diff', 'first_row_group', 'entries_outlier', 'exit_outlier',
       'all_traffic', 'week', 'hour', 'Remote', 'Booth', 'stop_id.2013',
       'Station.2013', 'Line.2013', 'Division.2013', 'Station ID',
       'Complex ID', 'GTFS Stop ID', 'Division', 'Line', 'Stop Name',
       'Borough', 'Daytime Routes', 'Structure', 'GTFS Latitude',
       'GTFS Longitude'],
      dtype='object')

In [57]:
# We can drop some of these columns
to_drop = ['Remote', 'Booth', 'stop_id.2013', 'Station.2013', 'Line.2013', 'Division.2013', 'Station ID',
          'Complex ID', 'Division', 'Line', 'Daytime Routes', 'Structure']

turns.drop(to_drop, axis=1, inplace=True)

## Transform Location Data
### Project lat/long points from geo coordinates to x,y values

In [58]:
# Set projections
inProj = proj.Proj(init='epsg:4326') # assuming you're using WGS84 geographic
outProj = proj.Proj(init='epsg:27700') # use a locally appropriate projected CRS

In [59]:
def convert_to_point(lon_array, lat_array, inProj=inProj, outProj=outProj):
    """
    For a pair of geographic coordinate, project to x,y coordinates and return for use with spatial functions
    """
    points = []
    
    for lon, lat in zip(lon_array, lat_array):
        
        # Cast the geographic coordinate pair to the projected system
        x, y = proj.transform(inProj, outProj, lon, lat)
        pnt = geometry.Point(x, y)
        points.append(pnt)
    
    return points

Because I don't need to run the above function on every single row in my turnstile dataframe (many rows for a single coordinate pair) I'll find the distinct lat/long pairs and convert them to points then merge back to full dataset

In [60]:
# Build array of distinct lat/longs to convert to points
lat_longs = turns[['GTFS Latitude','GTFS Longitude']].drop_duplicates(keep='first').dropna(how='any')
lat_longs.reset_index(drop=True, inplace=True)

longs = np.array(lat_longs['GTFS Longitude'])
lats = np.array(lat_longs['GTFS Latitude'])

In [61]:
lat_longs['points'] = convert_to_point(longs, lats)

In [62]:
lat_longs.head()

Unnamed: 0,GTFS Latitude,GTFS Longitude,points
0,40.725297,-73.996204,POINT (-5415907.726113115 2330758.689348908)
1,40.68138,-73.956848,POINT (-5420490.720184836 2323312.868763227)
2,40.713282,-74.006978,POINT (-5418337.3848751 2331236.897295345)
3,40.802098,-73.949625,POINT (-5401614.123335132 2330315.018892818)
4,40.7906,-73.947478,POINT (-5403255.90057637 2329319.730975604)


Now I can merge the projected points back to my full dataframe

In [63]:
turns_pts = pd.merge(turns, lat_longs, how='left', on=['GTFS Latitude', 'GTFS Longitude'])
turns_pts.head()

Unnamed: 0,c/a,unit,scp,station,linename,division,date,time,desc,entries,...,exit_outlier,all_traffic,week,hour,GTFS Stop ID,Stop Name,Borough,GTFS Latitude,GTFS Longitude,points
0,N519,R461,00-03-01,B'WAY-LAFAYETTE,BDFQ6,IND,03/25/2017,05:00:00,REGULAR,17310033,...,False,23.0,12,5,D21,Broadway-Lafayette St,M,40.725297,-73.996204,POINT (-5415907.726113115 2330758.689348908)
1,N519,R461,00-03-01,B'WAY-LAFAYETTE,BDFQ6,IND,03/25/2017,09:00:00,REGULAR,17310048,...,False,72.0,12,9,D21,Broadway-Lafayette St,M,40.725297,-73.996204,POINT (-5415907.726113115 2330758.689348908)
2,N519,R461,00-03-01,B'WAY-LAFAYETTE,BDFQ6,IND,03/25/2017,13:00:00,REGULAR,17310132,...,False,375.0,12,13,D21,Broadway-Lafayette St,M,40.725297,-73.996204,POINT (-5415907.726113115 2330758.689348908)
3,N519,R461,00-03-01,B'WAY-LAFAYETTE,BDFQ6,IND,03/25/2017,17:00:00,REGULAR,17310413,...,False,774.0,12,17,D21,Broadway-Lafayette St,M,40.725297,-73.996204,POINT (-5415907.726113115 2330758.689348908)
4,N519,R461,00-03-01,B'WAY-LAFAYETTE,BDFQ6,IND,03/25/2017,21:00:00,REGULAR,17310743,...,False,664.0,12,21,D21,Broadway-Lafayette St,M,40.725297,-73.996204,POINT (-5415907.726113115 2330758.689348908)


In [70]:
# Write out to CSV for Tableau testing 
turns_pts.to_csv('zipfile_test/turns_locations.csv', index=False)

Find points within certain radius

In [69]:
central_pt = turns_pts.loc[0, 'points']
print(central_pt)

POINT (-5415907.726113115 2330758.689348908)
