# Write functions to find the closest point on a route to our actual data point

Started by Nathaniel on Sunday, June 9, 2019

In [1]:
# Reload imported modules automatically before each cell executes, so edits
# to local modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

# Record when and by whom the notebook was run.
!date
!whoami

import numpy as np
import pandas as pd

Sun Jun  9 22:50:31 PDT 2019
ndbs


## Import my closest point module and read in a GTFS `shapes.txt` file

In [2]:
import find_closest_route_point as f

In [6]:
# List the files available in the GTFS feed directory.
!ls ../data/source/gtfs_20180815/

agency.txt          calendar.txt        fare_rules.txt      stop_times.txt
block.txt           calendar_dates.txt  routes.txt          stops.txt
block_trip.txt      fare_attributes.txt shapes.txt          trips.txt


In [7]:
# Load the GTFS shapes table and preview its first rows.
shapes_df = pd.read_csv('../data/source/gtfs_20180815/shapes.txt')
shapes_df.head()

Unnamed: 0,shape_id,shape_pt_lat,shape_pt_lon,shape_pt_sequence,shape_dist_traveled
0,10002011,47.612137,-122.281769,1,0.0
1,10002011,47.612144,-122.281784,2,5.8
2,10002011,47.612148,-122.28183,3,13.5
3,10002011,47.612141,-122.281853,4,22.0
4,10002011,47.612102,-122.281921,5,45.0


## Exploring some Python syntax: Test how iterable unpacking works with assignment

In [18]:
# Test iterable unpacking with arrays: a 1-D NumPy array unpacks
# element by element, just like a tuple or list.
a = np.array([4,8])
x,y = a
x

4

In [48]:
# Test starred unpacking: c takes the first value and *b collects the rest.
# b is rebound to a brand-new list; the array assigned to b on the first
# line is discarded rather than written into.
b = np.array([0,0])
c, *b = x, y, 5
b

[8, 5]

In [49]:
# A lone starred target (note the trailing comma) collects every element
# of the array into a new list.
*b, = a
b

[4, 8]

In [50]:
# Check whether the unpacked result is the same object as the source array.
b is a

False

In [51]:
# Starred unpacking produces a plain Python list, whatever the source type.
type(b)

list

In [52]:
# The source object a is still a NumPy array.
type(a)

numpy.ndarray

## Get data for a sample point (at index 2) for testing

In [16]:
# Unpack row 2 into its five fields. Unpacking is positional, so it relies on
# the column order shape_id, shape_pt_lat, shape_pt_lon, shape_pt_sequence,
# shape_dist_traveled. The row comes back as a float64 Series, so shape_id
# and seq are floats here rather than ints.
shape_id, lat, lon, seq, dist = shapes_df.iloc[2]

In [17]:
# Inspect shape_id: the integer column value was upcast to float when the
# mixed int/float row was unpacked.
shape_id

10002011.0

In [19]:
# Show the latitude at full float precision (the table preview rounds it).
lat

47.612148299999994

## Try finding adjacent points

Still working on the best method for this part... It might be best to have one function that does this as well as computes the total shape distance to the projected point. Or perhaps just return the indices of these points, so that we can access `shape_dist_traveled` for them later.

In [26]:
# Look up the sample point's own row: same latitude, same longitude, same shape.
point_data = shapes_df.loc[
    shapes_df['shape_pt_lat'].eq(lat)
    & shapes_df['shape_pt_lon'].eq(lon)
    & shapes_df['shape_id'].eq(shape_id)
]
point_data

Unnamed: 0,shape_id,shape_pt_lat,shape_pt_lon,shape_pt_sequence,shape_dist_traveled
2,10002011,47.612148,-122.28183,3,13.5


In [27]:
# Sequence number of the matched point (a one-row Series keyed by the row label).
point_data.shape_pt_sequence

2    3
Name: shape_pt_sequence, dtype: int64

In [32]:
# Build the point filter as a standalone boolean Series and confirm its type.
mask = (
    shapes_df['shape_pt_lat'].eq(lat)
    & shapes_df['shape_pt_lon'].eq(lon)
    & shapes_df['shape_id'].eq(shape_id)
)
type(mask)

pandas.core.series.Series

In [33]:
# Among the first five rows, only row 2 (the sample point) is expected to match.
mask.head()

0    False
1    False
2     True
3    False
4    False
dtype: bool

In [37]:
# Pre-allocate a 2x2 array holding one (lat, lon) pair per row for the
# neighbouring shape points (row 0 is filled with the previous point below).
# np.full initialises every entry to NaN so a missing neighbour stays NaN.
# The earlier version used np.empty, which leaves arbitrary values, and then
# called the array like a function, which raises TypeError.
adjacent_points = np.full((2, 2), np.nan)

In [39]:
# Mask for the point immediately before the sample point on the same shape.
prev_mask = (
    shapes_df['shape_id'].eq(shape_id)
    & shapes_df['shape_pt_sequence'].eq(seq - 1)
)
prev_mask.head()

0    False
1     True
2    False
3    False
4    False
dtype: bool

In [40]:
# True when some row matches, i.e. the sample point has a predecessor on its
# shape. (prev_mask.any() is the vectorised pandas equivalent.)
any(prev_mask)

True

In [44]:
# Store the previous point's (lat, lon) in row 0, or NaN when the sample point
# has no predecessor. Both columns are selected with a single .loc call and
# the first match is taken as a plain float array. This avoids assigning a
# tuple of one-element Series, which depends on each Series being implicitly
# converted to a scalar (deprecated in recent pandas, and it fails outright
# if the mask ever matches more than one row).
adjacent_points[0] = (shapes_df.loc[prev_mask, ['shape_pt_lat', 'shape_pt_lon']].values[0]
                      if prev_mask.any() else np.nan)
adjacent_points

array([[  47.6121445, -122.281784 ],
       [         nan,          nan]])

In [45]:
# Pull latitude and longitude of the previous point with one .loc selection.
shapes_df.loc[prev_mask, ['shape_pt_lat', 'shape_pt_lon']]

Unnamed: 0,shape_pt_lat,shape_pt_lon
1,47.612144,-122.281784


In [53]:
# Full record of the previous point, including shape_dist_traveled.
shapes_df[prev_mask]

Unnamed: 0,shape_id,shape_pt_lat,shape_pt_lon,shape_pt_sequence,shape_dist_traveled
1,10002011,47.612144,-122.281784,2,5.8


In [55]:
# Select both neighbours at once: points on this shape whose sequence number
# differs from the sample point's by exactly one.
adjacent_mask = (
    shapes_df['shape_id'].eq(shape_id)
    & shapes_df['shape_pt_sequence'].sub(seq).abs().eq(1)
)
adjacent_mask.head()

0    False
1     True
2    False
3     True
4    False
dtype: bool

In [62]:
# Rows for the previous and next shape points.
shapes_df[adjacent_mask]

Unnamed: 0,shape_id,shape_pt_lat,shape_pt_lon,shape_pt_sequence,shape_dist_traveled
1,10002011,47.612144,-122.281784,2,5.8
3,10002011,47.612141,-122.281853,4,22.0


In [73]:
# Same lookup through the module helper; it is expected to return the same
# rows as the adjacent_mask selection above.
f.find_adjacent_shape_point_data(lat, lon, shapes_df, shape_id)

Unnamed: 0,shape_id,shape_pt_lat,shape_pt_lon,shape_pt_sequence,shape_dist_traveled
1,10002011,47.612144,-122.281784,2,5.8
3,10002011,47.612141,-122.281853,4,22.0


## Check beginning and end of shape

In [74]:
# First point of the shape (row 0, sequence 1): it has no predecessor, so only
# the following point is expected back.
shape_id, lat, lon, seq, dist = shapes_df.iloc[0]
f.find_adjacent_shape_point_data(lat, lon, shapes_df, shape_id)

Unnamed: 0,shape_id,shape_pt_lat,shape_pt_lon,shape_pt_sequence,shape_dist_traveled
1,10002011,47.612144,-122.281784,2,5.8


In [77]:
# Highest sequence number recorded for the current shape.
shapes_df.loc[
    shapes_df['shape_id'].eq(shape_id),
    'shape_pt_sequence',
].max()

201

In [79]:
# Row 200 should hold the shape's last point (sequence 201, the maximum found above).
shapes_df.iloc[200]

shape_id               1.000201e+07
shape_pt_lat           4.760922e+01
shape_pt_lon          -1.223301e+02
shape_pt_sequence      2.010000e+02
shape_dist_traveled    1.605600e+04
Name: 200, dtype: float64

In [80]:
# Last point of the shape (row 200, sequence 201): it has no successor, so
# only the preceding point is expected back.
shape_id, lat, lon, seq, dist = shapes_df.iloc[200]
f.find_adjacent_shape_point_data(lat, lon, shapes_df, shape_id)

Unnamed: 0,shape_id,shape_pt_lat,shape_pt_lon,shape_pt_sequence,shape_dist_traveled
199,10002011,47.609356,-122.329788,200,15959.7
