In [1]:
import warnings
warnings.simplefilter('ignore')

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import googlemaps

%matplotlib inline

# Load Dataset

In [2]:
address = pd.read_csv('./data/Bus/Employee_Addresses.csv')
address.head()

Unnamed: 0,address,employee_id
0,"98 Edinburgh St, San Francisco, CA 94112, USA",206
1,"237 Accacia St, Daly City, CA 94014, USA",2081
2,"1835 Folsom St, San Francisco, CA 94103, USA",178
3,"170 Cambridge St, San Francisco, CA 94134, USA",50
4,"16 Roanoke St, San Francisco, CA 94131, USA",1863


In [3]:
address.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2191 entries, 0 to 2190
Data columns (total 2 columns):
address        2191 non-null object
employee_id    2191 non-null int64
dtypes: int64(1), object(1)
memory usage: 34.3+ KB


In [4]:
stops = pd.read_csv('./data/Bus/Potentail_Bust_Stops.csv')
stops.head()

Unnamed: 0,Street_One,Street_Two
0,MISSION ST,ITALY AVE
1,MISSION ST,NEW MONTGOMERY ST
2,MISSION ST,01ST ST
3,MISSION ST,20TH ST
4,MISSION ST,FREMONT ST


In [5]:
stops.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 119 entries, 0 to 118
Data columns (total 2 columns):
Street_One    119 non-null object
Street_Two    119 non-null object
dtypes: object(2)
memory usage: 1.9+ KB


# Data Processing

In [6]:
# get unique address and stop information
unique_address = list(set(address['address']))

def merge_stop(x):
    return x[0] + ' & ' + x[1] + ' CA'

unique_stop = list(set(stops.apply(merge_stop, axis=1)))

In [7]:
unique_address[:10]

['206 Lisbon St, San Francisco, CA 94112, USA',
 '40 Everson St, San Francisco, CA 94131, USA',
 '2635 Scott St, San Francisco, CA 94123, USA',
 '475 Bosworth St, San Francisco, CA 94112, USA',
 '122 Rousseau St, San Francisco, CA 94112, USA',
 '398 Hampshire St, San Francisco, CA 94110, USA',
 '900 Persia Ave, San Francisco, CA 94112, USA',
 '2885 Diamond St, San Francisco, CA 94131, USA',
 '98 Lisbon St, San Francisco, CA 94112, USA',
 '599 S Van Ness Ave, San Francisco, CA 94110, USA']

In [8]:
unique_stop[:10]

['MISSION ST & RANDALL ST CA',
 'MISSION ST & LOWELL ST CA',
 'MISSION ST & NIAGARA AVE CA',
 'MISSION ST & FREMONT ST CA',
 'MISSION ST & ANGELOS ALY CA',
 'MISSION ST & 19TH ST CA',
 'MISSION ST & ROLPH ST CA',
 'MISSION ST & BRAZIL AVE CA',
 'MISSION ST & LASKIE ST CA',
 'MISSION ST & JESSIE EAST ST CA']

# Distance Measure

Here, I am using the Google Distance Matrix API for origin and destination walking distance measure. The GitHub link for google-maps-services-python can be found at: [https://github.com/googlemaps/google-maps-services-python](https://github.com/googlemaps/google-maps-services-python).

Since this is a charged service, I only show one query for demo purpose. After query the distance, I believe the next step is pretty clear.

In [9]:
# start google API with your KEY (you can register at Google.com)
gmaps = googlemaps.Client(key='*************************************')

In [10]:
# defind origin and destination
origin = unique_address[0]
destination = unique_stop[0]

print('Origin:\t\t', origin)
print('Destination:\t', destination)

Origin:		 206 Lisbon St, San Francisco, CA 94112, USA
Destination:	 MISSION ST & RANDALL ST CA


In [11]:
# query walking distance between origins and destinations
result = gmaps.distance_matrix(origins=origin, destinations=destination, 
                               mode='walking')
result

{'destination_addresses': ['Mission St & Randall St, San Francisco, CA 94110, USA'],
 'origin_addresses': ['206 Lisbon St, San Francisco, CA 94112, USA'],
 'rows': [{'elements': [{'distance': {'text': '1.9 km', 'value': 1947},
     'duration': {'text': '25 mins', 'value': 1487},
     'status': 'OK'}]}],
 'status': 'OK'}

# Reference Solution

There is another solution online, you can refer to [Link](https://github.com/stasi009/TakeHomeDataChallenges/blob/master/14.ShuttleStops/shuttle_stops.py).