In [3]:
# Use Google Map API to find bounds of city
# Some ride share data originates and ends far outside of Austin, we are not interested in these

# Dependencies
import requests
import json
import pandas as pd

# Google developer API key
from api_config import g_key

# Read cleaned ride data File and store into Pandas data frame
ride_data = pd.read_csv("Data/Rides_Data_Clean.csv")

# Target city
target_city = "Austin"

# Build the endpoint URL
target_url = ('https://maps.googleapis.com/maps/api/geocode/json?'
    'address={0}&key={1}').format(target_city, g_key)

In [4]:
# Run a request to endpoint and convert result to json
geo_data = requests.get(target_url).json()

# Print the json
print(geo_data)

{'results': [{'address_components': [{'long_name': 'Austin', 'short_name': 'Austin', 'types': ['locality', 'political']}, {'long_name': 'Travis County', 'short_name': 'Travis County', 'types': ['administrative_area_level_2', 'political']}, {'long_name': 'Texas', 'short_name': 'TX', 'types': ['administrative_area_level_1', 'political']}, {'long_name': 'United States', 'short_name': 'US', 'types': ['country', 'political']}], 'formatted_address': 'Austin, TX, USA', 'geometry': {'bounds': {'northeast': {'lat': 30.5168629, 'lng': -97.5684199}, 'southwest': {'lat': 30.0986589, 'lng': -97.9383829}}, 'location': {'lat': 30.267153, 'lng': -97.7430608}, 'location_type': 'APPROXIMATE', 'viewport': {'northeast': {'lat': 30.5168629, 'lng': -97.5684199}, 'southwest': {'lat': 30.0986589, 'lng': -97.9383829}}}, 'place_id': 'ChIJLwPMoJm1RIYRetVp1EtGm10', 'types': ['locality', 'political']}], 'status': 'OK'}


In [5]:
# Print the json (nicer this time)
print(json.dumps(geo_data, indent=4, sort_keys=True))

{
    "results": [
        {
            "address_components": [
                {
                    "long_name": "Austin",
                    "short_name": "Austin",
                    "types": [
                        "locality",
                        "political"
                    ]
                },
                {
                    "long_name": "Travis County",
                    "short_name": "Travis County",
                    "types": [
                        "administrative_area_level_2",
                        "political"
                    ]
                },
                {
                    "long_name": "Texas",
                    "short_name": "TX",
                    "types": [
                        "administrative_area_level_1",
                        "political"
                    ]
                },
                {
                    "long_name": "United States",
                    "short_name": "US",
                    "types": [
  

In [6]:
# Extract bounding box for city from viewport
# Extract latitude and longitude upper and lower bounds
lat_ne = geo_data["results"][0]["geometry"]["bounds"]["northeast"]["lat"]
long_ne = geo_data["results"][0]["geometry"]["bounds"]["northeast"]["lng"]
lat_sw = geo_data["results"][0]["geometry"]["bounds"]["southwest"]["lat"]
long_sw = geo_data["results"][0]["geometry"]["bounds"]["southwest"]["lng"]

# Print the latitude and longitude bounds to check
print('''
    City: {0}
    Latitude_ne: {1}
    Longitude_ne: {2}
    Latitude_sw: {3}
    Longitude_sw: {4}
    '''.format(target_city, lat_ne, long_ne, lat_sw, long_sw))


    City: Austin
    Latitude_ne: 30.5168629
    Longitude_ne: -97.5684199
    Latitude_sw: 30.0986589
    Longitude_sw: -97.9383829
    


In [7]:
# Check ride_data column names for lat long
ride_data.head()

Unnamed: 0.1,Unnamed: 0,created_date,end_location_lat,end_location_long,start_location_lat,start_location_long,Date
0,0,2016-06-04 00:10:19-05:00,38.676,-121.039,38.655,-121.071,2016-06-04 00:10:19-05:00
1,1,2016-06-04 01:47:39-05:00,38.676,-121.039,38.679,-121.038,2016-06-04 01:47:39-05:00
2,2,2016-06-04 03:13:07-05:00,38.676,-121.039,38.678,-121.042,2016-06-04 03:13:07-05:00
3,3,2016-06-04 09:52:35-05:00,30.271,-97.742,30.247,-97.764,2016-06-04 09:52:35-05:00
4,4,2016-06-04 10:23:25-05:00,30.269,-97.752,30.269,-97.75,2016-06-04 10:23:25-05:00


In [49]:
# make a filter to drop any rides that are not started within the bounds of Austin as determined by google maps boundry

filter = ((ride_data['start_location_lat'] < lat_ne) & (ride_data['start_location_lat'] > lat_sw)) | ((ride_data['start_location_long'] > long_ne) & (ride_data['start_location_long'] < long_sw))

# select only rows within the city bounds as determined by google api
ride_data_filtered=ride_data.loc[filter]

# check locations
print (long_ne, long_sw)
print(ride_data['start_location_long'].min())

-97.5684199 -97.9383829
-121.071


In [50]:
# Check counts pre and post filterfed

print(ride_data.count(),ride_data_filtered.count() )
ride_data_filtered.head()

Unnamed: 0             1494125
created_date           1494125
end_location_lat       1494125
end_location_long      1494125
start_location_lat     1494125
start_location_long    1494125
Date                   1494125
dtype: int64 Unnamed: 0             1489613
created_date           1489613
end_location_lat       1489613
end_location_long      1489613
start_location_lat     1489613
start_location_long    1489613
Date                   1489613
dtype: int64


Unnamed: 0.1,Unnamed: 0,created_date,end_location_lat,end_location_long,start_location_lat,start_location_long,Date
3,3,2016-06-04 09:52:35-05:00,30.271,-97.742,30.247,-97.764,2016-06-04 09:52:35-05:00
4,4,2016-06-04 10:23:25-05:00,30.269,-97.752,30.269,-97.75,2016-06-04 10:23:25-05:00
5,5,2016-06-04 22:32:42-05:00,30.27,-97.749,30.258,-97.749,2016-06-04 22:32:42-05:00
6,6,2016-06-04 23:06:20-05:00,30.267,-97.745,30.203,-97.771,2016-06-04 23:06:20-05:00
7,7,2016-06-05 01:54:08-05:00,30.242,-97.781,30.27,-97.75,2016-06-05 01:54:08-05:00


In [52]:
# Save cleaned file 
ride_data_filtered.to_csv("Data/Rides_Data_Clean_Bounded.csv", encoding='utf-8')