This is IPython Notebook two of five for my final project in General Assembly's data science course.

The following code gathers data from the Google Places API. 

Latitude and longitude coordinates from Craigslist listings are used as inputs to the API to gather relevant 
places of interest near each listing. Gathered places currently include grocery stores and gyms. Code collecting 
movie_theatres, train_stations, airports, subway_stations, Barnes_and_Nobles, Deloitte Offices, and Starbucks 
has been commented out to reduce requests to the API, as these places were not found to provide predictive power.

In [1]:
import requests # Helps construct the request to send to the API
import json # JSON helper functions
from bs4 import BeautifulSoup # Data scraping library
import pandas as pd
import time
import ssl

In [8]:
# Load the data collected from Craigslist and select the location attributes used as inputs to the Google Places API

data = pd.read_csv(r'C:\Users\alsherman\Desktop\GitHub\DataScience_GeneralAssembly\Data\Craigslist_Data_May_3_.csv')
#https://raw.githubusercontent.com/Alexjmsherman/DataScience_GeneralAssembly/master/Data/Craigslist_Data_May_3_.csv

# One (latitude, longitude, ID) tuple per listing. The tuples are materialized into a list because
# a bare zip() is a single-use iterator on Python 3, which would leave nothing to iterate if the
# collection loop were run a second time.
location = list(zip(data['latitude'], data['longitude'], data['ID']))

In [9]:
# Display the first three rows of the Craigslist listing data as a quick sanity check

data.head(3)

Unnamed: 0,ID,city,country,latitude,location_data_accuracy,longitude,state,availability,average_image_size,bathroom,...,dog,housing_type,image_number,laundry,parking,price,smoking,square_footage,time_of_posting,url
0,4959351766,,,,,,,available now,270000,1.0,...,,condo,18,,,$1310,,,2015-04-01 2:44pm,http://washingtondc.craigslist.org/mld/apa/495...
1,4959370650,Alexandria,US,38.806,22.0,-77.0529,DC,available now,0,,...,dogs are OK - wooof,house,0,,attached garage,$860,,,2015-04-01 2:55pm,http://washingtondc.craigslist.org/mld/apa/495...
2,4959459805,,,,,,,available now,0,1.0,...,,apartment,0,laundry in bldg,attached garage,$1301,,450.0,2015-04-01 3:46pm,http://washingtondc.craigslist.org/doc/apa/495...


In [10]:
# Create a url for each listing with the latitude, longitude, and nearby search terms (e.g. grocery store)

def create_url(loc, types, keyword='', radius=1610, api_key=None):
    """Build a Google Places Nearby Search URL for one listing.

    Parameters
    ----------
    loc : sequence
        Listing location; loc[0] is used as the latitude and loc[1] as the
        longitude. Any further items (e.g. the listing ID) are ignored.
    types : str
        Pre-formatted query fragment ending in '&', e.g. 'types=gym&'.
    keyword : str, optional
        Additional pre-formatted query fragment ending in '&',
        e.g. 'keyword=Starbucks&'. Defaults to no keyword filter.
    radius : int, optional
        Value sent as the `radius` query parameter. Defaults to 1610.
    api_key : str, optional
        API key sent with the request. Defaults to the project's key.

    Returns
    -------
    str
        The complete request URL.
    """
    if api_key is None:
        # NOTE(review): this key is committed to source control; it should be
        # rotated and supplied from outside the notebook (e.g. via api_key).
        api_key = 'AIzaSyCoCXvd2OjiNx_TpfuobvWHlZO4uV5GtbY'

    coordinates = str(loc[0]) + ',' + str(loc[1])

    # The fragments are not URL-encoded here; callers pass them ready to use.
    return ''.join([
        r'https://maps.googleapis.com/maps/api/place/nearbysearch/json?location=',
        coordinates,
        '&radius=', str(radius), '&rankby=prominence&',
        types,
        keyword,
        'key=', api_key,
    ])

In [11]:
#returns a count of the provided search term (e.g. 20 nearby grocery stores)

def count_nearby_places(url, timeout=30):
    """Return the number of places listed in the API response for `url`.

    Parameters
    ----------
    url : str
        Complete request URL, as produced by create_url.
    timeout : float, optional
        Seconds to wait for the server before giving up, so that a stalled
        connection cannot block the whole collection run. Defaults to 30.

    Returns
    -------
    int
        Number of entries under 'results' in the JSON response; 0 when the
        response has no 'results' entry.

    NOTE(review): only this single response is counted; any further result
    pages are not requested, so the count is capped at one page of results
    (the largest count observed in this notebook is 20) - confirm against
    the API's paging rules before relying on larger counts.
    """
    # Nearby Search is a read-only query, documented as an HTTP GET request
    response = requests.get(url, timeout=timeout)

    # Decode the JSON body of the response
    json_data = response.json()

    # Each entry under 'results' is one nearby place
    return len(json_data.get('results', []))

In [44]:
# Create several lists of nearby places by making several search requests to the Google Places API for each listing
# To reduce calls to the API, I have commented out attributes that my analysis revealed as not significant

ID_list = []
grocery_list = []
gym_list = []
#movies_theatre_list = []
#train_station_list = []
#airport_list = []
#subway_station_list = []
#barnes_and_nobles_list = []
#Deloitte_list = []
#Starbucks_list = []

# Each entry pairs the list that collects the counts with the query fragment passed to create_url.
# Re-enable a search by uncommenting its list above, its entry here, and its column names below.
searches = [
    (grocery_list, 'types=grocery_or_supermarket&'),
    (gym_list, 'types=gym&'),
    #(movies_theatre_list, 'types=movie_theater&'),
    #(train_station_list, 'types=train_station&'),
    #(airport_list, 'types=airports&'),  # NOTE(review): the place type is probably 'airport' (singular) - confirm
    #(subway_station_list, 'types=subway_station&'),
    #(barnes_and_nobles_list, 'keyword=barnesandnobles&'),
    #(Deloitte_list, 'keyword=Deloitte&'),
    #(Starbucks_list, 'keyword=Starbucks&'),
]

for loc in location:
    # Get the unique id for the listing (use this to combine google places data with listing data)
    ID_list.append(loc[2])

    # Listings without coordinates cannot be searched. Record 0 for them (the count such rows
    # received in earlier runs) without spending API requests.
    has_coordinates = not (pd.isnull(loc[0]) or pd.isnull(loc[1]))

    for counts, search in searches:
        if has_coordinates:
            counts.append(count_nearby_places(create_url(loc, search)))
        else:
            counts.append(0)

# Combine all search terms into one row per listing. The column names are passed as a flat list
# to the constructor: assigning a nested list to .columns builds a MultiIndex in current pandas,
# and naming the columns up front also works when there are no listings at all.
# Note that this rebinds `data`, replacing the Craigslist listing data loaded earlier.
data = pd.DataFrame(
    list(zip(ID_list, grocery_list, gym_list)),
             #movies_theatre_list,train_station_list,
             #airport_list,subway_station_list,
             #barnes_and_nobles_list,Deloitte_list,Starbucks_list)),
    columns=['ID', 'grocery_list', 'gym_list'],
             #'movie_theatre_list','train_station_list',
             #'airport_list','subway_station_list',
             #'Barnes_and_Nobles_list','Deloitte_list','Starbucks_list'],
)

print(data.head())

           ID  grocery_list  gym_list
0  4959351766             0         0
1  4959370650            19        15
2  4959459805             0         0
3  4959625603             0         0
4  4960122664            20        20


In [46]:
# Save the place counts as a csv - this is concatenated with the listing data in a later IPython Notebook

output_csv = r'C:\Users\alsherman\Desktop\GitHub\DataScience_GeneralAssembly\Data\GooglePlacesAPI_May_13_part3.csv'
data.to_csv(output_csv, index=False)