# State College Data Crawl - Getting the Data
Collect and work with data on the bars of downtown State College.

In [3]:
import pandas as pd, numpy as np
import requests
import json
import time

## Set Parameters

In [None]:
# Search centre(s) as "lat, lng" strings; only the first entry is used by the
# single-query cell below.
coordinates = ['40.7956, -77.8597']
# Keyword(s) passed to the Places Nearby Search request.
keywords = ['bar']
# Search radius in METRES: the Places Nearby Search `radius` parameter is
# expressed in metres, so 0.7 km is written as 700 (the former '.7' asked
# for a 0.7 m circle).
radius = '700'
# Google Maps Platform API key, typed/pasted at run time; surrounding
# whitespace from a paste would otherwise end up in the request.
api_key = input().strip()        #insert your Places API

# Places API
This section uses the Google Maps Places API, accessed through Google Cloud.

In [11]:
# Query the Places Nearby Search endpoint for the first coordinate/keyword pair.
url = 'https://maps.googleapis.com/maps/api/place/nearbysearch/json'
params = {
    # The endpoint expects "lat,lng"; drop the space so no stray whitespace
    # is sent in the query string.
    'location': coordinates[0].replace(' ', ''),
    'radius': str(radius),
    'keyword': str(keywords[0]),
    'key': str(api_key),
}

# Passing `params` lets requests percent-encode every value instead of
# relying on hand-built string concatenation.
respon = requests.get(url, params=params, timeout=30)
# Fail loudly on transport-level errors (4xx/5xx).
respon.raise_for_status()
jj = respon.json()

# The service also reports request problems (bad key, quota, ...) in the
# JSON body's "status" field; without this check a rejected request would
# look like an empty result list.
status = jj.get('status')
if status not in ('OK', 'ZERO_RESULTS'):
    raise RuntimeError(
        'Places Nearby Search failed: ' + str(status)
        + ' ' + str(jj.get('error_message', ''))
    )

# List of place dictionaries consumed by the next cell.
temp = jj.get('results', [])

In [6]:
# Flatten the Nearby Search results in `temp` into rows of
# [name, place_id, lat, lng, rating, types, vicinity], keeping only the first
# occurrence of each place_id.
final_data = []
place_id_lst = []          # place IDs in first-seen order
seen_place_ids = set()     # same IDs, for O(1) duplicate checks
for result in temp:
    place_id = result['place_id']
    if place_id in seen_place_ids:
        continue
    seen_place_ids.add(place_id)
    place_id_lst.append(place_id)

    coords = result['geometry']['location']
    final_data.append([
        result['name'],
        place_id,
        coords['lat'],
        coords['lng'],
        # 'rating' and 'vicinity' are optional in the response (e.g. a place
        # with no reviews has no rating); store None rather than aborting
        # the whole crawl with a KeyError.
        result.get('rating'),
        result.get('types', []),
        result.get('vicinity'),
    ])

## Export Bar Data

In [7]:
# Column headings, in the same order as the fields of each row in final_data.
labels = [
    'Place Name',
    'Place ID',
    'Latitude',
    'Longitude',
    'Rating',
    'Types',
    'Vicinity',
]
# Build the table of places and write it to a CSV file in the working directory.
df = pd.DataFrame.from_records(data=final_data, columns=labels)
df.to_csv(path_or_buf='StateCollegeDataCrawl.csv')

# Distance Matrix
https://developers.google.com/maps/documentation/distance-matrix/distance-matrix

In [8]:
DISTANCE_MATRIX_URL = 'https://maps.googleapis.com/maps/api/distancematrix/json'


def _walking_distance(origin_id, dest_id, key):
    """Return the walking distance between two places, as reported by the API.

    Args:
        origin_id: Google place ID of the starting point.
        dest_id: Google place ID of the end point.
        key: Google Maps Platform API key.

    Returns:
        The element's ``distance.value`` (metres), or ``None`` when the API
        reports no usable route for this pair.

    Raises:
        requests.HTTPError: on a 4xx/5xx HTTP response.
        RuntimeError: when the API rejects the request as a whole.
    """
    params = {
        # note the place_id: prefix since we are working with place IDs
        'origins': 'place_id:' + origin_id,
        'destinations': 'place_id:' + dest_id,
        'units': 'imperial',
        'mode': 'walking',
        'key': key,
    }
    response = requests.get(DISTANCE_MATRIX_URL, params=params, timeout=30)
    response.raise_for_status()
    payload = response.json()

    # A rejected request (bad key, quota, ...) carries no usable rows.
    if payload.get('status') != 'OK':
        raise RuntimeError(
            'Distance Matrix request failed: ' + str(payload.get('status'))
            + ' ' + str(payload.get('error_message', ''))
        )

    element = payload['rows'][0]['elements'][0]
    # Each element has its own status; pairs without a route have no
    # 'distance' key, so report them as missing instead of raising KeyError.
    if element.get('status') != 'OK':
        return None
    # adds the "value" for the distance (meters)
    # for seconds change to: element['duration']['value']
    return element['distance']['value']


# The first entry is a blank corner cell so that names line up with the
# matrix rows and columns.
label = [''] + list(df['Place Name'])      # list of bar names
location = [''] + list(df['Place ID'])     # list of place ids

# Header row first, then one row per origin: [origin name, d1, d2, ...].
distance_matrix = [list(label)]
for origin_name, origin in zip(label[1:], location[1:]):
    row_lst = [origin_name]
    for dest in location[1:]:
        row_lst.append(_walking_distance(str(origin), str(dest), api_key))
    distance_matrix.append(row_lst)


## Return distance matrix as csv

In [9]:
# Convert the list-of-rows matrix (its first row holds the place names) into
# a DataFrame and save it as a CSV file in the working directory.
df_distance = pd.DataFrame.from_records(data=distance_matrix)
df_distance.to_csv(path_or_buf='DataCrawlDistanceMatrix.csv')

# Multiple coordinates and/or keywords

In [None]:
# Disabled variant of the single-query search: the whole cell is wrapped in a
# triple-quoted string, so none of it executes. The text loops over every
# coordinate/keyword pair, follows 'next_page_token' to request further result
# pages, and pauses 5 seconds after processing each page.
# NOTE(review): after the while loop ends, the text requests the last `url`
# once more and appends those results a second time -- looks like leftover
# duplication; confirm before enabling.
# NOTE(review): the final time.sleep(5) sits inside the per-result for loop,
# so it would pause once per result rather than once per page -- confirm intent.
# NOTE(review): unlike the single-query cell, rows are not de-duplicated by
# place_id here.
'''
final_data = []
coordinates = ['40.7956, -77.8597']   #add more items as needed
keywords = ['bar']                    #add more items as needed

for coordinate in coordinates:
    for keyword in keywords:
        url = 'https://maps.googleapis.com/maps/api/place/nearbysearch/json?location='+coordinate+'&radius='+str(radius)+'&keyword='+str(keyword)+'&key='+str(api_key)
        
        while True:
          
            print(url)
            respon = requests.get(url)
            jj = json.loads(respon.text)
            results = jj['results']

            for result in results:
              
              name = result['name']
              place_id = result ['place_id']
              lat = result['geometry']['location']['lat']
              lng = result['geometry']['location']['lng']
              rating = result['rating']
              types = result['types']
              vicinity = result['vicinity']
              print(result)
              data = [name, place_id, lat, lng, rating, types, vicinity]
              final_data.append(data)
            time.sleep(5)
            
            if 'next_page_token' not in jj:
              break
            else:
              next_page_token = jj['next_page_token']
              url = 'https://maps.googleapis.com/maps/api/place/nearbysearch/json?key='+str(api_key)+'&pagetoken='+str(next_page_token)
          
        print(url)
        respon = requests.get(url)
        jj = json.loads(respon.text)
        results = jj['results']

        for result in results:
                
          name = result['name']
          place_id = result ['place_id']
          lat = result['geometry']['location']['lat']
          lng = result['geometry']['location']['lng']
          rating = result['rating']
          types = result['types']
          vicinity = result['vicinity']
          
          data = [name, place_id, lat, lng, rating, types, vicinity]
          final_data.append(data)
          time.sleep(5)
'''