# Pulling in new data from Google Places and Google Maps API

### In order to enhance the initial bikeshare datasets I am going to add data on places near each bike hub to allow for better analysis of the placement, usage and customer base represented in the bikeshare data



In [189]:
import pandas as pd
import numpy as np
import requests
import re
from bs4 import BeautifulSoup as bs4
import json
import matplotlib.pyplot as plt
%matplotlib inline 

# read csv file into dataframe

In [2]:
data  = pd.read_csv('new_york_city.csv')
data.head()

Unnamed: 0.1,Unnamed: 0,Start Time,End Time,Trip Duration,Start Station,End Station,User Type,Gender,Birth Year
0,5688089,2017-06-11 14:55:05,2017-06-11 15:08:21,795,Suffolk St & Stanton St,W Broadway & Spring St,Subscriber,Male,1998.0
1,4096714,2017-05-11 15:30:11,2017-05-11 15:41:43,692,Lexington Ave & E 63 St,1 Ave & E 78 St,Subscriber,Male,1981.0
2,2173887,2017-03-29 13:26:26,2017-03-29 13:48:31,1325,1 Pl & Clinton St,Henry St & Degraw St,Subscriber,Male,1987.0
3,3945638,2017-05-08 19:47:18,2017-05-08 19:59:01,703,Barrow St & Hudson St,W 20 St & 8 Ave,Subscriber,Female,1986.0
4,6208972,2017-06-21 07:49:16,2017-06-21 07:54:46,329,1 Ave & E 44 St,E 53 St & 3 Ave,Subscriber,Male,1992.0


# Create a list of all start station names

In [301]:
listOfStartStations = data['Start Station'].unique()
len(listOfStartStations)
listOfStartStations[2] = listOfStartStations[2] + " nyc"
listOfStartStations[2]

'1 Pl & Clinton St nyc'

In [302]:
listOfStartStations

array(['Suffolk St & Stanton St', 'Lexington Ave & E 63 St',
       '1 Pl & Clinton St nyc', 'Barrow St & Hudson St',
       '1 Ave & E 44 St', 'State St & Smith St', 'Front St & Gold St',
       'E 89 St & York Ave', 'Central Park S & 6 Ave', 'E 3 St & 1 Ave',
       'Bank St & Washington St', 'Front St & Maiden Ln',
       'E 10 St & 5 Ave', '1 Ave & E 68 St', 'N 11 St & Wythe Ave',
       'E 17 St & Broadway', 'E 2 St & Avenue C',
       'Central Park West & W 76 St', 'W 22 St & 8 Ave',
       'E 71 St & 1 Ave', 'University Pl & E 14 St', 'E 25 St & 2 Ave',
       'Dean St & Hoyt St', 'Allen St & Stanton St', 'NYCBS Depot - SSP',
       'W 26 St & 8 Ave', 'Great Jones St', 'W 43 St & 10 Ave',
       'Grand St & Elizabeth St', 'W 20 St & 11 Ave', 'Old Fulton St',
       'Allen St & Hester St', 'E 55 St & 3 Ave', '8 Ave & W 52 St',
       'Broadway & W 29 St', 'Cathedral Pkwy & Broadway',
       'Bayard St & Baxter St', 'Driggs Ave & N Henry St',
       'Perry St & Bleecker St', 'Broa

# Placed all lat/long info into an array in proper format for API

In [232]:
prepped_StartStation =[]
for x in listOfStartStations:
    prepstring_ = re.sub("\s", "%20", x)
    string_ = re.sub("&", "and", prepstring_)
    string_ = string_ + "%20nyc"
    prepped_StartStation.append(string_)
    
print(prepped_StartStation)

['Suffolk%20St%20and%20Stanton%20St%20nyc', 'Lexington%20Ave%20and%20E%2063%20St%20nyc', '1%20Pl%20and%20Clinton%20St%20nyc%20nyc', 'Barrow%20St%20and%20Hudson%20St%20nyc', '1%20Ave%20and%20E%2044%20St%20nyc', 'State%20St%20and%20Smith%20St%20nyc', 'Front%20St%20and%20Gold%20St%20nyc', 'E%2089%20St%20and%20York%20Ave%20nyc', 'Central%20Park%20S%20and%206%20Ave%20nyc', 'E%203%20St%20and%201%20Ave%20nyc', 'Bank%20St%20and%20Washington%20St%20nyc', 'Front%20St%20and%20Maiden%20Ln%20nyc', 'E%2010%20St%20and%205%20Ave%20nyc', '1%20Ave%20and%20E%2068%20St%20nyc', 'N%2011%20St%20and%20Wythe%20Ave%20nyc', 'E%2017%20St%20and%20Broadway%20nyc', 'E%202%20St%20and%20Avenue%20C%20nyc', 'Central%20Park%20West%20and%20W%2076%20St%20nyc', 'W%2022%20St%20and%208%20Ave%20nyc', 'E%2071%20St%20and%201%20Ave%20nyc', 'University%20Pl%20and%20E%2014%20St%20nyc', 'E%2025%20St%20and%202%20Ave%20nyc', 'Dean%20St%20and%20Hoyt%20St%20nyc', 'Allen%20St%20and%20Stanton%20St%20nyc', 'NYCBS%20Depot%20-%20SSP%20nyc', 

# Start stations ready for Google API to get lat and long

# Bikeshare_NBY_Search_Response.txt file was created to hold all findplacefromsearch response data

In [8]:
f = open("Bikeshare_NBY_Search_Response.txt", "a")
f.write("Jason Bourne\n")
f.close()

# Loop through start station responses and append to txt file
### this is so that we have the data saved and don't need to make redundant calls to the API. 

In [316]:
bad_addy = []
lat_long_compiled = []
for i in range(349, length):
    url = "https://maps.googleapis.com/maps/api/place/findplacefromtext/json?input="+prepped_StartStation[i]+"&inputtype=textquery&fields=formatted_address%2Cname%2Cgeometry&key=D...Q4OFHPvYeDCKi1C86IRM"
    response1 = requests.request("GET", url)
    f = open("nearbyresponse.txt", "a")
    f.write(response1.text)
    f.write("\n")
    f.close()
    response1_json = response1.json()
    if response1_json['status'] == 'ZERO_RESULTS':
        bad_addy.append(prepped_StartStation[i])
        continue
    lat = response1_json['candidates'][0]['geometry']['location']['lat']
    long = response1_json['candidates'][0]['geometry']['location']['lng']
    lat_long_compiled.append(str(lat)+"%2C"+str(long))
print(lat_long_compiled)

['40.7275967%2C-73.9852947', '40.7502166%2C-74.0023398', '40.7731481%2C-73.9665404', '40.7078664%2C-74.0014542', '40.7140854%2C-73.95284', '40.7597914%2C-73.9762808', '40.715611%2C-73.9942638', '40.7692172%2C-73.9673069', '40.76975119999999%2C-73.96059249999999', '40.73271829999999%2C-73.9580567', '40.7285977%2C-73.9876718', '40.7234272%2C-73.9996171', '40.7216256%2C-74.0022137', '40.6980042%2C-73.9738198', '40.6793289%2C-73.9753223', '40.7242787%2C-74.0047479', '40.687601%2C-73.97174749999999', '40.7169748%2C-73.9822729', '40.7843637%2C-73.98330419999999', '40.7521773%2C-73.98753669999999', '40.69762100000001%2C-73.98497680000001', '40.7565652%2C-73.9870892', '40.7422384%2C-73.99702549999999', '40.7192438%2C-73.9525501', '40.677716%2C-73.973249', '40.723554%2C-73.9515031', '40.7503293%2C-73.99110139999999', '40.678837%2C-73.995794', '40.7045482%2C-74.0094781', '40.76094579999999%2C-73.9670505', '40.767869%2C-73.9660462', '40.679104%2C-73.987393', '40.72572539999999%2C-73.9509008', '40

# Looped through lat/long array and called Google Nearby Search to get establishments within 270ft of the bike hub.

## put that information into a dataframe with lat_long_key as a potential primary key

## created a new dataframe for analysis

In [318]:
full_json = []
full_json_append =[]
count_nearby_calls = 0
count_places = 0

for x in lat_long_compiled:
    url = "https://maps.googleapis.com/maps/api/place/nearbysearch/json?location="+x+"&name&radius=270&type=bar&key=D....FHPvYeDKi1C86IRM"
    nearbyResponse = requests.request("GET", url)
    f = open("Bikshare_PlacesNearby_Responses.txt", "a")
    f.write(nearbyResponse.text)
    f.write("\n")
    f.close()
    nearbyResponse_json = nearbyResponse.json()
    for y in range(len(nearbyResponse_json['results'])):
        nearbyResponse_json['results'][y]['lat_long_key'] = x
        nearbyResponse_json['results'][y]['Start Station'] = listOfStartStations[lat_long_compiled.index(x)]

        full_json.append(nearbyResponse_json['results'][y])
#         full_json_append.append(nearbyResponse_json['results'][y])
        count_places += 1
    count_nearby_calls += 1
nearby_df = pd.json_normalize(full_json)
nearby_df.head()

Unnamed: 0,business_status,icon,icon_background_color,icon_mask_base_uri,name,photos,place_id,price_level,rating,reference,...,geometry.location.lat,geometry.location.lng,geometry.viewport.northeast.lat,geometry.viewport.northeast.lng,geometry.viewport.southwest.lat,geometry.viewport.southwest.lng,opening_hours.open_now,plus_code.compound_code,plus_code.global_code,permanently_closed
0,OPERATIONAL,https://maps.gstatic.com/mapfiles/place_api/ic...,#FF9E67,https://maps.gstatic.com/mapfiles/place_api/ic...,Please Don't Tell,"[{'height': 3413, 'html_attributions': ['<a hr...",ChIJb67-ZZ1ZwokRRxBYr3GlFp0,3.0,4.3,ChIJb67-ZZ1ZwokRRxBYr3GlFp0,...,40.727137,-73.983741,40.728409,-73.982443,40.725711,-73.985141,True,"P2G8+VG New York, NY, USA",87G8P2G8+VG,
1,OPERATIONAL,https://maps.gstatic.com/mapfiles/place_api/ic...,#FF9E67,https://maps.gstatic.com/mapfiles/place_api/ic...,Cafe Mogador,"[{'height': 3456, 'html_attributions': ['<a hr...",ChIJK48-E51ZwokRvAA-v6fx9y0,2.0,4.5,ChIJK48-E51ZwokRvAA-v6fx9y0,...,40.727431,-73.984311,40.728676,-73.983037,40.725978,-73.985735,False,"P2G8+X7 New York, NY, USA",87G8P2G8+X7,
2,OPERATIONAL,https://maps.gstatic.com/mapfiles/place_api/ic...,#FF9E67,https://maps.gstatic.com/mapfiles/place_api/ic...,Kazuza,"[{'height': 4032, 'html_attributions': ['<a hr...",ChIJg3MwTp1ZwokRR09egp9Xo-c,2.0,4.1,ChIJg3MwTp1ZwokRR09egp9Xo-c,...,40.725937,-73.983789,40.727269,-73.982364,40.724571,-73.985062,True,"P2G8+9F New York, NY, USA",87G8P2G8+9F,
3,OPERATIONAL,https://maps.gstatic.com/mapfiles/place_api/ic...,#7B9EB0,https://maps.gstatic.com/mapfiles/place_api/ic...,Sahara East,"[{'height': 1924, 'html_attributions': ['<a hr...",ChIJn1VL7J1ZwokRYSF8iqTh8Go,1.0,4.1,ChIJn1VL7J1ZwokRYSF8iqTh8Go,...,40.729455,-73.983696,40.73084,-73.982444,40.728142,-73.985142,True,"P2H8+QG New York, NY, USA",87G8P2H8+QG,
4,OPERATIONAL,https://maps.gstatic.com/mapfiles/place_api/ic...,#FF9E67,https://maps.gstatic.com/mapfiles/place_api/ic...,La Palapa,"[{'height': 3024, 'html_attributions': ['<a hr...",ChIJv-yiA51ZwokRfkf4YagKlPs,2.0,4.2,ChIJv-yiA51ZwokRfkf4YagKlPs,...,40.727868,-73.985663,40.729161,-73.984314,40.726463,-73.987012,False,"P2H7+4P New York, NY, USA",87G8P2H7+4P,


# data analysis of nearby establishments
### There are 2513 bars restaurants and nightclubs within 1 block of bike hubs
### President St & Henry St has the most establishments nearby (40) and an avg price level of 2.11


In [324]:
nearby_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2513 entries, 0 to 2512
Data columns (total 26 columns):
 #   Column                           Non-Null Count  Dtype  
---  ------                           --------------  -----  
 0   business_status                  2513 non-null   object 
 1   icon                             2513 non-null   object 
 2   icon_background_color            2513 non-null   object 
 3   icon_mask_base_uri               2513 non-null   object 
 4   name                             2513 non-null   object 
 5   photos                           2394 non-null   object 
 6   place_id                         2513 non-null   object 
 7   price_level                      1765 non-null   float64
 8   rating                           2403 non-null   float64
 9   reference                        2513 non-null   object 
 10  scope                            2513 non-null   object 
 11  types                            2513 non-null   object 
 12  user_ratings_total  

In [331]:
nearby_df['Start Station'].mode()

0    President St & Henry St
dtype: object

In [332]:
nearby_df[nearby_df['Start Station'] == 'President St & Henry St'].count()

business_status                    40
icon                               40
icon_background_color              40
icon_mask_base_uri                 40
name                               40
photos                             36
place_id                           40
price_level                        36
rating                             34
reference                          40
scope                              40
types                              40
user_ratings_total                 34
vicinity                           40
lat_long_key                       40
Start Station                      40
geometry.location.lat              40
geometry.location.lng              40
geometry.viewport.northeast.lat    40
geometry.viewport.northeast.lng    40
geometry.viewport.southwest.lat    40
geometry.viewport.southwest.lng    40
opening_hours.open_now             34
plus_code.compound_code            40
plus_code.global_code              40
permanently_closed                  6
dtype: int64

In [333]:
pres_hen_df = nearby_df[nearby_df['Start Station'] == 'President St & Henry St']
pres_hen_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 40 entries, 1286 to 2314
Data columns (total 26 columns):
 #   Column                           Non-Null Count  Dtype  
---  ------                           --------------  -----  
 0   business_status                  40 non-null     object 
 1   icon                             40 non-null     object 
 2   icon_background_color            40 non-null     object 
 3   icon_mask_base_uri               40 non-null     object 
 4   name                             40 non-null     object 
 5   photos                           36 non-null     object 
 6   place_id                         40 non-null     object 
 7   price_level                      36 non-null     float64
 8   rating                           34 non-null     float64
 9   reference                        40 non-null     object 
 10  scope                            40 non-null     object 
 11  types                            40 non-null     object 
 12  user_ratings_total 

In [334]:
pres_hen_df['price_level'].mean()

2.111111111111111

In [137]:
y.keys()

dict_keys(['business_status', 'geometry', 'icon', 'icon_background_color', 'icon_mask_base_uri', 'name', 'opening_hours', 'photos', 'place_id', 'plus_code', 'rating', 'reference', 'scope', 'types', 'user_ratings_total', 'vicinity'])