## **CAPSTONE**

### Battle of the Neighborhoods  

#### Pickup solution for delivery company 
Takeshi.Yagyu

#### Load library

In [0]:
import requests # library to handle requests
import pandas as pd # library for data analsysis
import numpy as np # library to handle data in a vectorized manner
import random # library for random number generation

# !conda install -c conda-forge geopy --yes 
from geopy.geocoders import Nominatim # module to convert an address into latitude and longitude values

# libraries for displaying images
from IPython.display import Image 
from IPython.core.display import HTML 
    
# tranforming json file into a pandas dataframe library
from pandas.io.json import json_normalize

# !conda install -c conda-forge folium=0.5.0 --yes
import folium # plotting library

import re
import requests
import pprint
pp = pprint.PrettyPrinter(indent=4)

#### Select major shopping districts for this project. 
1.  Select major shopping districts in my experience.
1.  Get the latitude and longitude of these centers.

In [0]:
# Use Nominatim to convert address to  latitude and longitude 
geolocator = Nominatim(user_agent="foursquare_agent")

# Selecting in my experience
centers = ['Ginza, Tokyo','Shinjuku, Tokyo','Akihabara, Tokyo', 
           'Shibuya, Tokyo','Odaiba, Tokyo'] 
major_area = []

# Get latitude & longitude
for address in centers:
  try:
    location = geolocator.geocode(address)
    major_area.append({'name': address,
                       'lat': location.latitude,
                       'lng': location.longitude})
  except:
    major_area.append({'name': address,'lat': None,'lng': None})

pp.pprint(major_area)

[   {'lat': 35.66951555, 'lng': 139.7643055988, 'name': 'Ginza, Tokyo'},
    {'lat': 35.6937632, 'lng': 139.7036319, 'name': 'Shinjuku, Tokyo'},
    {'lat': 35.6997364, 'lng': 139.7712503, 'name': 'Akihabara, Tokyo'},
    {'lat': 35.6645956, 'lng': 139.6987107, 'name': 'Shibuya, Tokyo'},
    {'lat': 35.61912805, 'lng': 139.779403349221, 'name': 'Odaiba, Tokyo'}]


In [0]:
shop_google_df = pd.read_csv('/content/drive/My Drive/shop_google_df_5columns.csv')
park_df = pd.read_csv('/content/drive/My Drive/park_df.csv')
shop_df = pd.read_csv('/content/drive/My Drive/shop_df.csv')

shop_google_df.head()
park_df.head()
shop_df[['name','lat','lng']].head()

Unnamed: 0,name,lat,lng
0,播磨屋本店 東京店,35.669875,139.747833
1,LAOX (ラオックス 秋葉原本店),35.698124,139.770963
2,FREEMAN 西新宿店,35.696136,139.698464
3,Akky II,35.699185,139.77164
4,フリーハンド,35.682323,139.704488


In [0]:
google_api_key = 'AIzaSyCOI0sWU1fMjN4wQtFxp1e7OpXx9pwH-fw'

#### Search duty-free stores from google map
1.  Use google place nearby search api.
1.  The radius is set to 5km
1.  Filter 

In [0]:
# new dataframe
shop_google_df = pd.DataFrame()

# search from selected major disticts
for area in major_area:

  search = 'https://maps.googleapis.com/maps/api/place/nearbysearch/json'

  # set radius to 5km, keyword to "duty free"
  # filer on "type: store"
  payload = {'location':  f'{area["lat"]},{area["lng"]}', 
             'radius': 5000,
             'keyword': 'duty free',
             'type': 'store', 
             'key': google_api_key,
             'language': 'en'
            }
  response = requests.get(search, params=payload).json()

  res = response['results']
  shop_google_df_tmp = json_normalize(data=res)

  # get the necessary information only
  columns = ['id', 'name',
             'geometry.location.lat', 
             'geometry.location.lng',
             'user_ratings_total', 
             'rating'] 
  shop_google_df_tmp = shop_google_df_tmp[columns]

  # shorten column name
  columns = ['id', 'name','lat', 'lng', 'user_ratings_total', 'rating']
  shop_google_df_tmp.columns = columns

  # concat searched results
  shop_google_df = pd.concat([shop_google_df, shop_google_df_tmp], sort=True)
  print(area['name'], shop_google_df_tmp.shape)

print('----------------')
print('Total ', shop_google_df.shape)

# merge search results with same id
shop_google_df = shop_google_df.groupby('id').first()

print('Merged ', shop_google_df.shape)

Ginza, Tokyo (19, 6)
Shinjuku, Tokyo (12, 6)
Akihabara, Tokyo (17, 6)
Shibuya, Tokyo (13, 6)
Odaiba, Tokyo (6, 6)
----------------
Total  (67, 6)
Merged  (34, 5)


#### Get the parking area infomation
1. Get list from https://www.s-park.jp/bus that is exclusively for motorcoach.
1. Convert address to latitude and longitude using google map api.

In [0]:
# pre-defined function
def get_coordinates(api_key, address, verbose=False):
    try:

        url = 'https://maps.googleapis.com/maps/api/geocode/json?key={}&address={}'.format(api_key, address)
        response = requests.get(url).json()
        if verbose:
            print('Google Maps API JSON result =>', response)
        results = response['results']

        # get geographical coordinates
        geographical_data = results[0]['geometry']['location'] 
        lat = geographical_data['lat']
        lon = geographical_data['lng']
        return [lat, lon]

    except:
        return [None, None]

# parking list  exclusively for motorcoach
park_df = pd.read_html("https://www.s-park.jp/bus")
park_df = park_df[0]
park_df.columns=['Name', 'Address', 'Telphone', 'Number']

# data wrangling
park_df = park_df[park_df['Name'] != park_df['Address']]
park_df.drop(columns='Telphone', axis=1, inplace=True)
park_df['Number'] = park_df['Number'].apply(lambda x: re.sub('\D','',x))

get_location = lambda x: get_coordinates(google_api_key, x)
park_df[['lat', 'lng']] = park_df['Address'].apply(get_location).apply(pd.Series)

park_df.head()

Unnamed: 0,Name,Address,Number,lat,lng
0,北の丸公園第三駐車場,東京都千代田区北の丸公園１番,20,35.691279,139.749237
1,丸ノ内鍛冶橋駐車場,東京都千代田区丸の内３丁目８番２号,22,35.677149,139.766106
2,靖国神社外苑駐車場,東京都千代田区九段北２丁目１番１号,23,35.694195,139.745354
4,市場橋駐車場（観光バス）,東京都中央区築地４丁目１５番２号,9,35.665995,139.768744
5,タイムズ晴海４丁目バスプール,東京都中央区晴海4-6,8,35.653477,139.780225


#### Search duty-free stores using FourSquare API

In [0]:
CLIENT_ID = '3LEGE3Z0A3YBMUECEVL4OJEG45GFCYXU3FBTMXO0SABKFF0Q' # your Foursquare ID
CLIENT_SECRET = 'CYGIDYBNSEE34TVSELEB0R41XENDN50OOQZKUZ51MVJFPAS2' # your Foursquare Secret

In [0]:
shop_df = pd.DataFrame()

# search fqursquare venues that like duty-free store
for area in major_area:

  search = 'https://api.foursquare.com/v2/venues/search'

  payload = {'client_id': CLIENT_ID, 
            'client_secret': CLIENT_SECRET, 
            'll': f'{area["lat"]},{area["lng"]}',
            'v': '20180604',
            'query': 'tax free',
            'radius': 5000,
            'limit': 1000
            }

  res = requests.get(search, params=payload).json()
  shop_df_tmp = json_normalize(data=res['response']['venues'])
  shop_df_tmp['categories'] = shop_df_tmp['categories'].apply(
                                lambda x: x[0]['name'] if x else 'UNKNOWN')
  
  # filter categories to store 
  shop_df_tmp = shop_df_tmp[shop_df_tmp['categories'].str.contains('Store')]

  shop_df = pd.concat([shop_df, shop_df_tmp], sort=False)
  print(area['name'], shop_df_tmp.shape)

print('-----------------------')
print('Total ', shop_df.shape)

shop_df = shop_df.groupby('id').first()
print('Merged ', shop_df.shape)

shop_df.columns = [col.split('.')[-1] for col in shop_df.columns]

columns = ['name', 'lat', 'lng']
shop_df = shop_df[columns]

Ginza, Tokyo (5, 19)
Shinjuku, Tokyo (6, 18)
Akihabara, Tokyo (6, 19)
Shibuya, Tokyo (10, 18)
Odaiba, Tokyo (0, 17)
-----------------------
Total  (27, 19)
Merged  (18, 18)


#### Merge search results from google map and FourSquare.

In [0]:
print('FourSquare Search Resullts: ', shop_df.shape)
print('Google Map Search Resullts: ', shop_google_df.shape)

# merge
shop_df = pd.concat([shop_df,shop_google_df], axis=0, sort=True)

shop_df.reset_index(inplace=True, drop=True)

# fill NaN
shop_df = shop_df.fillna(shop_df.median())

print('-----------------------------------------')
print('Merged duty-free stores   : ', shop_df.shape)

FourSquare Search Resullts:  (16, 4)
Google Map Search Resullts:  (34, 6)
-----------------------------------------
Merged duty-free stores   :  (50, 6)


In [0]:
shop_df.sort_values(by='user_ratings_total', ascending=False)

#### Data Visualization and Some Simple Statistical Analysis
1. Show parking areas on map - green circle
2. Show duty-free stores on map - blue circle

In [0]:
import folium

center = park_df.loc[0][['lat','lng']]
tokyo_map = folium.Map(location=center, zoom_start=12, width='75%', height='75%')

# show parking areas
dataset =  zip(park_df['lat'],  park_df['lng'], 
               park_df['Name'], park_df['Number'])
for lat, lng, name, number in dataset:
    label = '{}, {}'.format(name, number)
    label = folium.Popup(label, parse_html=True)
    # number = 1/ (1+ np.exp(number))
    folium.CircleMarker(
        [lat, lng],
        radius= (number//100 +1)*8,
        popup=label,
        color='green',
        fill=True,
        fill_color='#00ff00',
        fill_opacity=0.7,
        parse_html=False).add_to(tokyo_map)  

# show duty-free stores
dataset = zip(shop_df['lat'],  shop_df['lng'], 
              shop_df['name'], shop_df['rating'], 
              shop_df['user_ratings_total'])
for lat, lng, name, rating, users in dataset:
    label = str(name)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=(users//100 +1)*5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(tokyo_map)  
    folium.Map()
tokyo_map

#### Clustering duty-free stores using K-Means

In [0]:
from sklearn.cluster import KMeans

k_means = KMeans(init='k-means++', n_clusters=5, n_init=20)
k_means.fit(shop_df[['lat', 'lng']])


KMeans(algorithm='auto', copy_x=True, init='k-means++', max_iter=300,
       n_clusters=5, n_init=20, n_jobs=None, precompute_distances='auto',
       random_state=None, tol=0.0001, verbose=0)

Clustering the duty-free stors to area

In [0]:
kmeans_labels = k_means.labels_
kmeans_labels

array([3, 1, 0, 1, 0, 0, 1, 1, 0, 3, 0, 2, 3, 3, 3, 2, 1, 1, 0, 3, 3, 3,
       0, 1, 0, 1, 0, 3, 0, 3, 0, 2, 1, 0, 3, 2, 4, 1, 2, 4, 3, 1, 0, 1,
       1, 1, 0, 0, 0, 2], dtype=int32)

Show cluster centers

In [0]:
k_means_cluster_centers = k_means.cluster_centers_
k_means_cluster_centers

array([[ 35.69822311, 139.70469552],
       [ 35.70258741, 139.77491478],
       [ 35.64509302, 139.71736678],
       [ 35.669981  , 139.76325123],
       [ 35.63376105, 139.7904752 ]])

Show the shooping areas on map.

In [0]:
# center = park_df.loc[0][['lat','lng']]
# tokyo_map = folium.Map(location=center, zoom_start=12)

kc = k_means_cluster_centers
i = 0
for center in k_means_cluster_centers:
    # label = borough
    # label = folium.Popup(label, parse_html=True)
    r = np.sum(kmeans_labels == i) 
    print(i,r)
    folium.CircleMarker(
        center,
        radius= str(r),
        # popup=label,
        color='red',
        fill=True,
        fill_color='#ff0000',
        fill_opacity=0.7,
        parse_html=False).add_to(tokyo_map)  
    i += 1

# # show parking areas
# dataset =  zip(park_df['lat'],  park_df['lng'], 
#                park_df['Name'], park_df['Number'])
# for lat, lng, name, number in dataset:
#     label = '{}, {}'.format(name, number)
#     label = folium.Popup(label, parse_html=True)
#     # number = 1/ (1+ np.exp(number))
#     folium.CircleMarker(
#         [lat, lng],
#         radius= (number//100 +1)*8,
#         popup=label,
#         color='green',
#         fill=True,
#         fill_color='#00ff00',
#         fill_opacity=0.7,
#         parse_html=False).add_to(tokyo_map)  

tokyo_map

0 16
1 14
2 6
3 12
4 2


#### Calculate the midpoint between parking and shopping area

In [0]:
import numpy as np
from scipy.spatial import distance
import math

# a,b=[[35.669875,139.747833]], [[ 35.669981  , 139.76325123]]
# distance.cdist(a,b)

a,b=[35.669875,139.747833], [ 35.669981 , 139.76325123]
def distance(a, b):
  c = (a[0] - b[0])**2 + (a[1]-b[1])**2
  return math.sqrt(c)

def midpoint(a,b):
  return (a[0]+b[0])/2, (a[1]+b[1])/2

print(distance(a,b))
print(midpoint(a,b))

0.015418594369550424
(35.669928, 139.75554211500003)


In [0]:
distance1 = []
midpoint1 = []
for lat,lng in zip(park_df['lat'], park_df['lng']):
  a, b = [lat,lng],  [major_area[0]['lat'], major_area[0]['lng']]
  d = distance(a, b)
  m = midpoint(a, b)
  # print(d, m)
  distance1.append(d)
  midpoint1.append(m)

park_df['distance1'] = distance1
park_df['midpoint1'] = midpoint1
park_df.head()

Unnamed: 0.1,Unnamed: 0,Name,Address,Number,lat,lng,distance1,midpoint1
0,0,北の丸公園第三駐車場,東京都千代田区北の丸公園１番,20.0,35.691279,139.749237,0.026471,"(35.680397075, 139.75677114939998)"
1,1,丸ノ内鍛冶橋駐車場,東京都千代田区丸の内３丁目８番２号,22.0,35.677149,139.766105,0.007843,"(35.673332224999996, 139.76520554939998)"
2,2,靖国神社外苑駐車場,東京都千代田区九段北２丁目１番１号,23.0,35.694195,139.745354,0.031116,"(35.681855025000004, 139.7548299494)"
3,4,市場橋駐車場（観光バス）,東京都中央区築地４丁目１５番２号,9.0,35.665995,139.768744,0.005665,"(35.667755475, 139.7665247494)"
4,5,タイムズ晴海４丁目バスプール,東京都中央区晴海4-6,8.0,35.653477,139.780225,0.022598,"(35.661496325, 139.7722652494)"


In [0]:
park_df[park_df['distance1'] == park_df['distance1'].min(axis=0)]

Unnamed: 0.1,Unnamed: 0,Name,Address,Number,lat,lng,distance1,midpoint1
3,4,市場橋駐車場（観光バス）,東京都中央区築地４丁目１５番２号,9.0,35.665995,139.768744,0.005665,"(35.667755475, 139.7665247494)"


In [0]:
park_df.iloc[park_df['distance1'].idxmin()]

Unnamed: 0                                 4
Name                            市場橋駐車場（観光バス）
Address                     東京都中央区築地４丁目１５番２号
Number                                     9
lat                                   35.666
lng                                  139.769
distance1                          0.0056648
midpoint1     (35.667755475, 139.7665247494)
Name: 3, dtype: object