# IBM Capstone
# The Battle of Neighborhoods: Shopping Mall Location Selection in St. Louis 

## This notebook will be mainly used for the capstone project

## Author: Ivan Yu

## Week 5

### Import Packages

In [1]:
import numpy as np
import pandas as pd
pd.set_option("display.max_columns", None)
pd.set_option("display.max_rows", None)
import json
from geopy.geocoders import Nominatim
import geocoder
import requests 
from bs4 import BeautifulSoup
from pandas.io.json import json_normalize
import matplotlib.cm as cm
import matplotlib.colors as colors
from sklearn.cluster import KMeans
import folium
print("Libraries imported.")

Libraries imported.


### Extract data from Websites

In [22]:
from bs4 import BeautifulSoup
source=requests.get('https://en.wikipedia.org/wiki/List_of_neighborhoods_of_St._Louis').text
soup = BeautifulSoup(source, 'lxml')
match = soup.title.text
print(match)

List of neighborhoods of St. Louis - Wikipedia


In [23]:
right_table=soup.find('table', class_='wikitable sortable')

In [31]:
Neighborhood=[]
# add the information of PostalCode, Borough, and Neighborhood to corresponding list
for row in right_table.findAll('tr'):
    cells=row.findAll('td')
    if len(cells)==9:
        Neighborhood.append(cells[0].find(text=True))
stl_df=pd.DataFrame(Neighborhood,columns=['Neighborhood'])
print(len(stl_df))
stl_df.head()

79


Unnamed: 0,Neighborhood
0,Academy
1,Baden
2,Benton Park
3,Benton Park West
4,Bevo Mill


### Get the geographical coordinates

In [32]:
# define a function to get coordinates
def get_latlng(neighborhood):
    # initialize your variable to None
    lat_lng_coords = None
    # loop until you get the coordinates
    while(lat_lng_coords is None):
        g = geocoder.arcgis('{}, St. Louis, USA'.format(neighborhood))
        lat_lng_coords = g.latlng
    return lat_lng_coords
coords = [ get_latlng(neighborhood) for neighborhood in stl_df["Neighborhood"].tolist() ]


In [33]:
# create temporary dataframe to populate the coordinates into Latitude and Longitude
df_coords = pd.DataFrame(coords, columns=['Latitude', 'Longitude'])

# merge the coordinates into the original dataframe
stl_df['Latitude'] = df_coords['Latitude']
stl_df['Longitude'] = df_coords['Longitude']

# check the neighborhoods and the coordinates
print(stl_df.shape)
stl_df.head()

(79, 3)


Unnamed: 0,Neighborhood,Latitude,Longitude
0,Academy,38.67728,-90.50662
1,Baden,38.70553,-90.23001
2,Benton Park,38.59962,-90.21888
3,Benton Park West,38.59766,-90.23097
4,Bevo Mill,38.58386,-90.26638


### Create a map of Kuala Lumpur 

In [34]:
address = 'St. Louis, USA'

geolocator = Nominatim(user_agent="my-application")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of St. Louis, USA {}, {}.'.format(latitude, longitude))

The geograpical coordinate of St. Louis, USA 38.6264178, -90.1998378.


In [38]:
# Create map of St. Louis using latitude and longitude values
map_stl = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to the map
for lat, lng, neighborhood in zip(stl_df['Latitude'], stl_df['Longitude'],\
                                  stl_df['Neighborhood']):
    label = '{}'.format(neighborhood)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(map_stl)  
    
map_stl

### Explore the neighborhoods with Foursquare API

In [40]:
# Define Foursquare Credentials and Version
CLIENT_ID = 'MAWJO5CWOU31KBB0WTOVYQKXZQJ3V22ITALFKBQLVDMBOS4X' # your Foursquare ID
CLIENT_SECRET = 'HTZ4L4FZY15MVYA5CK1NN1IZ5RGRV20LARKUWHBB5DVGI0BE' # your Foursquare Secret
VERSION = '20180604'
LIMIT = 100 # limit of number of venues returned by Foursquare API

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: MAWJO5CWOU31KBB0WTOVYQKXZQJ3V22ITALFKBQLVDMBOS4X
CLIENT_SECRET:HTZ4L4FZY15MVYA5CK1NN1IZ5RGRV20LARKUWHBB5DVGI0BE


### Get the top 100 venues that are within a radius of 500 meters.

In [49]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [52]:
stl_venues = getNearbyVenues(names=stl_df['Neighborhood'],\
                                 latitudes=stl_df['Latitude'],\
                                 longitudes=stl_df['Longitude'])

Academy
Baden
Benton Park
Benton Park West
Bevo Mill
Botanical Heights
Boulevard Heights
Carondelet
Carr Square
Central West End
Cheltenham
Clayton-Tamm
Clifton Heights
College Hill
Columbus Square
Compton Heights
DeBaliviere Place
Downtown
Downtown West
Dutchtown
Ellendale
Fairground
Forest Park Southeast
Fountain Park
Fox Park
Franz Park
The Gate District
Grand Center
Gravois Park
Greater Ville
Hamilton Heights
The Hill
Hi-Pointe
Holly Hills
Hyde Park
JeffVanderLou
Kings Oak
Kingsway East
Kingsway West
Kosciusko
Lafayette Square
LaSalle Park
Lewis Place
Lindenwood Park
Marine Villa
Mark Twain
Mark Twain/I-70 Industrial
McKinley Heights
Midtown
Mount Pleasant
Near North Riverfront
North Hampton
North Point
North Riverfront
O’Fallon
Old North St. Louis
Patch
Peabody Darst Webbe
Penrose
Princeton Heights
Riverview
Shaw
Skinker-DeBaliviere
Soulard
South Hampton
Southwest Garden
St. Louis Hills
St. Louis Place
Tiffany
Tower Grove East
Tower Grove South
Vandeventer
The Ville
Visitation Par

In [53]:
print(stl_venues.shape)
stl_venues.head()

(806, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Academy,38.67728,-90.50662,McArthurs Bakery,38.679504,-90.506024,Bakery
1,Academy,38.67728,-90.50662,Hunan Empress,38.679268,-90.504412,Chinese Restaurant
2,Academy,38.67728,-90.50662,Saint Louis Bread Co.,38.679104,-90.502683,Bakery
3,Academy,38.67728,-90.50662,Seoul Taco,38.678963,-90.50142,Fast Food Restaurant
4,Academy,38.67728,-90.50662,Viviano's Festa Italiano,38.67992,-90.504013,Café


In [54]:
len(stl_venues['Neighborhood'].unique())

78

### Let's check how many venues were returned for each neighborhood

In [55]:
stl_venues.groupby('Neighborhood').count()
stl_venues.reset_index(drop=True)

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Academy,38.67728,-90.50662,McArthurs Bakery,38.679504,-90.506024,Bakery
1,Academy,38.67728,-90.50662,Hunan Empress,38.679268,-90.504412,Chinese Restaurant
2,Academy,38.67728,-90.50662,Saint Louis Bread Co.,38.679104,-90.502683,Bakery
3,Academy,38.67728,-90.50662,Seoul Taco,38.678963,-90.50142,Fast Food Restaurant
4,Academy,38.67728,-90.50662,Viviano's Festa Italiano,38.67992,-90.504013,Café
5,Academy,38.67728,-90.50662,Pizza Hut,38.679074,-90.503525,Pizza Place
6,Academy,38.67728,-90.50662,Domino's Pizza,38.679539,-90.506528,Pizza Place
7,Academy,38.67728,-90.50662,Best Bowling Pro Shop,38.679678,-90.504626,Sporting Goods Shop
8,Academy,38.67728,-90.50662,Four Seasons Country Club,38.676761,-90.502367,American Restaurant
9,Academy,38.67728,-90.50662,Montgomery Bank,38.68101,-90.505983,Bank


In [56]:
print('There are {} uniques categories.'.format(len(stl_venues['Venue Category'].unique())))

There are 191 uniques categories.


### Analyze Each Neighborhood

In [59]:
# one hot encoding
stl_onehot = pd.get_dummies(stl_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
stl_onehot['Neighborhood'] = stl_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [stl_onehot.columns[-1]] + list(stl_onehot.columns[:-1])
stl_onehot = stl_onehot[fixed_columns]


stl_onehot.head()

Unnamed: 0,Neighborhood,ATM,Accessories Store,Advertising Agency,Afghan Restaurant,American Restaurant,Aquarium,Arcade,Art Gallery,Asian Restaurant,Athletics & Sports,Auto Dealership,Auto Workshop,Automotive Shop,BBQ Joint,Baby Store,Bakery,Bank,Bar,Baseball Field,Baseball Stadium,Bed & Breakfast,Beer Bar,Beer Garden,Big Box Store,Bistro,Boutique,Brazilian Restaurant,Breakfast Spot,Brewery,Burger Joint,Bus Line,Bus Station,Business Service,Cafeteria,Café,Cajun / Creole Restaurant,Candy Store,Cantonese Restaurant,Child Care Service,Chinese Restaurant,Clothing Store,Cocktail Bar,Coffee Shop,Concert Hall,Construction & Landscaping,Convenience Store,Cosmetics Shop,Cricket Ground,Cruise,Deli / Bodega,Dessert Shop,Diner,Discount Store,Dive Bar,Dog Run,Donut Shop,Eastern European Restaurant,Electronics Store,Event Space,Falafel Restaurant,Farmers Market,Fast Food Restaurant,Festival,Filipino Restaurant,Fish & Chips Shop,Flower Shop,Food,Food Court,Food Truck,Football Stadium,Fountain,French Restaurant,Fried Chicken Joint,Frozen Yogurt Shop,Furniture / Home Store,Garden,Gas Station,Gastropub,Gay Bar,German Restaurant,Gift Shop,Gourmet Shop,Greek Restaurant,Grocery Store,Gym,Gym / Fitness Center,Hardware Store,Health & Beauty Service,Historic Site,History Museum,Hobby Shop,Hockey Arena,Home Service,Hot Dog Joint,Hotel,Hotel Bar,Ice Cream Shop,Indian Restaurant,Indoor Play Area,Insurance Office,Intersection,Italian Restaurant,Japanese Restaurant,Juice Bar,Lake,Latin American Restaurant,Laundromat,Light Rail Station,Liquor Store,Locksmith,Lounge,Market,Martial Arts Dojo,Massage Studio,Mediterranean Restaurant,Mexican Restaurant,Middle Eastern Restaurant,Mobile Phone Shop,Monument / Landmark,Moroccan Restaurant,Movie Theater,Moving Target,Museum,Music Store,Music Venue,National Park,New American Restaurant,Nightclub,Noodle House,Opera House,Optical Shop,Outdoor Sculpture,Outdoors & Recreation,Park,Pawn Shop,Performing Arts Venue,Pet Store,Pharmacy,Photography Studio,Piano Bar,Pizza Place,Playground,Plaza,Pool,Pub,Rental Car Location,Rental Service,Restaurant,River,Rock Club,Rugby Pitch,Sake Bar,Salon / Barbershop,Sandwich Place,Scenic Lookout,Science Museum,Seafood Restaurant,Shipping Store,Shoe Store,Shop & Service,Shopping Mall,Skate Park,Smoke Shop,Smoothie Shop,Snack Place,Southern / Soul Food Restaurant,Spa,Sporting Goods Shop,Sports Bar,Stables,Steakhouse,Supermarket,Supplement Shop,Sushi Restaurant,Taco Place,Tapas Restaurant,Tennis Court,Thai Restaurant,Theater,Theme Park Ride / Attraction,Thrift / Vintage Store,Track,Vegetarian / Vegan Restaurant,Video Store,Vietnamese Restaurant,Waste Facility,Whisky Bar,Wine Bar,Wings Joint,Women's Store,Yoga Studio
0,Academy,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,Academy,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,Academy,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,Academy,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,Academy,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [116]:
# Have a look at where is a Shopping Mall
stl_onehot[stl_onehot['Shopping Mall'].isin([1])]

Unnamed: 0,Neighborhood,ATM,Accessories Store,Advertising Agency,Afghan Restaurant,American Restaurant,Aquarium,Arcade,Art Gallery,Asian Restaurant,Athletics & Sports,Auto Dealership,Auto Workshop,Automotive Shop,BBQ Joint,Baby Store,Bakery,Bank,Bar,Baseball Field,Baseball Stadium,Bed & Breakfast,Beer Bar,Beer Garden,Big Box Store,Bistro,Boutique,Brazilian Restaurant,Breakfast Spot,Brewery,Burger Joint,Bus Line,Bus Station,Business Service,Cafeteria,Café,Cajun / Creole Restaurant,Candy Store,Cantonese Restaurant,Child Care Service,Chinese Restaurant,Clothing Store,Cocktail Bar,Coffee Shop,Concert Hall,Construction & Landscaping,Convenience Store,Cosmetics Shop,Cricket Ground,Cruise,Deli / Bodega,Dessert Shop,Diner,Discount Store,Dive Bar,Dog Run,Donut Shop,Eastern European Restaurant,Electronics Store,Event Space,Falafel Restaurant,Farmers Market,Fast Food Restaurant,Festival,Filipino Restaurant,Fish & Chips Shop,Flower Shop,Food,Food Court,Food Truck,Football Stadium,Fountain,French Restaurant,Fried Chicken Joint,Frozen Yogurt Shop,Furniture / Home Store,Garden,Gas Station,Gastropub,Gay Bar,German Restaurant,Gift Shop,Gourmet Shop,Greek Restaurant,Grocery Store,Gym,Gym / Fitness Center,Hardware Store,Health & Beauty Service,Historic Site,History Museum,Hobby Shop,Hockey Arena,Home Service,Hot Dog Joint,Hotel,Hotel Bar,Ice Cream Shop,Indian Restaurant,Indoor Play Area,Insurance Office,Intersection,Italian Restaurant,Japanese Restaurant,Juice Bar,Lake,Latin American Restaurant,Laundromat,Light Rail Station,Liquor Store,Locksmith,Lounge,Market,Martial Arts Dojo,Massage Studio,Mediterranean Restaurant,Mexican Restaurant,Middle Eastern Restaurant,Mobile Phone Shop,Monument / Landmark,Moroccan Restaurant,Movie Theater,Moving Target,Museum,Music Store,Music Venue,National Park,New American Restaurant,Nightclub,Noodle House,Opera House,Optical Shop,Outdoor Sculpture,Outdoors & Recreation,Park,Pawn Shop,Performing Arts Venue,Pet Store,Pharmacy,Photography Studio,Piano Bar,Pizza Place,Playground,Plaza,Pool,Pub,Rental Car Location,Rental Service,Restaurant,River,Rock Club,Rugby Pitch,Sake Bar,Salon / Barbershop,Sandwich Place,Scenic Lookout,Science Museum,Seafood Restaurant,Shipping Store,Shoe Store,Shop & Service,Shopping Mall,Skate Park,Smoke Shop,Smoothie Shop,Snack Place,Southern / Soul Food Restaurant,Spa,Sporting Goods Shop,Sports Bar,Stables,Steakhouse,Supermarket,Supplement Shop,Sushi Restaurant,Taco Place,Tapas Restaurant,Tennis Court,Thai Restaurant,Theater,Theme Park Ride / Attraction,Thrift / Vintage Store,Track,Vegetarian / Vegan Restaurant,Video Store,Vietnamese Restaurant,Waste Facility,Whisky Bar,Wine Bar,Wings Joint,Women's Store,Yoga Studio
111,Carondelet,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [219]:
# Have a look at where is a Supermarket
stl_onehot[stl_onehot['Supermarket'].isin([1])]

Unnamed: 0,Neighborhood,ATM,Accessories Store,Advertising Agency,Afghan Restaurant,American Restaurant,Aquarium,Arcade,Art Gallery,Asian Restaurant,Athletics & Sports,Auto Dealership,Auto Workshop,Automotive Shop,BBQ Joint,Baby Store,Bakery,Bank,Bar,Baseball Field,Baseball Stadium,Bed & Breakfast,Beer Bar,Beer Garden,Big Box Store,Bistro,Boutique,Brazilian Restaurant,Breakfast Spot,Brewery,Burger Joint,Bus Line,Bus Station,Business Service,Cafeteria,Café,Cajun / Creole Restaurant,Candy Store,Cantonese Restaurant,Child Care Service,Chinese Restaurant,Clothing Store,Cocktail Bar,Coffee Shop,Concert Hall,Construction & Landscaping,Convenience Store,Cosmetics Shop,Cricket Ground,Cruise,Deli / Bodega,Dessert Shop,Diner,Discount Store,Dive Bar,Dog Run,Donut Shop,Eastern European Restaurant,Electronics Store,Event Space,Falafel Restaurant,Farmers Market,Fast Food Restaurant,Festival,Filipino Restaurant,Fish & Chips Shop,Flower Shop,Food,Food Court,Food Truck,Football Stadium,Fountain,French Restaurant,Fried Chicken Joint,Frozen Yogurt Shop,Furniture / Home Store,Garden,Gas Station,Gastropub,Gay Bar,German Restaurant,Gift Shop,Gourmet Shop,Greek Restaurant,Grocery Store,Gym,Gym / Fitness Center,Hardware Store,Health & Beauty Service,Historic Site,History Museum,Hobby Shop,Hockey Arena,Home Service,Hot Dog Joint,Hotel,Hotel Bar,Ice Cream Shop,Indian Restaurant,Indoor Play Area,Insurance Office,Intersection,Italian Restaurant,Japanese Restaurant,Juice Bar,Lake,Latin American Restaurant,Laundromat,Light Rail Station,Liquor Store,Locksmith,Lounge,Market,Martial Arts Dojo,Massage Studio,Mediterranean Restaurant,Mexican Restaurant,Middle Eastern Restaurant,Mobile Phone Shop,Monument / Landmark,Moroccan Restaurant,Movie Theater,Moving Target,Museum,Music Store,Music Venue,National Park,New American Restaurant,Nightclub,Noodle House,Opera House,Optical Shop,Outdoor Sculpture,Outdoors & Recreation,Park,Pawn Shop,Performing Arts Venue,Pet Store,Pharmacy,Photography Studio,Piano Bar,Pizza Place,Playground,Plaza,Pool,Pub,Rental Car Location,Rental Service,Restaurant,River,Rock Club,Rugby Pitch,Sake Bar,Salon / Barbershop,Sandwich Place,Scenic Lookout,Science Museum,Seafood Restaurant,Shipping Store,Shoe Store,Shop & Service,Shopping Mall,Skate Park,Smoke Shop,Smoothie Shop,Snack Place,Southern / Soul Food Restaurant,Spa,Sporting Goods Shop,Sports Bar,Stables,Steakhouse,Supermarket,Supplement Shop,Sushi Restaurant,Taco Place,Tapas Restaurant,Tennis Court,Thai Restaurant,Theater,Theme Park Ride / Attraction,Thrift / Vintage Store,Track,Vegetarian / Vegan Restaurant,Video Store,Vietnamese Restaurant,Waste Facility,Whisky Bar,Wine Bar,Wings Joint,Women's Store,Yoga Studio
103,Carondelet,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
621,Peabody Darst Webbe,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [60]:
len(stl_venues['Neighborhood'].unique())

78

### And let's examine the new dataframe size.

In [61]:
stl_onehot.shape

(806, 192)

### Next, let's group rows by neighborhood and by taking the mean of the frequency of occurrence of each category

In [118]:
stl_sum = stl_onehot.groupby('Neighborhood').sum().reset_index()
stl_sum.head()

Unnamed: 0,Neighborhood,ATM,Accessories Store,Advertising Agency,Afghan Restaurant,American Restaurant,Aquarium,Arcade,Art Gallery,Asian Restaurant,Athletics & Sports,Auto Dealership,Auto Workshop,Automotive Shop,BBQ Joint,Baby Store,Bakery,Bank,Bar,Baseball Field,Baseball Stadium,Bed & Breakfast,Beer Bar,Beer Garden,Big Box Store,Bistro,Boutique,Brazilian Restaurant,Breakfast Spot,Brewery,Burger Joint,Bus Line,Bus Station,Business Service,Cafeteria,Café,Cajun / Creole Restaurant,Candy Store,Cantonese Restaurant,Child Care Service,Chinese Restaurant,Clothing Store,Cocktail Bar,Coffee Shop,Concert Hall,Construction & Landscaping,Convenience Store,Cosmetics Shop,Cricket Ground,Cruise,Deli / Bodega,Dessert Shop,Diner,Discount Store,Dive Bar,Dog Run,Donut Shop,Eastern European Restaurant,Electronics Store,Event Space,Falafel Restaurant,Farmers Market,Fast Food Restaurant,Festival,Filipino Restaurant,Fish & Chips Shop,Flower Shop,Food,Food Court,Food Truck,Football Stadium,Fountain,French Restaurant,Fried Chicken Joint,Frozen Yogurt Shop,Furniture / Home Store,Garden,Gas Station,Gastropub,Gay Bar,German Restaurant,Gift Shop,Gourmet Shop,Greek Restaurant,Grocery Store,Gym,Gym / Fitness Center,Hardware Store,Health & Beauty Service,Historic Site,History Museum,Hobby Shop,Hockey Arena,Home Service,Hot Dog Joint,Hotel,Hotel Bar,Ice Cream Shop,Indian Restaurant,Indoor Play Area,Insurance Office,Intersection,Italian Restaurant,Japanese Restaurant,Juice Bar,Lake,Latin American Restaurant,Laundromat,Light Rail Station,Liquor Store,Locksmith,Lounge,Market,Martial Arts Dojo,Massage Studio,Mediterranean Restaurant,Mexican Restaurant,Middle Eastern Restaurant,Mobile Phone Shop,Monument / Landmark,Moroccan Restaurant,Movie Theater,Moving Target,Museum,Music Store,Music Venue,National Park,New American Restaurant,Nightclub,Noodle House,Opera House,Optical Shop,Outdoor Sculpture,Outdoors & Recreation,Park,Pawn Shop,Performing Arts Venue,Pet Store,Pharmacy,Photography Studio,Piano Bar,Pizza Place,Playground,Plaza,Pool,Pub,Rental Car Location,Rental Service,Restaurant,River,Rock Club,Rugby Pitch,Sake Bar,Salon / Barbershop,Sandwich Place,Scenic Lookout,Science Museum,Seafood Restaurant,Shipping Store,Shoe Store,Shop & Service,Shopping Mall,Skate Park,Smoke Shop,Smoothie Shop,Snack Place,Southern / Soul Food Restaurant,Spa,Sporting Goods Shop,Sports Bar,Stables,Steakhouse,Supermarket,Supplement Shop,Sushi Restaurant,Taco Place,Tapas Restaurant,Tennis Court,Thai Restaurant,Theater,Theme Park Ride / Attraction,Thrift / Vintage Store,Track,Vegetarian / Vegan Restaurant,Video Store,Vietnamese Restaurant,Waste Facility,Whisky Bar,Wine Bar,Wings Joint,Women's Store,Yoga Studio
0,Academy,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,Baden,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,Benton Park,0,0,0,0,1,0,0,1,0,0,0,0,0,1,0,1,0,1,0,0,0,0,2,0,0,0,1,1,2,0,0,1,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0
3,Benton Park West,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,Bevo Mill,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


### Let's calculate the sum of caterings, entertainments and cafes in each neighborhood

In [168]:
venues=stl_sum.columns.tolist()

caterings=[]
for i in range(len(venues)):
    if ('Restaurant' in venues[i]) or ('BBQ' in venues[i])\
    or ('Burger' in venues[i]) or ('Chicken' in venues[i])\
    or ('Noodle' in venues[i]) or ('Pizza' in venues[i])\
    or ('Sandwich' in venues[i]) or ('Steakhouse' in venues[i])\
    or ('Taco' in venues[i]) or ('Wings' in venues[i])\
    or ('Dog' in venues[i]) or ('Fish' in venues[i]):
        caterings.append(venues[i])
        
entertainments=[]
for i in range(len(venues)):
    if ('Bar' in venues[i]) or ('Pub' in venues[i])\
    or ('Club' in venues[i]) or ('Beer' in venues[i])\
    or ('Theater' in venues[i]):
        entertainments.append(venues[i])
        
cafes=[]        
for i in range(len(venues)):
    if ('Cafeteria' in venues[i]) or ('Café' in venues[i])\
    or ('Coffee' in venues[i]) or ('Dessert' in venues[i])\
    or ('Bakery' in venues[i]) or ('Breakfast' in venues[i])\
    or ('Yogurt' in venues[i]) or ('Cream' in venues[i])\
    or ('Diner' in venues[i]) or ('Smoothie' in venues[i]):
        cafes.append(venues[i])

In [185]:
from pandas import DataFrame

number_of_caterings=[]
number_of_entertainments=[]
number_of_cafes=[]
for i in range(len(stl_sum)):
    
    sum=0
    for catering in caterings:
        sum=sum+stl_sum.loc[i,catering]
    number_of_caterings.append(sum)
    
    sum=0
    for entertainment in entertainments:
        sum=sum+stl_sum.loc[i,entertainment]
    number_of_entertainments.append(sum)
    
    sum=0
    for cafe in cafes:
        sum=sum+stl_sum.loc[i,cafe]
    number_of_cafes.append(sum)
    
data={'Neighborhood':stl_sum['Neighborhood'],
      'Caterings':number_of_caterings,
      'Entertainments':number_of_entertainments,
      'Cafes':number_of_cafes}
new_stl_sum=DataFrame(data)
new_stl_sum

Unnamed: 0,Neighborhood,Caterings,Entertainments,Cafes
0,Academy,5,0,3
1,Baden,2,0,0
2,Benton Park,6,8,6
3,Benton Park West,10,1,1
4,Bevo Mill,7,0,0
5,Botanical Heights,3,1,2
6,Boulevard Heights,1,1,2
7,Carondelet,4,1,3
8,Carr Square,2,0,0
9,Central West End,8,5,1


### Run K-Means Clustering

In [186]:
# Run *k*-means to cluster the neighborhood into 3 clusters.

# set number of clusters
kclusters = 3

new_stl_sum_clustering = new_stl_sum.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(new_stl_sum_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([2, 0, 1, 1, 2, 2, 0, 2, 0, 1], dtype=int32)

In [187]:
# Let's create a new dataframe that includes the cluster as well as the top 10 venues for each neighborhood.

# add clustering labels
new_stl_sum.insert(0, 'Cluster Labels', kmeans.labels_)

stl_merged = new_stl_sum

# add latitude/longitude for each neighborhood
stl_merged = stl_merged.join(stl_df.set_index('Neighborhood'), on='Neighborhood')
stl_merged=stl_merged.dropna(axis=0,how='any').reset_index(drop=True)

stl_merged.head() # check the last columns!

Unnamed: 0,Cluster Labels,Neighborhood,Caterings,Entertainments,Cafes,Latitude,Longitude
0,2,Academy,5,0,3,38.67728,-90.50662
1,0,Baden,2,0,0,38.70553,-90.23001
2,1,Benton Park,6,8,6,38.59962,-90.21888
3,1,Benton Park West,10,1,1,38.59766,-90.23097
4,2,Bevo Mill,7,0,0,38.58386,-90.26638


In [248]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(stl_merged['Latitude'],\
                                  stl_merged['Longitude'],\
                                  stl_merged['Neighborhood'],\
                                  stl_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

## Exam the cluster

### Cluster 0: Least Flourishing (Red)

In [193]:
stl_merged.loc[stl_merged['Cluster Labels'] == 0, \
                   stl_merged.columns[[0] + list(range(1, stl_merged.shape[1]))]]

Unnamed: 0,Cluster Labels,Neighborhood,Caterings,Entertainments,Cafes,Latitude,Longitude
1,0,Baden,2,0,0,38.70553,-90.23001
6,0,Boulevard Heights,1,1,2,38.56202,-90.27723
8,0,Carr Square,2,0,0,38.63901,-90.19949
10,0,Cheltenham,1,1,1,38.62733,-90.27976
13,0,College Hill,0,0,0,38.674,-90.20878
14,0,Columbus Square,0,1,1,38.63691,-90.18942
15,0,Compton Heights,1,1,3,38.61298,-90.23626
16,0,DeBaliviere Place,2,1,1,38.64947,-90.27809
21,0,Fairground,2,0,0,38.66745,-90.21776
23,0,Fountain Park,0,0,0,38.65787,-90.25942


### Cluster 1: Most Flourishing (Purple)

In [195]:
stl_merged.loc[stl_merged['Cluster Labels'] == 1, \
                   stl_merged.columns[[0] + list(range(1, stl_merged.shape[1]))]]

Unnamed: 0,Cluster Labels,Neighborhood,Caterings,Entertainments,Cafes,Latitude,Longitude
2,1,Benton Park,6,8,6,38.59962,-90.21888
3,1,Benton Park West,10,1,1,38.59766,-90.23097
9,1,Central West End,8,5,1,38.6417,-90.25032
17,1,Downtown,8,4,1,38.62549,-90.1903
22,1,Forest Park Southeast,10,9,1,38.62695,-90.25709
39,1,Lafayette Square,7,4,2,38.61756,-90.2145
59,1,Skinker-DeBaliviere,13,1,3,38.65194,-90.29405
60,1,Soulard,5,10,3,38.60418,-90.20829
61,1,South Hampton,10,3,5,38.59356,-90.29567
66,1,The Hill,14,3,4,38.61831,-90.27603


### Cluster 2: Flourishing (Mint Green)

In [196]:
stl_merged.loc[stl_merged['Cluster Labels'] == 2, \
                   stl_merged.columns[[0] + list(range(1, stl_merged.shape[1]))]]

Unnamed: 0,Cluster Labels,Neighborhood,Caterings,Entertainments,Cafes,Latitude,Longitude
0,2,Academy,5,0,3,38.67728,-90.50662
4,2,Bevo Mill,7,0,0,38.58386,-90.26638
5,2,Botanical Heights,3,1,2,38.62096,-90.25063
7,2,Carondelet,4,1,3,38.55639,-90.26643
11,2,Clayton-Tamm,5,2,4,38.62699,-90.29081
12,2,Clifton Heights,5,1,1,38.6114,-90.2917
18,2,Downtown West,4,4,3,38.62975,-90.20625
19,2,Dutchtown,4,0,1,38.58063,-90.24567
20,2,Ellendale,9,0,1,38.61306,-90.31181
25,2,Franz Park,5,0,0,38.62217,-90.30393


## Next, Let us add the exisiting Shopping Mall and Supermarkets to the Map

### Recall that Carondelet and Peabody Darst Webbe has a Shopping Mall or a Supermarket.

In [221]:
stl_merged

Unnamed: 0,Cluster Labels,Neighborhood,Caterings,Entertainments,Cafes,Latitude,Longitude
0,2,Academy,5,0,3,38.67728,-90.50662
1,0,Baden,2,0,0,38.70553,-90.23001
2,1,Benton Park,6,8,6,38.59962,-90.21888
3,1,Benton Park West,10,1,1,38.59766,-90.23097
4,2,Bevo Mill,7,0,0,38.58386,-90.26638
5,2,Botanical Heights,3,1,2,38.62096,-90.25063
6,0,Boulevard Heights,1,1,2,38.56202,-90.27723
7,2,Carondelet,4,1,3,38.55639,-90.26643
8,0,Carr Square,2,0,0,38.63901,-90.19949
9,1,Central West End,8,5,1,38.6417,-90.25032


In [225]:
existing=stl_merged[stl_merged['Neighborhood']\
                    .isin(['Carondelet','Peabody Darst Webbe'])]

In [226]:
existing

Unnamed: 0,Cluster Labels,Neighborhood,Caterings,Entertainments,Cafes,Latitude,Longitude
7,2,Carondelet,4,1,3,38.55639,-90.26643
55,0,Peabody Darst Webbe,2,1,2,38.61573,-90.20734


In [249]:
for lat, lon, poi in zip(existing['Latitude'],\
                                  existing['Longitude'],\
                                  existing['Neighborhood']):
    label = folium.Popup(str(poi) + ' Shopping Mall/Supermarket', parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=30,
        popup=label,
        color='Yellow',
        fill=True,
        fill_color='Yellow',
        fill_opacity=0.5).add_to(map_clusters)
map_clusters

# Conclusion

## We should choose Purple point outside the yellow circles as the ideal location to open a new shopping mall for the following reasons.

### 1. We cluster all the neighborhoods according to:
### Caterings (including Restaurant, Burger, etc.)
### Entertainments (including Bar, Club, Theater)
### Cafes (including Cafe, Breakfast, Dessert, etc.). 

### Red points represent those least flourishing, Purple points represent those most flourishing, and Mint Green points represent those just middle. We should open a new Shopping Mall at a flourishing location. So we should choose Purple point as our location.


### 2. Considering that we should avoid the potential competition with the existing Shopping Malls and Supermarkets, we mark the existing Shopping Malls and Supermarkets in St. Louis on the map. And each has a 'Scope of Influence',  which is represented by the larger yellow circle. We should choose the location outside these yellow circles.