## Martin Palkovic's IBM Data Science Capstone Project

###### By Martin Palkovic, Geoscientist at Colorado School of Mines

##### import libraries

In [2]:
import pandas as pd
#pd.options.mode.chained_assignment = None
pd.set_option("display.max_rows", None, "display.max_columns", None)
import geopandas as gpd

import numpy as np
import geocoder as gc

import json
import requests
import random

import folium
from folium import plugins
from folium.features import GeoJson, GeoJsonTooltip, GeoJsonPopup

from sklearn.cluster import KMeans
from geopy.geocoders import Nominatim
from matplotlib import cm, colors

##### For this project, I downloaded neighborhood data from the city of Denver website: https://www.denvergov.org/opendata/dataset/city-and-county-of-denver-statistical-neighborhoods

##### Read the data into a Pandas dataframe, and clean it up a bit:

In [3]:
neighborhoods = gpd.read_file(r'/Users/martinpalkovic/Downloads/statistical_neighborhoods/statistical_neighborhoods.shp')
neighborhoods.to_file('Denver_Neighborhoods.json', driver = 'GeoJSON')
neighborhoods.to_crs('EPSG:4326')
df = neighborhoods.drop(columns = ['TYPOLOGY','NOTES'])
df = df.rename(columns = {'NBHD_NAME': 'Neighborhood'})
print (df.dtypes)
df.head()

NBHD_ID            int64
Neighborhood      object
geometry        geometry
dtype: object


Unnamed: 0,NBHD_ID,Neighborhood,geometry
0,2,Auraria,"POLYGON ((-105.00042 39.74552, -105.00041 39.7..."
1,21,Cory - Merrill,"POLYGON ((-104.94070 39.69540, -104.94070 39.6..."
2,7,Belcaro,"POLYGON ((-104.94070 39.71156, -104.94069 39.7..."
3,70,Washington Park,"POLYGON ((-104.95931 39.71566, -104.95931 39.7..."
4,71,Washington Park West,"POLYGON ((-104.97342 39.68982, -104.97356 39.6..."


##### Cast the 'Neighborhood' column to type 'string' to avoid any issues with the data down the line:

In [4]:
df.astype({'Neighborhood': 'string'}).dtypes

NBHD_ID            int64
Neighborhood      string
geometry        geometry
dtype: object

##### Get the latitude/longitude of the center of each neighborhood:

In [5]:
#This cell throws a warning that I'm not sure how to rectify - 
#I ran the '.to_crs()' method above, and still get the warning. 
#Note: this warning does not hinder the analysis in any way

def get_xy(pt):
    return (pt.x, pt.y)
centroidseries = neighborhoods['geometry'].centroid
x,y = [list(t) for t in zip(*map(get_xy, centroidseries))]

df['Longitude'] = x
df['Latitude'] = y
df.head() 


  centroidseries = neighborhoods['geometry'].centroid


Unnamed: 0,NBHD_ID,Neighborhood,geometry,Longitude,Latitude
0,2,Auraria,"POLYGON ((-105.00042 39.74552, -105.00041 39.7...",-105.008267,39.745821
1,21,Cory - Merrill,"POLYGON ((-104.94070 39.69540, -104.94070 39.6...",-104.949822,39.690462
2,7,Belcaro,"POLYGON ((-104.94070 39.71156, -104.94069 39.7...",-104.950477,39.705044
3,70,Washington Park,"POLYGON ((-104.95931 39.71566, -104.95931 39.7...",-104.966267,39.701238
4,71,Washington Park West,"POLYGON ((-104.97342 39.68982, -104.97356 39.6...",-104.979904,39.702575


##### Use geolocator to grab the latitude and longitude coordinates for the city of Denver, to use on our Folium maps:

In [6]:
address = 'Denver, CO'

geolocator = Nominatim(user_agent = 'denver_explorer')
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print ('The geographical coordinates of Denver are {}, {}'.format(latitude, longitude))

The geographical coordinates of Denver are 39.7392364, -104.9848623


##### Populater a dictionary of random hexadecimal colors for each neighborhood to use on the maps:

In [7]:
color_dict = {}
hoodList = df.Neighborhood.unique()

for i in hoodList:
    color = lambda: random.randint(0, 255)
    value = '#%02X%02X%02X' % (color(), color(), color())
    color_dict[i] = value
    
color_dict

{'Auraria': '#D12DF6',
 'Cory - Merrill': '#91E9A6',
 'Belcaro': '#F4A91C',
 'Washington Park': '#3D4F4C',
 'Washington Park West': '#CDF5AF',
 'Speer': '#D6EA7E',
 'Cherry Creek': '#FD20E6',
 'Country Club': '#51ED41',
 'Congress Park': '#75137A',
 'City Park': '#5F7BA0',
 'Marston': '#8D14C6',
 'Fort Logan': '#364BF9',
 'Washington Virginia Vale': '#1A0836',
 'Barnum': '#E38B9D',
 'Barnum West': '#30CF59',
 'West Colfax': '#D87A24',
 'West Highland': '#81CAA5',
 'Sloan Lake': '#F4297B',
 'Berkeley': '#C68832',
 'Regis': '#D2A3BC',
 'Chaffee Park': '#12EBEC',
 'Highland': '#6A8D99',
 'Athmar Park': '#8CF6DB',
 'Wellshire': '#F3EEE9',
 'University': '#0BFA31',
 'Rosedale': '#B7C4D4',
 'Cheesman Park': '#0BDEAB',
 'Hilltop': '#541622',
 'Montclair': '#B7C8F3',
 'Hale': '#3B0D22',
 'North Park Hill': '#774728',
 'South Park Hill': '#05B18A',
 'University Park': '#273B1F',
 'Platt Park': '#E08AD3',
 'Overland': '#C1BF7E',
 'Ruby Hill': '#CB7BC4',
 'Kennedy': '#DCE4F1',
 'Hampden': '#3A8A5

##### Plot the neighborhoods!

In [8]:
#create map
denver_map = folium.Map(location = [latitude, longitude], zoom_start = 12)

den_colors = color_dict


def style_function(feature):
    return {'opacity':1,
            'weight': 0,
            'fillOpacity': 0.5,
            'fillColor': den_colors[feature['properties']['NBHD_NAME']]}

tooltip = GeoJsonTooltip(fields = ['NBHD_NAME'],
                                   aliases = ['Neighborhood'],
                                   localize = True,
                                   sticky = False,
                                   labels = True,
                                   style = """
                                   background-color: #F0EFEF;
                                   border: 2px solid black;
                                   border-radius: 3px;
                                   box-shadow: 3px;
                                   """,
                                   max_width = 800
                                   )

folium.GeoJson(neighborhoods,
              name = 'Denver Neighborhoods',
              style_function = style_function,
              tooltip = tooltip).add_to(denver_map)

folium.LayerControl(collapsed = False).add_to(denver_map)
denver_map

#### In this next part, we're going to call the Foursquare API. Foursquare has similar data to Yelp or Google reviews, but offers a great service for developers through their API to pull that data off the web for free:

##### Foursquare credentials:

In [9]:
CLIENT_ID = 'ID34KQWDTV2F2ILHSLIDBN3GVGEASML44MVYBAAKOX11U5OX' 
CLIENT_SECRET = 'II2LCTJEKTYAY4ONPEDJYHTVKEFRMZCKTHEDTIYVNYQ0MCV0'
VERSION = '20180604'

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: ID34KQWDTV2F2ILHSLIDBN3GVGEASML44MVYBAAKOX11U5OX
CLIENT_SECRET:II2LCTJEKTYAY4ONPEDJYHTVKEFRMZCKTHEDTIYVNYQ0MCV0


##### Define a function to pull data from the Foursquare API:

In [10]:
def getNearbyVenues(names, latitudes, longitudes):
    radius = 500
    LIMIT = 100
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

##### Call the Foursquare API:

In [11]:
denver_venues = getNearbyVenues(names = df['Neighborhood'],
                                latitudes = df['Latitude'],
                                longitudes = df['Longitude'])

Auraria
Cory - Merrill
Belcaro
Washington Park
Washington Park West
Speer
Cherry Creek
Country Club
Congress Park
City Park
Marston
Fort Logan
Washington Virginia Vale
Barnum
Barnum West
West Colfax
West Highland
Sloan Lake
Berkeley
Regis
Chaffee Park
Highland
Athmar Park
Wellshire
University
Rosedale
Cheesman Park
Hilltop
Montclair
Hale
North Park Hill
South Park Hill
University Park
Platt Park
Overland
Ruby Hill
Kennedy
Hampden
Southmoor Park
Hampden South
Indian Creek
Goldsmith
University Hills
Harvey Park
Mar Lee
East Colfax
Capitol Hill
North Capitol Hill
Civic Center
CBD
Union Station
Central Park
Montbello
Lowry Field
Gateway - Green Valley Ranch
Harvey Park South
College View - South Platte
City Park West
Sun Valley
Valverde
Villa Park
Five Points
Globeville
Bear Valley
Virginia Village
Windsor
Jefferson Park
Northeast Park Hill
Elyria Swansea
Baker
Clayton
Skyland
Lincoln Park
Whittier
Cole
Westwood
Sunnyside
DIA


##### Check the size of the dataframe:

In [12]:
print (denver_venues.shape)
print (denver_venues.dtypes)
print (denver_venues['Neighborhood'].value_counts()) #prints the number of venues for each neighborhood
denver_venues.head()

(1140, 7)
Neighborhood               object
Neighborhood Latitude     float64
Neighborhood Longitude    float64
Venue                      object
Venue Latitude            float64
Venue Longitude           float64
Venue Category             object
dtype: object
CBD                             100
Cherry Creek                     91
Union Station                    90
Five Points                      68
Civic Center                     41
North Capitol Hill               38
Highland                         38
Berkeley                         37
Capitol Hill                     32
West Highland                    31
City Park West                   29
Gateway - Green Valley Ranch     28
Platt Park                       28
Washington Virginia Vale         27
Auraria                          27
Baker                            24
East Colfax                      23
City Park                        23
West Colfax                      22
Sunnyside                        21
Jefferson Park    

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Auraria,39.745821,-105.008267,Tivoli Brewing Company,39.745494,-105.005475,Brewery
1,Auraria,39.745821,-105.008267,Pepsi Center,39.748608,-105.007571,Stadium
2,Auraria,39.745821,-105.008267,Alfresco Greens,39.744905,-105.005778,Salad Place
3,Auraria,39.745821,-105.008267,SpringHill Suites Denver Downtown,39.747537,-105.003805,Hotel
4,Auraria,39.745821,-105.008267,Biker Jim's @ Auraria Campus,39.745109,-105.004855,Hot Dog Joint


##### I'm particularly fond of coffee shops and breweries, so, let's make a new dataframe of just the coffee shops and breweries for each neighborhood:

In [13]:
df_bc = denver_venues.loc[(denver_venues['Venue Category'] == 'Brewery') | (denver_venues['Venue Category'] == 'Coffee Shop')]
print(df_bc.shape)
df_bc.head()

(80, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Auraria,39.745821,-105.008267,Tivoli Brewing Company,39.745494,-105.005475,Brewery
6,Auraria,39.745821,-105.008267,Starbucks In The Tivoli,39.74473,-105.005798,Coffee Shop
7,Auraria,39.745821,-105.008267,Dazbog,39.745415,-105.007474,Coffee Shop
41,Washington Park West,39.702575,-104.979904,Wash Perk,39.702205,-104.977059,Coffee Shop
61,Cherry Creek,39.7194,-104.949281,Aviano Coffee,39.719724,-104.951316,Coffee Shop


##### Map of neighborhoods, coffee shops and breweries:

In [14]:
denver = folium.Map(location = [latitude, longitude], zoom_start = 12) 

feature_breweries = folium.FeatureGroup(name = 'Brewery')
feature_coffee = folium.FeatureGroup(name = 'Coffee Shop')

color_dict = {}
hoodList = df.Neighborhood.unique()

for i in hoodList:
    color = lambda: random.randint(0, 255)
    value = '#%02X%02X%02X' % (color(), color(), color())
    color_dict[i] = value
    
def style_function(feature):
    return {'opacity':1,
            'weight': 0,
            'fillOpacity': 0.25,
            'fillColor': den_colors[feature['properties']['NBHD_NAME']]}

tooltip = GeoJsonTooltip(fields = ['NBHD_NAME'],
                                   aliases = ['Neighborhood'],
                                   localize = True,
                                   sticky = False,
                                   labels = True,
                                   style = """
                                   background-color: #F0EFEF;
                                   border: 2px solid black;
                                   border-radius: 3px;
                                   box-shadow: 3px;
                                   """,
                                   max_width = 800
                                   )

for i, v in df_bc.iterrows():
    popup = """
    Name : <b>%s</b><br>
    Category : <b>%s</b><br>
    Neighborhood : <b>%s</b><br>
    """ % (v['Venue'], v['Venue Category'], v['Neighborhood'])
    
    if v['Venue Category'] == 'Brewery':
        folium.CircleMarker(location = [v['Venue Latitude'], v['Venue Longitude']],
                            radius = 1.5,
                            tooltip = popup,
                            color = '#67a9cf',
                            fill_color = '#67a9cf',
                            opacity = 0.6,
                            fill = True).add_to(feature_breweries)
    
    elif v['Venue Category'] == 'Coffee Shop':
        folium.CircleMarker(location = [v['Venue Latitude'], v['Venue Longitude']],
                            radius = 1.5,
                            tooltip = popup,
                            color = '#ef8a62',
                            fill_color = '#ef8a62',
                            opacity = 0.6,
                            fill = True).add_to(feature_coffee)

folium.GeoJson(neighborhoods,
              name = 'Denver Neighborhoods',
              style_function = style_function,
              tooltip = tooltip).add_to(denver)

feature_breweries.add_to(denver)
feature_coffee.add_to(denver)
folium.LayerControl(collapsed = False).add_to(denver)

denver

#### Analyze the density of coffee shops and breweries in each neighborhood

##### We'll use a technique called 'one hot encoding' here - this will quantify each venue as percentage of total venues in the neighborhood, i.e a value of 0.25 for 'coffee shop' in a given neighborhood means that 25% of the venues are coffee shops in that area. We'll use the results of this analysis to determine where a good place to build a new brewery might be.

In [15]:
#one hot encoding
denver_onehot = pd.get_dummies(denver_venues[['Venue Category']], prefix = "", prefix_sep = "")

#add neighborhood column back to the dataframe
denver_onehot['Neighborhood'] = denver_venues['Neighborhood']

#move the neighborhood column to the first column
cols = list(denver_onehot)
cols.insert(0, cols.pop(cols.index('Neighborhood')))
#cols
denver_onehot = denver_onehot.loc[:, cols]

print (denver_onehot.shape)
denver_onehot.head()

(1140, 242)


Unnamed: 0,Neighborhood,ATM,Accessories Store,African Restaurant,Airport Lounge,Airport Service,Airport Terminal,Alternative Healer,American Restaurant,Aquarium,Arcade,Art Gallery,Art Museum,Arts & Crafts Store,Arts & Entertainment,Asian Restaurant,Assisted Living,Athletics & Sports,Auto Garage,Auto Workshop,BBQ Joint,Bagel Shop,Bakery,Bank,Bar,Baseball Field,Basketball Stadium,Bed & Breakfast,Beer Bar,Beer Garden,Bike Rental / Bike Share,Bike Shop,Bistro,Bookstore,Boutique,Bowling Alley,Boxing Gym,Brazilian Restaurant,Breakfast Spot,Brewery,Bridal Shop,Burger Joint,Burrito Place,Bus Station,Bus Stop,Business Service,Butcher,Cafeteria,Café,Cajun / Creole Restaurant,Cantonese Restaurant,Caribbean Restaurant,Carpet Store,Cheese Shop,Chinese Restaurant,Chiropractor,Chocolate Shop,Churrascaria,Clothing Store,Cocktail Bar,Coffee Shop,Comedy Club,Concert Hall,Construction & Landscaping,Convenience Store,Cosmetics Shop,Creperie,Cuban Restaurant,Cupcake Shop,Cycle Studio,Dance Studio,Deli / Bodega,Department Store,Design Studio,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Distillery,Dive Bar,Dog Run,Donut Shop,Electronics Store,Event Space,Exhibit,Fabric Shop,Factory,Farm,Farmers Market,Fast Food Restaurant,Fish Market,Flea Market,Flower Shop,Food,Food & Drink Shop,Food Service,Food Truck,Football Stadium,Fried Chicken Joint,Furniture / Home Store,Garden,Garden Center,Gas Station,Gastropub,Gay Bar,Gift Shop,Golf Course,Government Building,Greek Restaurant,Grocery Store,Gym,Gym / Fitness Center,Gym Pool,Gymnastics Gym,Health & Beauty Service,Historic Site,History Museum,Hobby Shop,Hockey Arena,Home Service,Hookah Bar,Hot Dog Joint,Hotel,IT Services,Ice Cream Shop,Indian Restaurant,Indie Theater,Insurance Office,Intersection,Israeli Restaurant,Italian Restaurant,Japanese Restaurant,Jazz Club,Jewelry Store,Juice Bar,Karaoke Bar,Kids Store,Kitchen Supply Store,Lake,Latin American Restaurant,Leather Goods Store,Light Rail Station,Lingerie Store,Liquor Store,Lounge,Malay Restaurant,Marijuana Dispensary,Market,Massage Studio,Mediterranean Restaurant,Men's Store,Mexican Restaurant,Middle Eastern Restaurant,Mobile Phone Shop,Movie Theater,Moving Target,Museum,Music Store,Music Venue,New American Restaurant,Nightclub,Noodle House,North Indian Restaurant,Opera House,Optical Shop,Organic Grocery,Outdoor Sculpture,Outdoor Supply Store,Paper / Office Supplies Store,Park,Pet Store,Pharmacy,Photography Studio,Piano Bar,Pizza Place,Playground,Plaza,Pool Hall,Print Shop,Pub,Public Art,Ramen Restaurant,Record Shop,Recreation Center,Rental Car Location,Rental Service,Residential Building (Apartment / Condo),Restaurant,River,Rock Club,Salad Place,Salon / Barbershop,Sandwich Place,Scenic Lookout,Seafood Restaurant,Shipping Store,Shoe Store,Shop & Service,Shopping Mall,Shopping Plaza,Skating Rink,Smoke Shop,Soccer Field,Soup Place,South American Restaurant,Southern / Soul Food Restaurant,Souvenir Shop,Spa,Speakeasy,Sporting Goods Shop,Sports Bar,Stadium,Steakhouse,Storage Facility,Supplement Shop,Sushi Restaurant,Taco Place,Tanning Salon,Tapas Restaurant,Tattoo Parlor,Tea Room,Tennis Court,Thai Restaurant,Theater,Theme Park,Theme Park Ride / Attraction,Thrift / Vintage Store,Toy / Game Store,Track,Trade School,Train Station,Tree,Vegetarian / Vegan Restaurant,Video Store,Vietnamese Restaurant,Whisky Bar,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio,Zoo Exhibit
0,Auraria,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,Auraria,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,Auraria,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,Auraria,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,Auraria,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


##### Group the results by neighborhood:

In [16]:
denver_grouped = denver_onehot.groupby(['Neighborhood']).mean().reset_index()

print (denver_grouped.shape)
denver_grouped.head()

(76, 242)


Unnamed: 0,Neighborhood,ATM,Accessories Store,African Restaurant,Airport Lounge,Airport Service,Airport Terminal,Alternative Healer,American Restaurant,Aquarium,Arcade,Art Gallery,Art Museum,Arts & Crafts Store,Arts & Entertainment,Asian Restaurant,Assisted Living,Athletics & Sports,Auto Garage,Auto Workshop,BBQ Joint,Bagel Shop,Bakery,Bank,Bar,Baseball Field,Basketball Stadium,Bed & Breakfast,Beer Bar,Beer Garden,Bike Rental / Bike Share,Bike Shop,Bistro,Bookstore,Boutique,Bowling Alley,Boxing Gym,Brazilian Restaurant,Breakfast Spot,Brewery,Bridal Shop,Burger Joint,Burrito Place,Bus Station,Bus Stop,Business Service,Butcher,Cafeteria,Café,Cajun / Creole Restaurant,Cantonese Restaurant,Caribbean Restaurant,Carpet Store,Cheese Shop,Chinese Restaurant,Chiropractor,Chocolate Shop,Churrascaria,Clothing Store,Cocktail Bar,Coffee Shop,Comedy Club,Concert Hall,Construction & Landscaping,Convenience Store,Cosmetics Shop,Creperie,Cuban Restaurant,Cupcake Shop,Cycle Studio,Dance Studio,Deli / Bodega,Department Store,Design Studio,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Distillery,Dive Bar,Dog Run,Donut Shop,Electronics Store,Event Space,Exhibit,Fabric Shop,Factory,Farm,Farmers Market,Fast Food Restaurant,Fish Market,Flea Market,Flower Shop,Food,Food & Drink Shop,Food Service,Food Truck,Football Stadium,Fried Chicken Joint,Furniture / Home Store,Garden,Garden Center,Gas Station,Gastropub,Gay Bar,Gift Shop,Golf Course,Government Building,Greek Restaurant,Grocery Store,Gym,Gym / Fitness Center,Gym Pool,Gymnastics Gym,Health & Beauty Service,Historic Site,History Museum,Hobby Shop,Hockey Arena,Home Service,Hookah Bar,Hot Dog Joint,Hotel,IT Services,Ice Cream Shop,Indian Restaurant,Indie Theater,Insurance Office,Intersection,Israeli Restaurant,Italian Restaurant,Japanese Restaurant,Jazz Club,Jewelry Store,Juice Bar,Karaoke Bar,Kids Store,Kitchen Supply Store,Lake,Latin American Restaurant,Leather Goods Store,Light Rail Station,Lingerie Store,Liquor Store,Lounge,Malay Restaurant,Marijuana Dispensary,Market,Massage Studio,Mediterranean Restaurant,Men's Store,Mexican Restaurant,Middle Eastern Restaurant,Mobile Phone Shop,Movie Theater,Moving Target,Museum,Music Store,Music Venue,New American Restaurant,Nightclub,Noodle House,North Indian Restaurant,Opera House,Optical Shop,Organic Grocery,Outdoor Sculpture,Outdoor Supply Store,Paper / Office Supplies Store,Park,Pet Store,Pharmacy,Photography Studio,Piano Bar,Pizza Place,Playground,Plaza,Pool Hall,Print Shop,Pub,Public Art,Ramen Restaurant,Record Shop,Recreation Center,Rental Car Location,Rental Service,Residential Building (Apartment / Condo),Restaurant,River,Rock Club,Salad Place,Salon / Barbershop,Sandwich Place,Scenic Lookout,Seafood Restaurant,Shipping Store,Shoe Store,Shop & Service,Shopping Mall,Shopping Plaza,Skating Rink,Smoke Shop,Soccer Field,Soup Place,South American Restaurant,Southern / Soul Food Restaurant,Souvenir Shop,Spa,Speakeasy,Sporting Goods Shop,Sports Bar,Stadium,Steakhouse,Storage Facility,Supplement Shop,Sushi Restaurant,Taco Place,Tanning Salon,Tapas Restaurant,Tattoo Parlor,Tea Room,Tennis Court,Thai Restaurant,Theater,Theme Park,Theme Park Ride / Attraction,Thrift / Vintage Store,Toy / Game Store,Track,Trade School,Train Station,Tree,Vegetarian / Vegan Restaurant,Video Store,Vietnamese Restaurant,Whisky Bar,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio,Zoo Exhibit
0,Athmar Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Auraria,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.037037,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.037037,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.037037,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.037037,0.0,0.0,0.0,0.0,0.0,0.037037,0.0,0.0,0.0,0.0,0.037037,0.074074,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.074074,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.037037,0.0,0.0,0.0,0.0,0.0,0.0,0.037037,0.0,0.0,0.037037,0.037037,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.037037,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.037037,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.037037,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.037037,0.111111,0.074074,0.0,0.0,0.0,0.037037,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.037037,0.037037,0.037037,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Baker,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.041667,0.0,0.0,0.041667,0.0,0.041667,0.0,0.0,0.0,0.041667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.041667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.041667,0.0,0.041667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.041667,0.0,0.0,0.041667,0.0,0.0,0.0,0.0,0.041667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.041667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.083333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.083333,0.0,0.041667,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.041667,0.0,0.0,0.0,0.0,0.0,0.0,0.041667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.041667,0.041667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.041667,0.0,0.0,0.0,0.0,0.0,0.0
3,Barnum,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.285714,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Barnum West,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


##### Let's create a new dataframe of just coffee shops and breweries:

In [17]:
#new dataframe
denver_cb = denver_grouped[['Neighborhood', 'Coffee Shop', 'Brewery']]

#Sort breweries by the smallest one hot value
#denver_cb.nsmallest(76, 'Brewery')

#computer how many neighborhoods don't have coffee shops or breweries
denver_cb.apply(lambda x: x.eq(0).sum())

Neighborhood     0
Coffee Shop     50
Brewery         63
dtype: int64

#### Given that there are 63 neighborhoods in Denver without breweries, lets proceed with just the breweries from here on out:

In [18]:
den_brew = denver_grouped[['Neighborhood', 'Brewery']]
den_brew.head()

Unnamed: 0,Neighborhood,Brewery
0,Athmar Park,0.0
1,Auraria,0.037037
2,Baker,0.041667
3,Barnum,0.0
4,Barnum West,0.0


#### Cluster neighborhoods using the K-Means algorithm

##### Define k value, initialize K-Means:

In [19]:
#set number of clusters
k = 5
den_clustering = den_brew.drop(['Neighborhood'], 1)

kmeans = KMeans(n_clusters = k, random_state = 0).fit(den_clustering)
kmeans.labels_[0:10]

array([1, 2, 2, 1, 1, 1, 1, 4, 1, 1], dtype=int32)

##### Create a new dataframe containing neighborhood, one hot values for breweries, the cluster value the neighborhood belongs to, and coordinates:

In [34]:
den_merged = den_brew.copy()

#add the cluster labels
den_merged['Cluster Labels'] = kmeans.labels_

#add the lat/lon coordinates for each neighborhood
den_merged = pd.merge(den_merged, df.drop(['NBHD_ID','geometry'], 1), on = 'Neighborhood')
den_merged.round(4)

Unnamed: 0,Neighborhood,Brewery,Cluster Labels,Longitude,Latitude
0,Athmar Park,0.0,1,-105.0104,39.7036
1,Auraria,0.037,2,-105.0083,39.7458
2,Baker,0.0417,2,-104.9962,39.7152
3,Barnum,0.0,1,-105.0324,39.7184
4,Barnum West,0.0,1,-105.0465,39.7185
5,Bear Valley,0.0,1,-105.0655,39.6611
6,Belcaro,0.0,1,-104.9505,39.705
7,Berkeley,0.1081,4,-105.0394,39.7767
8,CBD,0.0,1,-104.9932,39.7451
9,Capitol Hill,0.0,1,-104.9802,39.7336


##### Sort the rows by their 'Cluster Label' value:

In [36]:
print (den_merged.shape)
den_merged.sort_values(['Cluster Labels'], inplace = True)
den_merged.round(4)

(76, 5)


Unnamed: 0,Neighborhood,Brewery,Cluster Labels,Longitude,Latitude
26,Five Points,0.1324,0,-104.9834,39.7591
50,Overland,0.125,0,-104.9931,39.6814
65,Valverde,0.1429,0,-105.0152,39.7172
17,Clayton,0.0,1,-104.9501,39.7674
35,Harvey Park South,0.0,1,-105.0399,39.661
14,City Park,0.0,1,-104.9502,39.7456
13,Cherry Creek,0.0,1,-104.9493,39.7194
12,Cheesman Park,0.0,1,-104.9664,39.7346
11,Chaffee Park,0.0,1,-105.012,39.7876
10,Central Park,0.0,1,-104.8791,39.7766


##### Plot the clusters on a map!

In [22]:
den_clusters = folium.Map(location = [latitude, longitude], zoom_start = 11)

#set color scheme
x = np.arange(k)
ys = [i + x + (i*x)**2 for i in range(k)]
colors_array = cm.rainbow(np.linspace(0,1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]


for lat, lon, aoi, cluster in zip(den_merged['Latitude'],
                                  den_merged['Longitude'],
                                  den_merged['Neighborhood'],
                                  den_merged['Cluster Labels']):
    popup = """
    Neighborhood : <b>%s</b><br>
    Cluster : <b>%s</b><br>
    """ % (aoi, cluster)
    
    #if den_merged['Cluster Labels'] == 0:
    folium.CircleMarker(location = [lat, lon],
                        radius = 5,
                        tooltip = popup,
                        color = rainbow[cluster - 1],
                        fill = True,
                        fill_color = rainbow[cluster - 1],
                        fill_opacity = 0.7).add_to(den_clusters)

folium.GeoJson(neighborhoods,
              name = 'Denver Neighborhoods',
              style_function = style_function,
              tooltip = tooltip).add_to(den_clusters)
folium.LayerControl(collapsed = False).add_to(den_clusters)

den_clusters

#### Examine the clusters

##### Cluster 0

In [37]:
df_c0 = den_merged.loc[den_merged['Cluster Labels'] == 0].round(4)
df_c0.to_csv(r'/Users/martinpalkovic/Desktop/git/Cluster_0.csv')
df_c0

Unnamed: 0,Neighborhood,Brewery,Cluster Labels,Longitude,Latitude
26,Five Points,0.1324,0,-104.9834,39.7591
50,Overland,0.125,0,-104.9931,39.6814
65,Valverde,0.1429,0,-105.0152,39.7172


##### Cluster 1

In [38]:
df_c1 = den_merged.loc[den_merged['Cluster Labels'] == 1].round(4)
df_c1.to_csv(r'/Users/martinpalkovic/Desktop/git/Cluster_1.csv')
df_c1

Unnamed: 0,Neighborhood,Brewery,Cluster Labels,Longitude,Latitude
17,Clayton,0.0,1,-104.9501,39.7674
35,Harvey Park South,0.0,1,-105.0399,39.661
14,City Park,0.0,1,-104.9502,39.7456
13,Cherry Creek,0.0,1,-104.9493,39.7194
12,Cheesman Park,0.0,1,-104.9664,39.7346
11,Chaffee Park,0.0,1,-105.012,39.7876
10,Central Park,0.0,1,-104.8791,39.7766
9,Capitol Hill,0.0,1,-104.9802,39.7336
8,CBD,0.0,1,-104.9932,39.7451
6,Belcaro,0.0,1,-104.9505,39.705


##### Cluster 2

In [42]:
df_c2 = den_merged.loc[den_merged['Cluster Labels'] == 2].round(4)
df_c2.to_csv(r'/Users/martinpalkovic/Desktop/git/Cluster_2.csv')
df_c2

Unnamed: 0,Neighborhood,Brewery,Cluster Labels,Longitude,Latitude
51,Platt Park,0.0357,2,-104.9811,39.6866
39,Jefferson Park,0.0526,2,-105.0192,39.7522
2,Baker,0.0417,2,-104.9962,39.7152
71,West Colfax,0.0455,2,-105.0386,39.74
72,West Highland,0.0323,2,-105.0392,39.764
1,Auraria,0.037,2,-105.0083,39.7458


##### Cluster 3 

In [40]:
df_c3 = den_merged.loc[den_merged['Cluster Labels'] == 3].round(4)
df_c3.to_csv(r'/Users/martinpalkovic/Desktop/git/Cluster_3.csv')
df_c3

Unnamed: 0,Neighborhood,Brewery,Cluster Labels,Longitude,Latitude
59,Sun Valley,0.0714,3,-105.0211,39.7358


##### Cluster 4

In [41]:
df_c4 = den_merged.loc[den_merged['Cluster Labels'] == 4].round(4)
df_c4.to_csv(r'/Users/martinpalkovic/Desktop/git/Cluster_4.csv')
df_c4

Unnamed: 0,Neighborhood,Brewery,Cluster Labels,Longitude,Latitude
7,Berkeley,0.1081,4,-105.0394,39.7767
15,City Park West,0.1034,4,-104.9666,39.7454


### Conclusion

#### Neighborhoods falling within clusters 0 or 4 already have a large number of breweries. Neighborhoods in clusters 2 and 3 have a lower percentage, while all neighborhoods in cluster 1 (with the exception of Union Station) have no breweries. Living in the city of Denver and knowing most of these neighborhoods, I think these results can be analyzed in two different ways. One could look at this data and think, 'if I were to open a brewery, I should do it in a neighborhood with NO existing breweries'. However, I think this would lead to poor business at the brewery location, as some of these neighborhoods don't really feel like prime real estate for a brewery location. 

#### Always remember to incorporate qualitative observations into your analysis - the numbers never tell the whole story. Living in Denver, I know that the neighborhoods in Clusters 2 and 3 are considered 'up and coming' areas. Let's break this down further, and just look at the neighborhoods in clusters 2 and 3:

In [45]:
den_final = den_merged.loc[(den_merged['Cluster Labels'] == 2) | (den_merged['Cluster Labels'] == 3)].round(4)
den_final

Unnamed: 0,Neighborhood,Brewery,Cluster Labels,Longitude,Latitude
51,Platt Park,0.0357,2,-104.9811,39.6866
39,Jefferson Park,0.0526,2,-105.0192,39.7522
2,Baker,0.0417,2,-104.9962,39.7152
71,West Colfax,0.0455,2,-105.0386,39.74
72,West Highland,0.0323,2,-105.0392,39.764
1,Auraria,0.037,2,-105.0083,39.7458
59,Sun Valley,0.0714,3,-105.0211,39.7358


#### I would consider all of these neighborhoods to be good options except for Sun Valley and Auraria - Sun Valley is a largely commercial area that incorporates the Denver Football stadium, I'd consider this a poor option. Auraria is adjacent to downtown, contains Metropolitan State University of Denver, and likely does not have any real estate available.  

In [49]:
den_final = den_final.drop([1, 59])
den_final.style.set_caption ("Martin's list of neighborhoods to build a brewery in Denver")
den_final.to_csv(r'/Users/martinpalkovic/Desktop/git/Cluster_Final.csv')
den_final.style.set_caption ("Martin's list of neighborhoods to build a brewery in Denver")

Unnamed: 0,Neighborhood,Brewery,Cluster Labels,Longitude,Latitude
51,Platt Park,0.0357,2,-104.9811,39.6866
39,Jefferson Park,0.0526,2,-105.0192,39.7522
2,Baker,0.0417,2,-104.9962,39.7152
71,West Colfax,0.0455,2,-105.0386,39.74
72,West Highland,0.0323,2,-105.0392,39.764
