# Analysis of the influence of nearby common venues on the square meter price of properties in Medellín City

#### By Santiago Velez

### Import libraries

In [1]:
# Import libraries
import pandas as pd
import numpy as np

import json # library to handle JSON files
import requests # library to handle requests

from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# import k-means from clustering stage
from sklearn.cluster import KMeans

#!conda install -c conda-forge geopy --yes
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

#!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if the first time you execute
import folium # map rendering library
print('Libraries imported')

Libraries imported


## Import data of properties in Medellin city

In [5]:
import json
with open('Properties_Medellin.geojson') as f:
    data = json.load(f)
data['features']

[{'type': 'Feature',
  'properties': {'OBJECTID': 1,
   'CODIGO': 87443,
   'CBML': '04130270007',
   'TIPO_DE_OFERTA': 2,
   'ESTADO': 'US',
   'AREA_PRIVADA': 80,
   'VALOR_COMERCIAL': 600000,
   'FECHA': '2013/02/21 00:00:00+00',
   'FUENTE': 4,
   'VALOR_M2': 7500,
   'NOMBRE_FUENTE': 22,
   'USO_OFERTA': 1},
  'geometry': {'type': 'Point',
   'coordinates': [-75.56013754861222, 6.281362203469805]}},
 {'type': 'Feature',
  'properties': {'OBJECTID': 2,
   'CODIGO': 90581,
   'CBML': '04130270014',
   'TIPO_DE_OFERTA': 2,
   'ESTADO': 'NV',
   'AREA_PRIVADA': 70,
   'VALOR_COMERCIAL': 500000,
   'FECHA': '2013/05/29 00:00:00+00',
   'FUENTE': 4,
   'VALOR_M2': 7000,
   'NOMBRE_FUENTE': 22,
   'USO_OFERTA': 3},
  'geometry': {'type': 'Point',
   'coordinates': [-75.56011991499687, 6.281879337244785]}},
 {'type': 'Feature',
  'properties': {'OBJECTID': 3,
   'CODIGO': 76837,
   'CBML': '04130280019',
   'TIPO_DE_OFERTA': 1,
   'ESTADO': 'US',
   'AREA_PRIVADA': 87,
   'VALOR_COMERCIAL

Data of properties into Dataframe.

In [6]:
df = pd.json_normalize(data['features'])
df.dropna()
df.head()

Unnamed: 0,type,properties.OBJECTID,properties.CODIGO,properties.CBML,properties.TIPO_DE_OFERTA,properties.ESTADO,properties.AREA_PRIVADA,properties.VALOR_COMERCIAL,properties.FECHA,properties.FUENTE,properties.VALOR_M2,properties.NOMBRE_FUENTE,properties.USO_OFERTA,geometry.type,geometry.coordinates
0,Feature,1,87443,4130270007,2,US,80,600000,2013/02/21 00:00:00+00,4,7500.0,22,1,Point,"[-75.56013754861222, 6.281362203469805]"
1,Feature,2,90581,4130270014,2,NV,70,500000,2013/05/29 00:00:00+00,4,7000.0,22,3,Point,"[-75.56011991499687, 6.281879337244785]"
2,Feature,3,76837,4130280019,1,US,87,90000000,2012/02/22 00:00:00+00,1,1034500.0,3,1,Point,"[-75.55950273469212, 6.281681757236898]"
3,Feature,4,90836,4130300003,2,US,35,190000,2013/06/11 00:00:00+00,1,5500.0,3,3,Point,"[-75.55881718772693, 6.281325465734376]"
4,Feature,5,81000,4130320021,1,US,94,100000000,2012/07/06 00:00:00+00,1,1064000.0,3,1,Point,"[-75.55848773560459, 6.281669343283677]"


Rename Columns

In [7]:
df.rename(columns = {
    'properties.CODIGO': 'Property Code',
    'properties.TIPO_DE_OFERTA': 'Offer Type',
    'properties.VALOR_COMERCIAL': 'Comercial Value',
    'properties.VALOR_M2': 'Mt2 Value',
    'geometry.coordinates': 'Coordinates'
}, inplace = True)

df.head()

Unnamed: 0,type,properties.OBJECTID,Property Code,properties.CBML,Offer Type,properties.ESTADO,properties.AREA_PRIVADA,Comercial Value,properties.FECHA,properties.FUENTE,Mt2 Value,properties.NOMBRE_FUENTE,properties.USO_OFERTA,geometry.type,Coordinates
0,Feature,1,87443,4130270007,2,US,80,600000,2013/02/21 00:00:00+00,4,7500.0,22,1,Point,"[-75.56013754861222, 6.281362203469805]"
1,Feature,2,90581,4130270014,2,NV,70,500000,2013/05/29 00:00:00+00,4,7000.0,22,3,Point,"[-75.56011991499687, 6.281879337244785]"
2,Feature,3,76837,4130280019,1,US,87,90000000,2012/02/22 00:00:00+00,1,1034500.0,3,1,Point,"[-75.55950273469212, 6.281681757236898]"
3,Feature,4,90836,4130300003,2,US,35,190000,2013/06/11 00:00:00+00,1,5500.0,3,3,Point,"[-75.55881718772693, 6.281325465734376]"
4,Feature,5,81000,4130320021,1,US,94,100000000,2012/07/06 00:00:00+00,1,1064000.0,3,1,Point,"[-75.55848773560459, 6.281669343283677]"


In [8]:
FIELDS = ["Property Code", "Offer Type", "Comercial Value", "Mt2 Value", "Coordinates" ]

df_properties = df[FIELDS]
print('Shape of the df: ', df_properties.shape)
df_properties.head()

Shape of the df:  (15725, 5)


Unnamed: 0,Property Code,Offer Type,Comercial Value,Mt2 Value,Coordinates
0,87443,2,600000,7500.0,"[-75.56013754861222, 6.281362203469805]"
1,90581,2,500000,7000.0,"[-75.56011991499687, 6.281879337244785]"
2,76837,1,90000000,1034500.0,"[-75.55950273469212, 6.281681757236898]"
3,90836,2,190000,5500.0,"[-75.55881718772693, 6.281325465734376]"
4,81000,1,100000000,1064000.0,"[-75.55848773560459, 6.281669343283677]"


#### Filter properties to work only with properties for sale (Offer type = 1)

In [12]:
df_properties_for_sale = df_properties[ df_properties["Offer Type"] == 1 ].copy()
print('Shape of the df: ', df_properties_for_sale.shape)
df_properties_for_sale.head(10)

Shape of the df:  (6149, 5)


Unnamed: 0,Property Code,Offer Type,Comercial Value,Mt2 Value,Coordinates
2,76837,1,90000000,1034500.0,"[-75.55950273469212, 6.281681757236898]"
4,81000,1,100000000,1064000.0,"[-75.55848773560459, 6.281669343283677]"
7,91380,1,25000000,182500.0,"[-75.55812114476444, 6.281473115285412]"
13,81003,1,85000000,552000.0,"[-75.56071377616377, 6.283044125616279]"
16,79042,1,44430000,1234000.0,"[-75.56041420318351, 6.283232305993462]"
17,79043,1,53022000,1233000.0,"[-75.56041420318351, 6.283232305993462]"
18,89701,1,150000000,1128000.0,"[-75.55928542553175, 6.283084405769118]"
21,80991,1,110000000,873000.0,"[-75.55805687085824, 6.28303369776205]"
22,81626,1,95000000,1131000.0,"[-75.55982318923958, 6.270291472838877]"
24,91749,1,70000000,1094000.0,"[-75.5608635558844, 6.271340524505576]"


### Map of properties for sale in Medellin

Set coordinates Medellín Colombian City 

In [None]:
address = 'Medellin, CO'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude

print('The geograpical coordinate of Medellin are {}, {}.'.format(latitude, longitude))

In [None]:
# create map of Medellin using latitude and longitude values
map_medellin = folium.Map(location=[latitude, longitude], zoom_start=12)

In [13]:
# add markers to map
for index, row in df_properties_for_sale.iterrows():
    code = row['Property Code']
    latitude = (row['Coordinates'])[1]
    longitude = (row['Coordinates'])[0]
    label = folium.Popup(code, parse_html=True)
    folium.CircleMarker(
        [latitude, longitude],
        radius=4,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_medellin)
    
map_medellin

## Quartiles Analysis to classify properties strata

Obtain quartiles ranges

In [14]:
qs = df_properties_for_sale['Mt2 Value'].quantile([.25, .5, .75]).values
print('The quartiles of the data for Mt2 Value are: ', qs)

#Function that determine the quartile
def checkQuartile(value, qs):
     return ('Q1' if value <= qs[0] else 'Q2' if value > qs[0] and value <= qs[1] else 'Q3' if value > qs[1] and value <= qs[2]  else 'Q4')
    
df_properties_for_sale['Quartile'] = df_properties_for_sale['Mt2 Value'].map(lambda x: checkQuartile(x, qs))

df_properties_for_sale.head()


[ 900000. 1412000. 2000000.]


Unnamed: 0,Property Code,Offer Type,Comercial Value,Mt2 Value,Coordinates,Quartile
2,76837,1,90000000,1034500.0,"[-75.55950273469212, 6.281681757236898]",Q2
4,81000,1,100000000,1064000.0,"[-75.55848773560459, 6.281669343283677]",Q2
7,91380,1,25000000,182500.0,"[-75.55812114476444, 6.281473115285412]",Q1
13,81003,1,85000000,552000.0,"[-75.56071377616377, 6.283044125616279]",Q1
16,79042,1,44430000,1234000.0,"[-75.56041420318351, 6.283232305993462]",Q2
17,79043,1,53022000,1233000.0,"[-75.56041420318351, 6.283232305993462]",Q2
18,89701,1,150000000,1128000.0,"[-75.55928542553175, 6.283084405769118]",Q2
21,80991,1,110000000,873000.0,"[-75.55805687085824, 6.28303369776205]",Q1
22,81626,1,95000000,1131000.0,"[-75.55982318923958, 6.270291472838877]",Q2
24,91749,1,70000000,1094000.0,"[-75.5608635558844, 6.271340524505576]",Q2


#### Create an independent Dataframe for quartiles Q1 ans Q4

In [15]:
df_properties_q1 = df_properties_for_sale[df_properties_for_sale['Quartile'] == 'Q1']
df_properties_q4 = df_properties_for_sale[df_properties_for_sale['Quartile'] == 'Q4']

print('Shape of q1 is: ', df_properties_q1.shape)
print('Shape of q4 is: ', df_properties_q4.shape)

Shape of q1 is:  (1540, 6)
Shape of q4 is:  (1488, 6)


In [16]:
# create map of Medellin using latitude and longitude values
map_properties = folium.Map(location=[latitude, longitude], zoom_start=12)

# add markers to map properties of quartile 4
for index, row in df_properties_q4.iterrows():
    code = row['Property Code']
    latitude = (row['Coordinates'])[1]
    longitude = (row['Coordinates'])[0]
    label = folium.Popup(code, parse_html=True)
    folium.CircleMarker(
        [latitude, longitude],
        radius=4,
        popup=label,
        color='green',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_properties)

# add markers to map properties of quartile 1
for index, row in df_properties_q1.iterrows():
    code = row['Property Code']
    latitude = (row['Coordinates'])[1]
    longitude = (row['Coordinates'])[0]
    label = folium.Popup(code, parse_html=True)
    folium.CircleMarker(
        [latitude, longitude],
        radius=4,
        popup=label,
        color='red',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_properties)
    
map_properties

## Venue Analysis of property of each group

### Statistic Sample
#### Select 20 properties  for each quartile Strata (Q1 and Q4)

In [20]:
df_sample_q1 = df_properties_q1.sample(20)
df_sample_q4 = df_properties_q4.sample(20)

### Segmentation and clustering Analysis for each group

#### Set Foursquare

In [17]:
CLIENT_ID = 'GELRBWEO4KWMOQPN5IZISPVSW5XYIS53422E1S1GOFXJFWLV' # Foursquare ID
CLIENT_SECRET = 'F5XFTGXAWEK0YKKV5T4RUSRLLSM23X0B2YCDVOVOMCSKPCAM' # Foursquare Secret
VERSION = '20180605' # Foursquare API version
LIMIT = 100 # Number of venues to get in a request

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: GELRBWEO4KWMOQPN5IZISPVSW5XYIS53422E1S1GOFXJFWLV
CLIENT_SECRET:F5XFTGXAWEK0YKKV5T4RUSRLLSM23X0B2YCDVOVOMCSKPCAM


#### Get venue for each property in the group

In [18]:
# Function to obtain venues for every property in groups 
def getNearbyVenues(codes, coordinates, radius=200):
    
    venues_list=[]
    for code, coordinate in zip(codes, coordinates):
        print(code)
        lat = coordinate[1]
        lng = coordinate[0]
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            code, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Property Code', 
                  'Property Latitude', 
                  'Property Longitude',  
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [23]:
# Group Q1 properties in Medellin venues
q1_properties_venues = getNearbyVenues(codes=df_sample_q1['Property Code'],
                                   coordinates=df_sample_q1['Coordinates']
                                  )

81389
85650
82931
88967
90733
85143
85683
85163
81164
85261
81157
80996
82401
83676
87908
81339
80989
91394
83002
83327


In [21]:
# Group Q4 properties in Medellin venues
q4_properties_venues = getNearbyVenues(codes=df_sample_q4['Property Code'],
                                   coordinates=df_sample_q4['Coordinates']
                                  )

77436
85659
81300
91028
88432
78021
88426
89489
78224
78693
77267
85030
88690
77751
87588
77597
77098
81802
91020
88305


### Analyze Each Property for each group Q1 and Q4

#### Create a dataframe to relate every property vs. every venue category

In [24]:
# one hot encoding
q1_onehot = pd.get_dummies(q1_properties_venues[['Venue Category']], prefix="", prefix_sep="")
q4_onehot = pd.get_dummies(q4_properties_venues[['Venue Category']], prefix="", prefix_sep="")


# add property Code column back to dataframe
q1_onehot['Property Code'] = q1_properties_venues['Property Code']
q4_onehot['Property Code'] = q4_properties_venues['Property Code'] 


# move property Code column to the first column
fixed_columns_q1 = [q1_onehot.columns[-1]] + list(q1_onehot.columns[:-1])
q1_onehot = q1_onehot[fixed_columns_q1]

fixed_columns_q4 = [q4_onehot.columns[-1]] + list(q4_onehot.columns[:-1])
q4_onehot = q4_onehot[fixed_columns_q4]

print('shape of the Q1 df is: ', q1_onehot.shape)
print('shape of the Q4 df is: ', q4_onehot.shape)
q1_onehot.head()

shape of the Q1 df is:  (11, 12)
shape of the Q4 df is:  (134, 76)


Unnamed: 0,Property Code,BBQ Joint,Caribbean Restaurant,Farm,Food Truck,Hardware Store,Ice Cream Shop,Men's Store,Mexican Restaurant,Park,Restaurant,Soccer Field
0,90733,0,0,0,1,0,0,0,0,0,0,0
1,90733,0,0,0,0,0,0,0,1,0,0,0
2,85163,1,0,0,0,0,0,0,0,0,0,0
3,85163,0,0,0,0,0,1,0,0,0,0,0
4,81164,0,0,0,0,0,0,0,0,1,0,0


#### Group rows by property code and by taking the mean of the frequency of occurrence of each category


In [25]:
q1_property_grouped = q1_onehot.groupby('Property Code').mean().reset_index()
q4_property_grouped = q4_onehot.groupby('Property Code').mean().reset_index()

# Let's confirm the new sizes
print('Q1', q1_property_grouped.shape)
print('Q4', q4_property_grouped.shape)

Q1 (8, 12)
Q4 (17, 76)


#### Create a dataframe with the properties and the top 10 venues per property
Function to sort venues in descendent order

In [26]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

Dataframes with the property code and the top 10 venues

In [27]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Property Code']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframes for each group
q1_properties_venues_sorted = pd.DataFrame(columns=columns)
q1_properties_venues_sorted['Property Code'] = q1_property_grouped['Property Code']

q4_properties_venues_sorted = pd.DataFrame(columns=columns)
q4_properties_venues_sorted['Property Code'] = q4_property_grouped['Property Code']


for ind in np.arange(q1_property_grouped.shape[0]):
    q1_properties_venues_sorted.iloc[ind, 1:] = return_most_common_venues(q1_property_grouped.iloc[ind, :], num_top_venues)
    
for ind in np.arange(q4_property_grouped.shape[0]):
    q4_properties_venues_sorted.iloc[ind, 1:] = return_most_common_venues(q4_property_grouped.iloc[ind, :], num_top_venues)

    
q1_properties_venues_sorted.head()

Unnamed: 0,Property Code,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,81164,Park,Soccer Field,Restaurant,Mexican Restaurant,Men's Store,Ice Cream Shop,Hardware Store,Food Truck,Farm,Caribbean Restaurant
1,82401,Hardware Store,Caribbean Restaurant,Soccer Field,Restaurant,Park,Mexican Restaurant,Men's Store,Ice Cream Shop,Food Truck,Farm
2,83002,Men's Store,Soccer Field,Restaurant,Park,Mexican Restaurant,Ice Cream Shop,Hardware Store,Food Truck,Farm,Caribbean Restaurant
3,83327,Restaurant,Soccer Field,Park,Mexican Restaurant,Men's Store,Ice Cream Shop,Hardware Store,Food Truck,Farm,Caribbean Restaurant
4,83676,Soccer Field,Restaurant,Park,Mexican Restaurant,Men's Store,Ice Cream Shop,Hardware Store,Food Truck,Farm,Caribbean Restaurant


In [28]:
q4_properties_venues_sorted.head()

Unnamed: 0,Property Code,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,77098,Burger Joint,Coffee Shop,Dessert Shop,Restaurant,Electronics Store,Shopping Mall,Ice Cream Shop,Sporting Goods Shop,Peruvian Restaurant,Café
1,77267,Wings Joint,Hotel,Cocktail Bar,Breakfast Spot,Food Truck,Diner,Doctor's Office,Electronics Store,Eye Doctor,Food
2,77436,Park,Bakery,Food,Wings Joint,Food Service,Dessert Shop,Diner,Doctor's Office,Electronics Store,Eye Doctor
3,77597,Grocery Store,Other Nightlife,Clothing Store,Shopping Mall,Gay Bar,Furniture / Home Store,Frozen Yogurt Shop,Fried Chicken Joint,French Restaurant,Food Truck
4,77751,Gymnastics Gym,Australian Restaurant,Bakery,Bed & Breakfast,Spa,Wings Joint,Food Truck,Doctor's Office,Electronics Store,Eye Doctor


## Cluster Group of Properties

## Clustering and segmentation for Q1 group

K-means with 5 clusters

In [29]:
# set number of clusters
kclusters = 5

q1_property_grouped_for_clustering = q1_property_grouped.drop('Property Code', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(q1_property_grouped_for_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([2, 0, 3, 1, 4, 0, 0, 0])

#### New dataframe with complete information of property including cluster group

In [30]:
# add clustering labels
q1_properties_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)
q1_properties_venues_sorted = q1_properties_venues_sorted.astype({'Cluster Labels': int})

q1_properties_merged = df_sample_q1

# merge toronto_grouped with toronto data (df) to add latitude/longitude for each neighborhood
q1_properties_merged = q1_properties_merged.join(q1_properties_venues_sorted.set_index('Property Code'), on='Property Code')

q1_properties_merged = q1_properties_merged.dropna()
q1_properties_merged = q1_properties_merged.astype({'Cluster Labels': int})

q1_properties_merged.head()

Unnamed: 0,Property Code,Offer Type,Comercial Value,Mt2 Value,Coordinates,Quartile,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
71,90733,1,17480000,230000.0,"[-75.56346588722369, 6.307318250898297]",Q1,0,Mexican Restaurant,Food Truck,Soccer Field,Restaurant,Park,Men's Store,Ice Cream Shop,Hardware Store,Farm,Caribbean Restaurant
90,85163,1,53702700,725500.0,"[-75.56300880495455, 6.311556190903]",Q1,0,Ice Cream Shop,BBQ Joint,Soccer Field,Restaurant,Park,Mexican Restaurant,Men's Store,Hardware Store,Food Truck,Farm
981,81164,1,150000000,707500.0,"[-75.5480243542727, 6.266565945200079]",Q1,2,Park,Soccer Field,Restaurant,Mexican Restaurant,Men's Store,Ice Cream Shop,Hardware Store,Food Truck,Farm,Caribbean Restaurant
1361,82401,1,48000000,800000.0,"[-75.59178576128721, 6.290340364452782]",Q1,0,Hardware Store,Caribbean Restaurant,Soccer Field,Restaurant,Park,Mexican Restaurant,Men's Store,Ice Cream Shop,Food Truck,Farm
948,83676,1,30720000,240000.0,"[-75.57595112531395, 6.291851614743772]",Q1,4,Soccer Field,Restaurant,Park,Mexican Restaurant,Men's Store,Ice Cream Shop,Hardware Store,Food Truck,Farm,Caribbean Restaurant


#### Visualization in a map of the result of the clusters

In [31]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for coordinate, poi, cluster in zip(q1_properties_merged['Coordinates'], q1_properties_merged['Property Code'], q1_properties_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    lat = coordinate[1]
    lon = coordinate[0]
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

## Examine Clusters for Q1 group

#### Evaluate and stablish a describing group that distinguish each cluster.

#### Cluster 1

In [32]:
q1_properties_merged.loc[q1_properties_merged['Cluster Labels'] == 0, q1_properties_merged.columns[[1] + list(range(5, q1_properties_merged.shape[1]))]]

Unnamed: 0,Offer Type,Quartile,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
71,1,Q1,0,Mexican Restaurant,Food Truck,Soccer Field,Restaurant,Park,Men's Store,Ice Cream Shop,Hardware Store,Farm,Caribbean Restaurant
90,1,Q1,0,Ice Cream Shop,BBQ Joint,Soccer Field,Restaurant,Park,Mexican Restaurant,Men's Store,Hardware Store,Food Truck,Farm
1361,1,Q1,0,Hardware Store,Caribbean Restaurant,Soccer Field,Restaurant,Park,Mexican Restaurant,Men's Store,Ice Cream Shop,Food Truck,Farm
1808,1,Q1,0,Farm,Soccer Field,Restaurant,Park,Mexican Restaurant,Men's Store,Ice Cream Shop,Hardware Store,Food Truck,Caribbean Restaurant


#### Cluster 2

In [33]:
q1_properties_merged.loc[q1_properties_merged['Cluster Labels'] == 1, q1_properties_merged.columns[[1] + list(range(5, q1_properties_merged.shape[1]))]]

Unnamed: 0,Offer Type,Quartile,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
11458,1,Q1,1,Restaurant,Soccer Field,Park,Mexican Restaurant,Men's Store,Ice Cream Shop,Hardware Store,Food Truck,Farm,Caribbean Restaurant


#### Cluster 3

In [34]:
q1_properties_merged.loc[q1_properties_merged['Cluster Labels'] == 2, q1_properties_merged.columns[[1] + list(range(5, q1_properties_merged.shape[1]))]]

Unnamed: 0,Offer Type,Quartile,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
981,1,Q1,2,Park,Soccer Field,Restaurant,Mexican Restaurant,Men's Store,Ice Cream Shop,Hardware Store,Food Truck,Farm,Caribbean Restaurant


#### Cluster 4

In [35]:
q1_properties_merged.loc[q1_properties_merged['Cluster Labels'] == 3, q1_properties_merged.columns[[1] + list(range(5, q1_properties_merged.shape[1]))]]

Unnamed: 0,Offer Type,Quartile,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1356,1,Q1,3,Men's Store,Soccer Field,Restaurant,Park,Mexican Restaurant,Ice Cream Shop,Hardware Store,Food Truck,Farm,Caribbean Restaurant


#### Cluster 5

In [36]:
q1_properties_merged.loc[q1_properties_merged['Cluster Labels'] == 4, q1_properties_merged.columns[[1] + list(range(5, q1_properties_merged.shape[1]))]]

Unnamed: 0,Offer Type,Quartile,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
948,1,Q1,4,Soccer Field,Restaurant,Park,Mexican Restaurant,Men's Store,Ice Cream Shop,Hardware Store,Food Truck,Farm,Caribbean Restaurant


## Clustering and segmentation for Q4 group

K-means with 5 clusters

In [40]:
# set number of clusters
kclusters = 5

q4_property_grouped_for_clustering = q4_property_grouped.drop('Property Code', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(q4_property_grouped_for_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([0, 0, 0, 0, 0, 0, 4, 0, 0, 2])

#### New dataframe with complete information of property including cluster group

In [41]:
# add clustering labels
q4_properties_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)
q4_properties_venues_sorted = q4_properties_venues_sorted.astype({'Cluster Labels': int})

q4_properties_merged = df_sample_q4

# merge toronto_grouped with toronto data (df) to add latitude/longitude for each neighborhood
q4_properties_merged = q4_properties_merged.join(q4_properties_venues_sorted.set_index('Property Code'), on='Property Code')

# Drop properties without registers of nearby venues
q4_properties_merged = q4_properties_merged.dropna()
q4_properties_merged = q4_properties_merged.astype({'Cluster Labels': int})

q4_properties_merged.head()

Unnamed: 0,Property Code,Offer Type,Comercial Value,Mt2 Value,Coordinates,Quartile,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2314,77436,1,165800000,2153000.0,"[-75.58287601523018, 6.274513535355053]",Q4,0,Park,Bakery,Food,Wings Joint,Food Service,Dessert Shop,Diner,Doctor's Office,Electronics Store,Eye Doctor
6139,85659,1,620000000,2450500.0,"[-75.56465462167566, 6.21391964747466]",Q4,3,Doctor's Office,Wings Joint,Creperie,Dessert Shop,Diner,Electronics Store,Eye Doctor,Food,Food Service,Food Truck
4744,81300,1,2200000000,6111000.0,"[-75.57145784355598, 6.244660139380769]",Q4,0,Historic Site,Restaurant,Tour Provider,Metro Station,Park,Bed & Breakfast,Pastry Shop,Dessert Shop,Diner,Doctor's Office
7989,91028,1,202000000,2729500.0,"[-75.58560678548152, 6.234421809693701]",Q4,1,Pet Store,Italian Restaurant,Wings Joint,Food Service,Dessert Shop,Diner,Doctor's Office,Electronics Store,Eye Doctor,Food
12155,78021,1,420000000,2346500.0,"[-75.57920571523276, 6.191546489435118]",Q4,0,Convenience Store,Latin American Restaurant,Gym,Restaurant,Shopping Mall,Frozen Yogurt Shop,Fried Chicken Joint,French Restaurant,Food Truck,Food Service


#### Visualization in a map of the result of the clusters

In [42]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for coordinate, poi, cluster in zip(q4_properties_merged['Coordinates'], q4_properties_merged['Property Code'], q4_properties_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    lat = coordinate[1]
    lon = coordinate[0]
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

## Examine Clusters for Q4 group

#### Evaluate and stablish a describing group that distinguish each cluster.

#### Cluster 1

In [43]:
q4_properties_merged.loc[q4_properties_merged['Cluster Labels'] == 0, q4_properties_merged.columns[[1] + list(range(5, q4_properties_merged.shape[1]))]]

Unnamed: 0,Offer Type,Quartile,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2314,1,Q4,0,Park,Bakery,Food,Wings Joint,Food Service,Dessert Shop,Diner,Doctor's Office,Electronics Store,Eye Doctor
4744,1,Q4,0,Historic Site,Restaurant,Tour Provider,Metro Station,Park,Bed & Breakfast,Pastry Shop,Dessert Shop,Diner,Doctor's Office
12155,1,Q4,0,Convenience Store,Latin American Restaurant,Gym,Restaurant,Shopping Mall,Frozen Yogurt Shop,Fried Chicken Joint,French Restaurant,Food Truck,Food Service
10734,1,Q4,0,Bakery,Dessert Shop,Gymnastics Gym,Colombian Restaurant,Food Service,Diner,Doctor's Office,Electronics Store,Eye Doctor,Food
12251,1,Q4,0,Convenience Store,Music School,Memorial Site,Wings Joint,Food,Diner,Doctor's Office,Electronics Store,Eye Doctor,Food Service
8628,1,Q4,0,Wings Joint,Hotel,Cocktail Bar,Breakfast Spot,Food Truck,Diner,Doctor's Office,Electronics Store,Eye Doctor,Food
10064,1,Q4,0,Gymnastics Gym,Australian Restaurant,Bakery,Bed & Breakfast,Spa,Wings Joint,Food Truck,Doctor's Office,Electronics Store,Eye Doctor
10757,1,Q4,0,Pizza Place,Italian Restaurant,Mexican Restaurant,Supermarket,Peruvian Restaurant,BBQ Joint,Creperie,Hostel,Dessert Shop,Burger Joint
10437,1,Q4,0,Grocery Store,Other Nightlife,Clothing Store,Shopping Mall,Gay Bar,Furniture / Home Store,Frozen Yogurt Shop,Fried Chicken Joint,French Restaurant,Food Truck
12719,1,Q4,0,Burger Joint,Coffee Shop,Dessert Shop,Restaurant,Electronics Store,Shopping Mall,Ice Cream Shop,Sporting Goods Shop,Peruvian Restaurant,Café


#### Cluster 2

In [44]:
q4_properties_merged.loc[q4_properties_merged['Cluster Labels'] == 1, q4_properties_merged.columns[[1] + list(range(5, q4_properties_merged.shape[1]))]]

Unnamed: 0,Offer Type,Quartile,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
7989,1,Q4,1,Pet Store,Italian Restaurant,Wings Joint,Food Service,Dessert Shop,Diner,Doctor's Office,Electronics Store,Eye Doctor,Food


#### Cluster 3

In [45]:
q4_properties_merged.loc[q4_properties_merged['Cluster Labels'] == 2, q4_properties_merged.columns[[1] + list(range(5, q4_properties_merged.shape[1]))]]

Unnamed: 0,Offer Type,Quartile,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
5958,1,Q4,2,Burger Joint,Food Truck,Dessert Shop,Diner,Doctor's Office,Electronics Store,Eye Doctor,Food,Food Service,Wings Joint


#### Cluster 4

In [46]:
q4_properties_merged.loc[q4_properties_merged['Cluster Labels'] == 3, q4_properties_merged.columns[[1] + list(range(5, q4_properties_merged.shape[1]))]]

Unnamed: 0,Offer Type,Quartile,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
6139,1,Q4,3,Doctor's Office,Wings Joint,Creperie,Dessert Shop,Diner,Electronics Store,Eye Doctor,Food,Food Service,Food Truck


#### Cluster 5

In [47]:
q4_properties_merged.loc[q4_properties_merged['Cluster Labels'] == 4, q4_properties_merged.columns[[1] + list(range(5, q4_properties_merged.shape[1]))]]

Unnamed: 0,Offer Type,Quartile,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
12114,1,Q4,4,Food Truck,Supermarket,Wings Joint,Dessert Shop,Diner,Doctor's Office,Electronics Store,Eye Doctor,Food,Food Service
13907,1,Q4,4,Supermarket,South American Restaurant,Wings Joint,Food Service,Dance Studio,Dessert Shop,Diner,Doctor's Office,Electronics Store,Eye Doctor
