# Peer-graded Assignment: Capstone Project - The Battle of Neighborhoods (Week 2)

### Collect Inspection Data

After importing the necessary libraries, I download the data from the HM Land Registry website as follows:

In [1]:
import os # Operating System
import numpy as np
import pandas as pd
import datetime as dt # Datetime
import json # library to handle JSON files

# !pip install geopy 
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe


import matplotlib.cm as cm
import matplotlib.colors as colors

# !pip install folium
import folium #import folium # map rendering library

print('Libraries imported.')

Libraries imported.


In [2]:
df_ppd = pd.read_csv("http://prod2.publicdata.landregistry.gov.uk.s3-website-eu-west-1.amazonaws.com/pp-2018.csv")

###  Explore and Understand Data

After I read the dataset that I collected from the HM Land Registry website into a pandas data frame and display the first five rows of it as follows:

In [3]:
df_ppd.head(5)

Unnamed: 0,{7011B109-CFCA-8ED6-E053-6B04A8C075C1},280000,2018-06-04 00:00,IP4 5ES,S,N,F,3,Unnamed: 8,RANDWELL CLOSE,Unnamed: 10,IPSWICH,IPSWICH.1,SUFFOLK,A,A.1
0,{7011B109-CFCB-8ED6-E053-6B04A8C075C1},280000,2018-05-29 00:00,IP1 4BS,T,N,F,261,,NORWICH ROAD,,IPSWICH,IPSWICH,SUFFOLK,A,A
1,{7011B109-CFCC-8ED6-E053-6B04A8C075C1},170000,2018-04-27 00:00,IP4 4BH,T,N,F,31,,PARADE ROAD,,IPSWICH,IPSWICH,SUFFOLK,A,A
2,{7011B109-CFCD-8ED6-E053-6B04A8C075C1},246000,2018-05-25 00:00,IP1 6NB,S,N,F,42,,ELMCROFT ROAD,,IPSWICH,IPSWICH,SUFFOLK,A,A
3,{7011B109-CFCE-8ED6-E053-6B04A8C075C1},180000,2018-06-08 00:00,IP3 9LZ,T,N,F,48,,WYNTERTON CLOSE,,IPSWICH,IPSWICH,SUFFOLK,A,A
4,{7011B109-CFCF-8ED6-E053-6B04A8C075C1},245000,2018-05-11 00:00,IP1 4BU,T,N,F,235,,NORWICH ROAD,,IPSWICH,IPSWICH,SUFFOLK,A,A


### Data preparation and preprocessing

At this stage, we prepare our dataset for the modeling process, opting for the most suitable machine learning algorithm for our scope. Accordingly, we perform the following steps:

1.Rename the column names
2.Format the date column
3.Sort data by date of sale
4.Select data only for the city of London
5.Make a list of street names in London
6.Calculate the street-wise average price of the property
7.Read the street-wise coordinates into a data frame, eliminating recurring word London from individual names
8.Join the data to find the coordinates of locations which fit into client's budget
9.Plot recommended locations on London map along with current market prices

In [4]:

# Assign meaningful column names
df_ppd.columns = ['TUID', 'Price', 'Date_Transfer', 'Postcode', 'Prop_Type', 'Old_New', 'Duration', 'PAON', \
                  'SAON', 'Street', 'Locality', 'Town_City', 'District', 'County', 'PPD_Cat_Type', 'Record_Status']

In [None]:
# Format the date column
df_ppd['Date_Transfer'] = df_ppd['Date_Transfer'].apply(pd.to_datetime)

# Delete all obsolete transactions which were done before 2016
df_ppd.drop(df_ppd[df_ppd.Date_Transfer.dt.year < 2016].index, inplace=True)

# Sort by Date of Sale
df_ppd.sort_values(by=['Date_Transfer'],ascending=[False],inplace=True)

In [6]:
df_ppd_london = df_ppd.query("Town_City == 'LONDON'")

# Make a list of street names in LONDON
streets = df_ppd_london['Street'].unique().tolist()
df_grp_price = df_ppd_london.groupby(['Street'])['Price'].mean().reset_index()

# Give meaningful names to the columns
df_grp_price.columns = ['Street', 'Avg_Price']
#Input your Budget's Upper Limit and Lower Limit - Find the locations df_grp_price which fits your budget
df_affordable = df_grp_price.query("(Avg_Price >= 2200000) & (Avg_Price <= 2500000)")

In [7]:
# Display the dataframe
df_affordable

Unnamed: 0,Street,Avg_Price
196,ALBION SQUARE,2.450000e+06
390,ANHALT ROAD,2.435000e+06
405,ANSDELL TERRACE,2.250000e+06
422,APPLEGARTH ROAD,2.400000e+06
856,BARONSMEAD ROAD,2.375000e+06
982,BEAUCLERC ROAD,2.480000e+06
1103,BELVEDERE DRIVE,2.340000e+06
1216,BICKENHALL STREET,2.208500e+06
1254,BIRCHLANDS AVENUE,2.217000e+06
1554,BRAMPTON GROVE,2.456875e+06


In [8]:
for index, item in df_affordable.iterrows():
    print(f"index: {index}")
    print(f"item: {item}")
    print(f"item.Street only: {item.Street}")

index: 196
item: Street       ALBION SQUARE
Avg_Price         2.45e+06
Name: 196, dtype: object
item.Street only: ALBION SQUARE
index: 390
item: Street       ANHALT ROAD
Avg_Price      2.435e+06
Name: 390, dtype: object
item.Street only: ANHALT ROAD
index: 405
item: Street       ANSDELL TERRACE
Avg_Price           2.25e+06
Name: 405, dtype: object
item.Street only: ANSDELL TERRACE
index: 422
item: Street       APPLEGARTH ROAD
Avg_Price            2.4e+06
Name: 422, dtype: object
item.Street only: APPLEGARTH ROAD
index: 856
item: Street       BARONSMEAD ROAD
Avg_Price          2.375e+06
Name: 856, dtype: object
item.Street only: BARONSMEAD ROAD
index: 982
item: Street       BEAUCLERC ROAD
Avg_Price          2.48e+06
Name: 982, dtype: object
item.Street only: BEAUCLERC ROAD
index: 1103
item: Street       BELVEDERE DRIVE
Avg_Price           2.34e+06
Name: 1103, dtype: object
item.Street only: BELVEDERE DRIVE
index: 1216
item: Street       BICKENHALL STREET
Avg_Price           2.2085e+06
N

In [9]:
geolocator = Nominatim()


  if __name__ == '__main__':


In [10]:
df_affordable['city_coord'] = df_affordable['Street'].apply(geolocator.geocode).apply(lambda x: (x.latitude, x.longitude))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if __name__ == '__main__':


In [11]:
df_affordable

Unnamed: 0,Street,Avg_Price,city_coord
196,ALBION SQUARE,2.450000e+06,"(-41.27375755, 173.28939323910353)"
390,ANHALT ROAD,2.435000e+06,"(51.4803164, -0.1668011)"
405,ANSDELL TERRACE,2.250000e+06,"(51.4998899, -0.1891027)"
422,APPLEGARTH ROAD,2.400000e+06,"(53.7486539, -0.3266704)"
856,BARONSMEAD ROAD,2.375000e+06,"(51.4773147, -0.239457)"
982,BEAUCLERC ROAD,2.480000e+06,"(30.2114523, -81.6179807)"
1103,BELVEDERE DRIVE,2.340000e+06,"(44.7075622, -63.5455988)"
1216,BICKENHALL STREET,2.208500e+06,"(51.5212014, -0.1589082)"
1254,BIRCHLANDS AVENUE,2.217000e+06,"(51.4483941, -0.1604676)"
1554,BRAMPTON GROVE,2.456875e+06,"(51.5899607, -0.3185249)"


In [12]:
df_affordable[['Latitude', 'Longitude']] = df_affordable['city_coord'].apply(pd.Series)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self[k1] = value[k2]


In [13]:
df_affordable

Unnamed: 0,Street,Avg_Price,city_coord,Latitude,Longitude
196,ALBION SQUARE,2.450000e+06,"(-41.27375755, 173.28939323910353)",-41.273758,173.289393
390,ANHALT ROAD,2.435000e+06,"(51.4803164, -0.1668011)",51.480316,-0.166801
405,ANSDELL TERRACE,2.250000e+06,"(51.4998899, -0.1891027)",51.499890,-0.189103
422,APPLEGARTH ROAD,2.400000e+06,"(53.7486539, -0.3266704)",53.748654,-0.326670
856,BARONSMEAD ROAD,2.375000e+06,"(51.4773147, -0.239457)",51.477315,-0.239457
982,BEAUCLERC ROAD,2.480000e+06,"(30.2114523, -81.6179807)",30.211452,-81.617981
1103,BELVEDERE DRIVE,2.340000e+06,"(44.7075622, -63.5455988)",44.707562,-63.545599
1216,BICKENHALL STREET,2.208500e+06,"(51.5212014, -0.1589082)",51.521201,-0.158908
1254,BIRCHLANDS AVENUE,2.217000e+06,"(51.4483941, -0.1604676)",51.448394,-0.160468
1554,BRAMPTON GROVE,2.456875e+06,"(51.5899607, -0.3185249)",51.589961,-0.318525


In [14]:
df = df_affordable.drop(columns=['city_coord'])
df

Unnamed: 0,Street,Avg_Price,Latitude,Longitude
196,ALBION SQUARE,2.450000e+06,-41.273758,173.289393
390,ANHALT ROAD,2.435000e+06,51.480316,-0.166801
405,ANSDELL TERRACE,2.250000e+06,51.499890,-0.189103
422,APPLEGARTH ROAD,2.400000e+06,53.748654,-0.326670
856,BARONSMEAD ROAD,2.375000e+06,51.477315,-0.239457
982,BEAUCLERC ROAD,2.480000e+06,30.211452,-81.617981
1103,BELVEDERE DRIVE,2.340000e+06,44.707562,-63.545599
1216,BICKENHALL STREET,2.208500e+06,51.521201,-0.158908
1254,BIRCHLANDS AVENUE,2.217000e+06,51.448394,-0.160468
1554,BRAMPTON GROVE,2.456875e+06,51.589961,-0.318525


In [15]:
address = 'London, UK'

geolocator = Nominatim()
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of London City are {}, {}.'.format(latitude, longitude))

  app.launch_new_instance()


The geograpical coordinate of London City are 51.5073219, -0.1276474.


In [16]:
# create map of London using latitude and longitude values
map_london = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to map
for lat, lng, price, street in zip(df['Latitude'], df['Longitude'], df['Avg_Price'], df['Street']):
    label = '{}, {}'.format(street, price)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_london)  
    
map_london

In [17]:
#Define Foursquare Credentials and Version

CLIENT_ID = 'TJDE2ZNOVN1YZBYFCV0IIR5YEZP4MGRN1MAITPV1EUEYALZB' # Foursquare ID
CLIENT_SECRET = '5BWMQV2450HHTVWUH2A1GUJPMWTREWJCKIIPBIGCOSFZDLSR' # Foursquare Secret
VERSION = '20180604' # Foursquare API version

I can now proceed to the Modeling phase. I will analyze neighborhoods to recommend real estates where home buyers can make a real estate investment. I will then recommend profitable venues according to amenities and essential facilities surrounding such venues i.e. elementary schools, high schools, hospitals & grocery stores.

### Modeling


After exploring the dataset and gaining insights into it, I am ready to use the clustering methodology to analyze real estates. I will use the k-means clustering technique as it is fast and efficient in terms of computational cost, is highly flexible to account for mutations in real estate market in London and is accurate.

In [18]:

def getNearbyVenues(names, latitudes, longitudes, radius=500, LIMIT=100):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Street', 
                  'Street Latitude', 
                  'Street Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [19]:
# Run the above function on each location and create a new dataframe called location_venues and display it.
location_venues = getNearbyVenues(names=df['Street'],
                                   latitudes=df['Latitude'],
                                   longitudes=df['Longitude']
                                  )

ALBION SQUARE
ANHALT ROAD
ANSDELL TERRACE
APPLEGARTH ROAD
BARONSMEAD ROAD
BEAUCLERC ROAD
BELVEDERE DRIVE
BICKENHALL STREET
BIRCHLANDS AVENUE
BRAMPTON GROVE
BRIARDALE GARDENS
BROOKWAY
BURBAGE ROAD
BURY WALK
CALLCOTT STREET
CAMPDEN HILL ROAD
CAMPION ROAD
CANNING PLACE
CARLISLE ROAD
CARLTON GARDENS
CARLYLE COURT
CHALCOT SQUARE
CHARLES LANE
CHELSEA CRESCENT
CHESTER CLOSE NORTH
CHEYNE COURT
CHEYNE ROW
CHISWICK MALL
CITY ROAD
CLARENDON STREET
CLONCURRY STREET
COLBECK MEWS
COLLEGE CRESCENT
CORNWALL TERRACE MEWS
COURT LANE GARDENS
CRESCENT GROVE
DALEBURY ROAD
DEWHURST ROAD
DORIA ROAD
DOWNSHIRE HILL
DUCHESS WALK
ECCLESTON SQUARE MEWS
EGBERT STREET
EGERTON PLACE
ELM PARK ROAD
FLORAL STREET
FRANK DIXON WAY
FULTON MEWS
GERARD ROAD
GERRARD ROAD
GIRDLERS ROAD
GLOUCESTER CRESCENT
GORDON PLACE
GRAFTON SQUARE
GRAHAM TERRACE
HARMAN DRIVE
HARRIS STREET
HAVANNAH STREET
HAZLEWELL ROAD
HEREFORD MEWS
HERONDALE AVENUE
HIGHGATE HIGH STREET
HIGHWOOD HILL
HILLGATE PLACE
HOLLYCROFT AVENUE
HOLLYWOOD MEWS
HONEYWELL

In [20]:
location_venues

Unnamed: 0,Street,Street Latitude,Street Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,ALBION SQUARE,-41.273758,173.289393,The Free House,-41.273340,173.287364,Bar
1,ALBION SQUARE,-41.273758,173.289393,The Indian Cafe,-41.273308,173.286530,Indian Restaurant
2,ALBION SQUARE,-41.273758,173.289393,Queen's Gardens,-41.273671,173.291383,Park
3,ALBION SQUARE,-41.273758,173.289393,Urban,-41.274355,173.286317,New American Restaurant
4,ALBION SQUARE,-41.273758,173.289393,The Bridge Street Collective,-41.272520,173.285517,Café
5,ALBION SQUARE,-41.273758,173.289393,Fish Stop,-41.276010,173.289592,Fish & Chips Shop
6,ALBION SQUARE,-41.273758,173.289393,Deville Cafe,-41.271941,173.285535,Beer Garden
7,ALBION SQUARE,-41.273758,173.289393,Mango,-41.274460,173.285345,Indian Restaurant
8,ALBION SQUARE,-41.273758,173.289393,Hopgood's,-41.274749,173.283831,Restaurant
9,ALBION SQUARE,-41.273758,173.289393,Sprig & Fern,-41.274508,173.286527,Brewery


In [21]:
location_venues.groupby('Street').count()

Unnamed: 0_level_0,Street Latitude,Street Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Street,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
ALBION SQUARE,26,26,26,26,26,26
ANHALT ROAD,15,15,15,15,15,15
ANSDELL TERRACE,44,44,44,44,44,44
APPLEGARTH ROAD,4,4,4,4,4,4
BARONSMEAD ROAD,14,14,14,14,14,14
BEAUCLERC ROAD,5,5,5,5,5,5
BELVEDERE DRIVE,1,1,1,1,1,1
BICKENHALL STREET,67,67,67,67,67,67
BIRCHLANDS AVENUE,10,10,10,10,10,10
BRAMPTON GROVE,2,2,2,2,2,2


In [22]:
# get the List of Unique Categories
print('There are {} uniques categories.'.format(len(location_venues['Venue Category'].unique())))

There are 347 uniques categories.


In [23]:
location_venues.shape

(4597, 7)

In [24]:
# one hot encoding
venues_onehot = pd.get_dummies(location_venues[['Venue Category']], prefix="", prefix_sep="")

# add street column back to dataframe
venues_onehot['Street'] = location_venues['Street'] 

# move street column to the first column
fixed_columns = [venues_onehot.columns[-1]] + list(venues_onehot.columns[:-1])

#fixed_columns
venues_onehot = venues_onehot[fixed_columns]

venues_onehot.head()

Unnamed: 0,Street,ATM,Accessories Store,Adult Boutique,Afghan Restaurant,African Restaurant,American Restaurant,Antique Shop,Arcade,Argentinian Restaurant,...,Vietnamese Restaurant,Warehouse Store,Waterfront,Weight Loss Center,Wine Bar,Wine Shop,Women's Store,Yoga Studio,Zoo,Zoo Exhibit
0,ALBION SQUARE,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,ALBION SQUARE,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,ALBION SQUARE,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,ALBION SQUARE,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,ALBION SQUARE,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [25]:
london_grouped = venues_onehot.groupby('Street').mean().reset_index()
london_grouped

Unnamed: 0,Street,ATM,Accessories Store,Adult Boutique,Afghan Restaurant,African Restaurant,American Restaurant,Antique Shop,Arcade,Argentinian Restaurant,...,Vietnamese Restaurant,Warehouse Store,Waterfront,Weight Loss Center,Wine Bar,Wine Shop,Women's Store,Yoga Studio,Zoo,Zoo Exhibit
0,ALBION SQUARE,0.0000,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.000000,...,0.000000,0.0,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.0,0.0
1,ANHALT ROAD,0.0000,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.000000,...,0.000000,0.0,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.0,0.0
2,ANSDELL TERRACE,0.0000,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.000000,...,0.000000,0.0,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.0,0.0
3,APPLEGARTH ROAD,0.0000,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.000000,...,0.000000,0.0,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.0,0.0
4,BARONSMEAD ROAD,0.0000,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.000000,...,0.000000,0.0,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.0,0.0
5,BEAUCLERC ROAD,0.0000,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.000000,...,0.000000,0.0,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.0,0.0
6,BELVEDERE DRIVE,0.0000,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.000000,...,0.000000,0.0,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.0,0.0
7,BICKENHALL STREET,0.0000,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.000000,...,0.000000,0.0,0.000000,0.0,0.014925,0.000000,0.014925,0.014925,0.0,0.0
8,BIRCHLANDS AVENUE,0.0000,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.000000,...,0.000000,0.0,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.0,0.0
9,BRAMPTON GROVE,0.0000,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.000000,...,0.000000,0.0,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.0,0.0


In [26]:
# What are the top 5 venues/facilities nearby profitable real estate investments?#

num_top_venues = 5

for hood in london_grouped['Street']:
    print("----"+hood+"----")
    temp = london_grouped[london_grouped['Street'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----ALBION SQUARE----
               venue  freq
0               Café  0.19
1         Restaurant  0.08
2                Bar  0.08
3                Pub  0.08
4  Indian Restaurant  0.08


----ANHALT ROAD----
                 venue  freq
0                  Pub  0.27
1    French Restaurant  0.13
2        Grocery Store  0.13
3  Japanese Restaurant  0.07
4               Garden  0.07


----ANSDELL TERRACE----
                venue  freq
0           Juice Bar  0.09
1               Hotel  0.07
2                 Pub  0.07
3          Restaurant  0.07
4  Italian Restaurant  0.05


----APPLEGARTH ROAD----
       venue  freq
0  Nightclub  0.25
1     Casino  0.25
2        Pub  0.25
3        Bar  0.25
4        ATM  0.00


----BARONSMEAD ROAD----
               venue  freq
0        Sports Club  0.07
1               Park  0.07
2        Pizza Place  0.07
3        Coffee Shop  0.07
4  Food & Drink Shop  0.07


----BEAUCLERC ROAD----
                           venue  freq
0                            Spa  

            venue  freq
0            Park   0.2
1  Soccer Stadium   0.1
2     Pizza Place   0.1
3     Sports Club   0.1
4        Gym Pool   0.1


----GERRARD ROAD----
               venue  freq
0  Indian Restaurant  0.25
1                Pub  0.25
2           Bus Stop  0.25
3        Pizza Place  0.25
4                ATM  0.00


----GIRDLERS ROAD----
                venue  freq
0                 Pub  0.14
1      Sandwich Place  0.05
2   Convention Center  0.05
3           Gastropub  0.05
4  Italian Restaurant  0.05


----GORDON PLACE----
         venue  freq
0       Resort  0.33
1   Steakhouse  0.33
2  Pizza Place  0.33
3          ATM  0.00
4       Palace  0.00


----GRAFTON SQUARE----
          venue  freq
0           Pub  0.10
1    Restaurant  0.05
2          Café  0.05
3           Bar  0.04
4  Burger Joint  0.04


----GRAHAM TERRACE----
                venue  freq
0                Café   0.5
1   Indian Restaurant   0.5
2              Palace   0.0
3  Persian Restaurant   0.0
4       

                           venue  freq
0                     Restaurant   1.0
1                            ATM   0.0
2  Paper / Office Supplies Store   0.0
3            Peruvian Restaurant   0.0
4             Persian Restaurant   0.0


----PANTON STREET----
                venue  freq
0         Golf Course  0.67
1        Home Service  0.33
2                 ATM  0.00
3           Pet Store  0.00
4  Persian Restaurant  0.00


----PARK CRESCENT----
                           venue  freq
0                   Camera Store   0.5
1                  Moving Target   0.5
2                            ATM   0.0
3  Paper / Office Supplies Store   0.0
4             Persian Restaurant   0.0


----PARK LANE----
                  venue  freq
0  Fast Food Restaurant  0.07
1    Mexican Restaurant  0.05
2             Nightclub  0.05
3        Discount Store  0.04
4           Pizza Place  0.04


----PARKE ROAD----
            venue  freq
0        Gym Pool  0.25
1  Breakfast Spot  0.25
2             Pub  0.25

                  venue  freq
0          Burger Joint  0.25
1  Gym / Fitness Center  0.25
2         Grocery Store  0.25
3           Pizza Place  0.25
4                   ATM  0.00


----THE HEXAGON----
                venue  freq
0      Clothing Store  0.16
1         Coffee Shop  0.08
2                 Pub  0.08
3   Electronics Store  0.05
4  Italian Restaurant  0.05


----TREDEGAR SQUARE----
          venue  freq
0      Bus Stop  0.21
1           Pub  0.17
2   Pizza Place  0.14
3   Coffee Shop  0.07
4  Burger Joint  0.03


----TRINITY STREET----
            venue  freq
0  Clothing Store  0.12
1            Café  0.12
2   Bowling Alley  0.06
3   Train Station  0.06
4  Sandwich Place  0.06


----UPPER HAMPSTEAD WALK----
                venue  freq
0                 Pub  0.10
1                Café  0.10
2              Bakery  0.06
3  Italian Restaurant  0.05
4      Ice Cream Shop  0.05


----WALPOLE GARDENS----
               venue  freq
0                Pub  0.22
1  Convenience Store  0.

In [27]:
# Define a function to return the most common venues/facilities nearby real estate investments#

def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [28]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Street']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))
        
# create a new dataframe
venues_sorted = pd.DataFrame(columns=columns)
venues_sorted['Street'] = london_grouped['Street']

for ind in np.arange(london_grouped.shape[0]):
    venues_sorted.iloc[ind, 1:] = return_most_common_venues(london_grouped.iloc[ind, :], num_top_venues)

In [29]:
venues_sorted.head()


Unnamed: 0,Street,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,ALBION SQUARE,Café,Indian Restaurant,Bar,Pub,Restaurant,Coffee Shop,Park,Seafood Restaurant,Fish & Chips Shop,New American Restaurant
1,ANHALT ROAD,Pub,French Restaurant,Grocery Store,English Restaurant,Garden,Gym / Fitness Center,Japanese Restaurant,Diner,Cocktail Bar,Plaza
2,ANSDELL TERRACE,Juice Bar,Restaurant,Hotel,Pub,Italian Restaurant,Indian Restaurant,Clothing Store,English Restaurant,Lebanese Restaurant,Grocery Store
3,APPLEGARTH ROAD,Nightclub,Pub,Casino,Bar,Food Stand,Food Court,Food Truck,Factory,Falafel Restaurant,Farm
4,BARONSMEAD ROAD,Pub,Coffee Shop,Food & Drink Shop,Sports Club,Breakfast Spot,Movie Theater,Café,Farmers Market,Thai Restaurant,Park


In [30]:
london_grouped=df

After my inspection of venues/facilities/amenities nearby the most profitable real estate investments in London, I could begin by clustering properties by venues/facilities/amenities nearby.

In [31]:

#Dataframe to include Clusters

london_grouped_clustering=df
london_grouped_clustering.head()

Unnamed: 0,Street,Avg_Price,Latitude,Longitude
196,ALBION SQUARE,2450000.0,-41.273758,173.289393
390,ANHALT ROAD,2435000.0,51.480316,-0.166801
405,ANSDELL TERRACE,2250000.0,51.49989,-0.189103
422,APPLEGARTH ROAD,2400000.0,53.748654,-0.32667
856,BARONSMEAD ROAD,2375000.0,51.477315,-0.239457


In [None]:
kclusters = 5
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(london_grouped_clustering)
london_grouped_clustering['Cluster Labels'] = kmeans.labels_
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i+x+(i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(london_grouped_clustering['Latitude'], london_grouped_clustering['Longitude'], london_grouped_clustering['Street'], london_grouped_clustering['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

In [38]:
london_grouped_clustering.loc[london_grouped_clustering['Cluster Labels'] == 0, london_grouped_clustering.columns[[1] + list(range(5, london_grouped_clustering.shape[1]))]].head()

KeyError: 'Cluster Labels'