# IBM Data Science Capstone

## T Farrington

## Week 3: Segmenting & Clustering  Toronto Neighbourhoods

### Part 1 - Get Neighbourhoods Dataframe

#### Import modules

In [146]:
print('Importing modules ...')

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import matplotlib.colors as colors
from bs4 import BeautifulSoup
from sklearn.cluster import KMeans
import folium
from geopy.geocoders import Nominatim
import geocoder
import requests

print('Modules imported')

Importing modules ...
Modules imported


#### Scrape html table

In [85]:
# Source table url
url = r'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
# Request and extract table
res = requests.get(url)
soup = BeautifulSoup(res.content,'lxml')
table = soup.find_all('table')[0]
# Convert table to dataframe
df = pd.read_html(str(table))
df = df[0]

In [86]:
headers = df.iloc[0]
suburbs_raw  = pd.DataFrame(df.values[1:], columns=headers)
# clean dataframe - replace true NaNs
suburbs_raw = suburbs_raw.replace('Not assigned', np.nan)
# drop Borough NaNs 
suburbs_raw = suburbs_raw[pd.notnull(suburbs_raw['Borough'])]
# set Neighbourhood NaNs to Borough
suburbs_raw.Neighbourhood.fillna(suburbs_raw.Borough, inplace = True)
# Group by Postcode and concatenate Neighborhood values
suburbs = suburbs_raw.groupby(['Postcode','Borough'])['Neighbourhood'].agg(', '.join)
# Reset row index
suburbs = suburbs.to_frame().reset_index()

#### Dataframe shape

In [87]:
print(f'The cleaned dataframe "suburbs" has the shape: {suburbs.shape}')

The cleaned dataframe "suburbs" has the shape: (103, 3)


### Part 2 - Geocode Postcode Centroids

#### Get postcode series

In [88]:
postcodes = suburbs['Postcode'].tolist()

#### Geocode Postcodes

In [93]:
# EITHER:
# initialise coordinate dataframe
#coordinates = pd.DataFrame(np.nan, index=postcodes, columns=['Latitude','Longitude'])
# Geocode postcodes
#print(f'Starting geocoding ...')
#for postcode in postcodes:
#    coords = None
#    print(f'Geocoding {postcode} ...')
#    while coords is None:
#        g = geocoder.google(f'{postcode}, Toronto, Canada')
#        coords = g.latlng
#    coordinates[postcode]['Latitude'] = coords[0]
#    coordinates[postcode]['Latitude'] = coords[1]
#    print(f'{postcode} geocoded!')
#print(f'Geocoding complete!')
#coordinates.head()
# OR:
coordinates = pd.read_csv('https://cocl.us/Geospatial_data/Geospatial_Coordinates.csv')
coordinates.rename(columns={'Postal Code': 'Postcode'}, inplace=True)
coordinates.head()

Unnamed: 0,Postcode,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


#### Merge dataframes

In [105]:
geo_suburbs = pd.merge(suburbs, coordinates, on=['Postcode'])
geo_suburbs.head(50)

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
5,M1J,Scarborough,Scarborough Village,43.744734,-79.239476
6,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park",43.727929,-79.262029
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge",43.711112,-79.284577
8,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West",43.716316,-79.239476
9,M1N,Scarborough,"Birch Cliff, Cliffside West",43.692657,-79.264848


In [104]:
geo_suburbs.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 103 entries, 0 to 102
Data columns (total 5 columns):
Postcode         103 non-null object
Borough          103 non-null object
Neighbourhood    103 non-null object
Latitude         103 non-null float64
Longitude        103 non-null float64
dtypes: float64(2), object(3)
memory usage: 4.8+ KB


### Part 3 - Toronto Suburbs

#### Set-up

##### Toronto coordinates

In [96]:
address = 'Toronto, Canada'
geolocator = Nominatim(user_agent="my_explorer")
location = geolocator.geocode(address)
t_lat, t_long = location.latitude, location.longitude
print(f'{address} is at {t_lat}, {t_long}')

Toronto, Canada is at 43.653963, -79.387207


##### Foursquare initialisation

In [98]:
client_id = 'XR1HMBG5YMM1JXLF4CEEDSQWWBCQVGIEUHQXS5Z0LMHFYJNY'
client_secret = 'FR0DFYDLSAI01MTY1CEKM1UXV0BICQLY412HRNR2ZQHAVFUP'
version = 20180605
url_explore = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&'.format(
    client_id, 
    client_secret, 
    version)
print(f'foursquare initialised!')
print(url_explore)

# extra parameters ll={},{}&radius={}&limit={}

foursquare initialised!
https://api.foursquare.com/v2/venues/explore?&client_id=XR1HMBG5YMM1JXLF4CEEDSQWWBCQVGIEUHQXS5Z0LMHFYJNY&client_secret=FR0DFYDLSAI01MTY1CEKM1UXV0BICQLY412HRNR2ZQHAVFUP&v=20180605


##### Define venue function

In [114]:
def getVenues(postcodes, lats, longs, radius = 500, limit = 100):
    '''
    get up to 100 venues for each postcode within radius of postcode centroid
    '''
    # initialise list
    venues_list = []
    # loop through postcodes
    for postcode, lat, long in zip(postcodes, lats, longs):
        
        print(f'Searching {postcode} ...', end="\r")
        
        # create query
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            client_id,
            client_secret,
            version,
            lat,
            long,
            radius,
            limit)
        
        # execute query
        results = requests.get(url).json()["response"]['groups'][0]['items']
        #print(results)
        
        # clean query response
        venues_list.append([(
            postcode, 
            lat, 
            long, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])
        
    # create dataframe
    venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    venues.columns = ['Postcode', 
                  'Postcode Latitude', 
                  'Postcode Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    return venues    

#### Search Toronto by postcode

In [115]:
toronto_venues = getVenues(postcodes=geo_suburbs['Postcode'],
                          lats=geo_suburbs['Latitude'],
                          longs=geo_suburbs['Longitude'],
                          radius = 500)
print(toronto_venues.shape)
toronto_venues.head()

Searching M1B ...
https://api.foursquare.com/v2/venues/explore?&client_id=XR1HMBG5YMM1JXLF4CEEDSQWWBCQVGIEUHQXS5Z0LMHFYJNY&client_secret=FR0DFYDLSAI01MTY1CEKM1UXV0BICQLY412HRNR2ZQHAVFUP&v=20180605&ll=43.806686299999996,-79.19435340000001&radius=500&limit=100
Searching M1C ...
https://api.foursquare.com/v2/venues/explore?&client_id=XR1HMBG5YMM1JXLF4CEEDSQWWBCQVGIEUHQXS5Z0LMHFYJNY&client_secret=FR0DFYDLSAI01MTY1CEKM1UXV0BICQLY412HRNR2ZQHAVFUP&v=20180605&ll=43.7845351,-79.16049709999999&radius=500&limit=100
Searching M1E ...
https://api.foursquare.com/v2/venues/explore?&client_id=XR1HMBG5YMM1JXLF4CEEDSQWWBCQVGIEUHQXS5Z0LMHFYJNY&client_secret=FR0DFYDLSAI01MTY1CEKM1UXV0BICQLY412HRNR2ZQHAVFUP&v=20180605&ll=43.7635726,-79.1887115&radius=500&limit=100
Searching M1G ...
https://api.foursquare.com/v2/venues/explore?&client_id=XR1HMBG5YMM1JXLF4CEEDSQWWBCQVGIEUHQXS5Z0LMHFYJNY&client_secret=FR0DFYDLSAI01MTY1CEKM1UXV0BICQLY412HRNR2ZQHAVFUP&v=20180605&ll=43.7709921,-79.21691740000001&radius=500&limit

Searching M3N ...
https://api.foursquare.com/v2/venues/explore?&client_id=XR1HMBG5YMM1JXLF4CEEDSQWWBCQVGIEUHQXS5Z0LMHFYJNY&client_secret=FR0DFYDLSAI01MTY1CEKM1UXV0BICQLY412HRNR2ZQHAVFUP&v=20180605&ll=43.7616313,-79.52099940000001&radius=500&limit=100
Searching M4A ...
https://api.foursquare.com/v2/venues/explore?&client_id=XR1HMBG5YMM1JXLF4CEEDSQWWBCQVGIEUHQXS5Z0LMHFYJNY&client_secret=FR0DFYDLSAI01MTY1CEKM1UXV0BICQLY412HRNR2ZQHAVFUP&v=20180605&ll=43.725882299999995,-79.31557159999998&radius=500&limit=100
Searching M4B ...
https://api.foursquare.com/v2/venues/explore?&client_id=XR1HMBG5YMM1JXLF4CEEDSQWWBCQVGIEUHQXS5Z0LMHFYJNY&client_secret=FR0DFYDLSAI01MTY1CEKM1UXV0BICQLY412HRNR2ZQHAVFUP&v=20180605&ll=43.7063972,-79.309937&radius=500&limit=100
Searching M4C ...
https://api.foursquare.com/v2/venues/explore?&client_id=XR1HMBG5YMM1JXLF4CEEDSQWWBCQVGIEUHQXS5Z0LMHFYJNY&client_secret=FR0DFYDLSAI01MTY1CEKM1UXV0BICQLY412HRNR2ZQHAVFUP&v=20180605&ll=43.695343900000005,-79.3183887&radius=500&limit

Searching M5S ...
https://api.foursquare.com/v2/venues/explore?&client_id=XR1HMBG5YMM1JXLF4CEEDSQWWBCQVGIEUHQXS5Z0LMHFYJNY&client_secret=FR0DFYDLSAI01MTY1CEKM1UXV0BICQLY412HRNR2ZQHAVFUP&v=20180605&ll=43.6626956,-79.4000493&radius=500&limit=100
Searching M5T ...
https://api.foursquare.com/v2/venues/explore?&client_id=XR1HMBG5YMM1JXLF4CEEDSQWWBCQVGIEUHQXS5Z0LMHFYJNY&client_secret=FR0DFYDLSAI01MTY1CEKM1UXV0BICQLY412HRNR2ZQHAVFUP&v=20180605&ll=43.6532057,-79.4000493&radius=500&limit=100
Searching M5V ...
https://api.foursquare.com/v2/venues/explore?&client_id=XR1HMBG5YMM1JXLF4CEEDSQWWBCQVGIEUHQXS5Z0LMHFYJNY&client_secret=FR0DFYDLSAI01MTY1CEKM1UXV0BICQLY412HRNR2ZQHAVFUP&v=20180605&ll=43.6289467,-79.3944199&radius=500&limit=100
Searching M5W ...
https://api.foursquare.com/v2/venues/explore?&client_id=XR1HMBG5YMM1JXLF4CEEDSQWWBCQVGIEUHQXS5Z0LMHFYJNY&client_secret=FR0DFYDLSAI01MTY1CEKM1UXV0BICQLY412HRNR2ZQHAVFUP&v=20180605&ll=43.6464352,-79.37484599999999&radius=500&limit=100
Searching M5X ...

Searching M9P ...
https://api.foursquare.com/v2/venues/explore?&client_id=XR1HMBG5YMM1JXLF4CEEDSQWWBCQVGIEUHQXS5Z0LMHFYJNY&client_secret=FR0DFYDLSAI01MTY1CEKM1UXV0BICQLY412HRNR2ZQHAVFUP&v=20180605&ll=43.696319,-79.53224240000002&radius=500&limit=100
Searching M9R ...
https://api.foursquare.com/v2/venues/explore?&client_id=XR1HMBG5YMM1JXLF4CEEDSQWWBCQVGIEUHQXS5Z0LMHFYJNY&client_secret=FR0DFYDLSAI01MTY1CEKM1UXV0BICQLY412HRNR2ZQHAVFUP&v=20180605&ll=43.6889054,-79.55472440000001&radius=500&limit=100
Searching M9V ...
https://api.foursquare.com/v2/venues/explore?&client_id=XR1HMBG5YMM1JXLF4CEEDSQWWBCQVGIEUHQXS5Z0LMHFYJNY&client_secret=FR0DFYDLSAI01MTY1CEKM1UXV0BICQLY412HRNR2ZQHAVFUP&v=20180605&ll=43.739416399999996,-79.5884369&radius=500&limit=100
Searching M9W ...
https://api.foursquare.com/v2/venues/explore?&client_id=XR1HMBG5YMM1JXLF4CEEDSQWWBCQVGIEUHQXS5Z0LMHFYJNY&client_secret=FR0DFYDLSAI01MTY1CEKM1UXV0BICQLY412HRNR2ZQHAVFUP&v=20180605&ll=43.706748299999994,-79.5940544&radius=500&limit

Unnamed: 0,Postcode,Postcode Latitude,Postcode Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,M1B,43.806686,-79.194353,Wendy's,43.807448,-79.199056,Fast Food Restaurant
1,M1C,43.784535,-79.160497,Royal Canadian Legion,43.782533,-79.163085,Bar
2,M1E,43.763573,-79.188711,Swiss Chalet Rotisserie & Grill,43.767697,-79.189914,Pizza Place
3,M1E,43.763573,-79.188711,G & G Electronics,43.765309,-79.191537,Electronics Store
4,M1E,43.763573,-79.188711,Big Bite Burrito,43.766299,-79.19072,Mexican Restaurant


#### Analysing each postcode

In [125]:
print('There are {} unique venue categories in Toronto.'.format(len(toronto_venues['Venue Category'].unique())))

# one-hot-encoding venue categories
toronto_encoded = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")
# add postcodes back
toronto_encoded['Postcode'] = toronto_venues['Postcode']
# and reorganise column order
fixed_columns = [toronto_encoded.columns[-1]] + list(toronto_encoded.columns[:-1])
toronto_encoded = toronto_encoded[fixed_columns]
print(f'Encoded dataframe shape: {toronto_encoded.shape}')
#toronto_encoded.head()
# aggregate encoded venues by postcode
toronto_grouped = toronto_encoded.groupby('Postcode').mean().reset_index()
print(f'Grouped dataframe shape: {toronto_grouped.shape}')
#toronto_grouped

There are 273 unique venue categories in Toronto.
Encoded dataframe shape: (2256, 274)
Grouped dataframe shape: (99, 274)


In [138]:
del postcode_venues

#### Top 10 venues per postcode

In [139]:
def return_most_common_venues(row, n):
    '''
    returns top n most frequent venues in descending order
    '''
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:n]

num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Postcode']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
postcode_venues = pd.DataFrame(columns=columns)
postcode_venues['Postcode'] = toronto_grouped['Postcode']

for ind in np.arange(toronto_grouped.shape[0]):
    postcode_venues.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

# view shape and head of postcode_venue dataframe
print(f'Postcode_venues shape: {postcode_venues.shape}')
postcode_venues.head()


Postcode_venues shape: (99, 11)


Unnamed: 0,Postcode,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M1B,Fast Food Restaurant,Yoga Studio,Doner Restaurant,Department Store,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Dog Run,Donut Shop
1,M1C,Bar,Yoga Studio,Doner Restaurant,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Dog Run,Donut Shop,Falafel Restaurant
2,M1E,Electronics Store,Breakfast Spot,Intersection,Rental Car Location,Mexican Restaurant,Medical Center,Pizza Place,Drugstore,Donut Shop,Doner Restaurant
3,M1G,Coffee Shop,Convenience Store,Korean Restaurant,Yoga Studio,Dim Sum Restaurant,Diner,Discount Store,Dog Run,Doner Restaurant,Donut Shop
4,M1H,Hakka Restaurant,Fried Chicken Joint,Bakery,Athletics & Sports,Thai Restaurant,Bank,Caribbean Restaurant,Dim Sum Restaurant,Diner,Discount Store


#### Cluster postcodes into 5 clusters

In [140]:
# really should optimise for k but going with 5 for now
# number of clusters
k = 5

# prep dataframe for clustering
toronto_clustering = toronto_grouped.drop('Postcode', 1)
# set-up and fit k-means clustering
kmeans = KMeans(n_clusters=k, random_state=0).fit(toronto_clustering)
# add cluster labels into postcode_venues dataframe
postcode_venues.insert(0, 'Cluster Labels', kmeans.labels_)
# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
toronto_postcodes = geo_suburbs
toronto_postcodes = toronto_postcodes.join(postcode_venues.set_index('Postcode'), on='Postcode')
# check results
toronto_postcodes.head()

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353,2.0,Fast Food Restaurant,Yoga Studio,Doner Restaurant,Department Store,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Dog Run,Donut Shop
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497,4.0,Bar,Yoga Studio,Doner Restaurant,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Dog Run,Donut Shop,Falafel Restaurant
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711,0.0,Electronics Store,Breakfast Spot,Intersection,Rental Car Location,Mexican Restaurant,Medical Center,Pizza Place,Drugstore,Donut Shop,Doner Restaurant
3,M1G,Scarborough,Woburn,43.770992,-79.216917,0.0,Coffee Shop,Convenience Store,Korean Restaurant,Yoga Studio,Dim Sum Restaurant,Diner,Discount Store,Dog Run,Doner Restaurant,Donut Shop
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476,0.0,Hakka Restaurant,Fried Chicken Joint,Bakery,Athletics & Sports,Thai Restaurant,Bank,Caribbean Restaurant,Dim Sum Restaurant,Diner,Discount Store


In [162]:
# Cluster labels are not integers so something's wrong
print('Cluster labels: {}'.format(toronto_postcodes['Cluster Labels'].unique()))
# Labels include NaN values for postcodes where no 4square results were returned
# therefore could either drop these postcodes or
# set NaN values to another integer value, e.g. 99, to include
# all postcodes in output but clearly indicating no venue results
# dropping rows with no venues
toronto_postcodes.dropna(axis = 0, how ='any', inplace = True)
toronto_postcodes=toronto_postcodes.astype({'Cluster Labels': int})
print('Cleaned cluster labels: {}'.format(toronto_postcodes['Cluster Labels'].unique()))
toronto_postcodes.head()

Cluster labels: [2 4 0 1 3]
Cleaned cluster labels: [2 4 0 1 3]


Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353,2,Fast Food Restaurant,Yoga Studio,Doner Restaurant,Department Store,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Dog Run,Donut Shop
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497,4,Bar,Yoga Studio,Doner Restaurant,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Dog Run,Donut Shop,Falafel Restaurant
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711,0,Electronics Store,Breakfast Spot,Intersection,Rental Car Location,Mexican Restaurant,Medical Center,Pizza Place,Drugstore,Donut Shop,Doner Restaurant
3,M1G,Scarborough,Woburn,43.770992,-79.216917,0,Coffee Shop,Convenience Store,Korean Restaurant,Yoga Studio,Dim Sum Restaurant,Diner,Discount Store,Dog Run,Doner Restaurant,Donut Shop
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476,0,Hakka Restaurant,Fried Chicken Joint,Bakery,Athletics & Sports,Thai Restaurant,Bank,Caribbean Restaurant,Dim Sum Restaurant,Diner,Discount Store


### Create map

In [165]:
# create map
map_clusters = folium.Map(location=[t_lat, t_long], zoom_start=10)

# set color scheme for the clusters
x = np.arange(k)
ys = [i + x + (i*x)**2 for i in range(k)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_postcodes['Latitude'], toronto_postcodes['Longitude'], toronto_postcodes['Postcode'], toronto_postcodes['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

### Observations

It appears that Toronto has a even coverage of postcodes in clusters 0 and 1 with cluster 1 appearing less frequently.

Clusters 2 to 4 occur only once each on the outskirts of Toronto, perhaps indicating the postcode is home to perhaps a mall or industrial park.

Interestingly 'downtown' Toronto all belongs to cluster 0, where we might expect a higher density of commercial/financial venues.

##### Further work

The work in this notebook illustrates a very basic clustering analysis of the postcodes of Toronto.

Further analysis can be performed to analyse venue sub-categories, e.g. where are various types of restaurants clustered? Can postcodes be profiled?

