## 1. Import, install and load. 

In [None]:
!pip install geopy

In [15]:
import numpy as np
import requests
from bs4 import BeautifulSoup
import urllib.request
import pandas as pd
from pandas.io.json import json_normalize

!conda install -c conda-forge geopy --yes

import matplotlib.cm as cm
import matplotlib.colors as colors

!conda install -c conda-forge folium=0.5.0 --yes
import folium

Collecting package metadata (current_repodata.json): done
Solving environment: done

# All requested packages already installed.

Collecting package metadata (current_repodata.json): done
Solving environment: done

# All requested packages already installed.



In [16]:
conda update folium

Collecting package metadata (repodata.json): done
Solving environment: done

## Package Plan ##

  environment location: /Anaconda/anaconda3

  added / updated specs:
    - folium


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    certifi-2019.3.9           |           py37_0         155 KB
    conda-4.7.5                |           py37_0         3.0 MB
    conda-package-handling-1.3.11|           py37_0         260 KB
    geos-3.7.1                 |       h0a44026_0         1.5 MB
    openssl-1.1.1b             |       h1de35cc_1         3.4 MB
    owslib-0.17.1              |             py_0         122 KB
    proj4-5.2.0                |       h0a44026_1         6.9 MB
    pyepsg-0.4.0               |           py37_0          24 KB
    pykdtree-1.3.1             |   py37h1d22016_2          61 KB
    pyproj-1.9.6               |   py37h9c430a6_0          65 KB
    pyshp-2.1.0       

In [17]:
conda update conda-base


PackageNotInstalledError: Package is not installed in prefix.
  prefix: /Anaconda/anaconda3
  package name: conda-base



Note: you may need to restart the kernel to use updated packages.


## 2. Find data from the Web page containing Philadelphia Zip codes, scrape into Jupyter notebook.

In [19]:
#Gets the url and scrapes the html 
url = 'http://ciclt.net/sn/clt/capitolimpact/gw_ziplist.aspx?ClientCode=capitolimpact&State=pa&StName=Pennsylvania&StFIPS=42&FIPS=42101'
req = urllib.request.urlopen(url)


soup = BeautifulSoup(req)

In [20]:
#Finds the table to scrape
table = soup.find('table')

#Provides the empty arrays for the html tags that are being grabbed and assigned to the headings
P = []
C = []

for row in table.find_all('tr'):
    cells = row.find_all('td')
    if len(cells) == 3:
        P.append(cells[0].find(text=True))
        C.append(cells[1].find(text=True))

## 3. Merge Zip code numbers into City data

In [21]:
#Creates the dataframe and places the data in its respective columns
df_phil = pd.DataFrame(P, columns=['PostalCode'])
df_phil['City'] = C
df_phil.head()

Unnamed: 0,PostalCode,City
0,19019,Philadelphia
1,19101,Philadelphia
2,19102,Mid City East
3,19102,Middle City East
4,19102,Philadelphia


In [22]:
#A specialized function that joins the neighborhoods with the same postalcode
foo = lambda a: ','.join(a) 
df_phil = df_phil.groupby(['PostalCode']).agg({
                                'City': foo}).reset_index()

In [23]:
df_phil.head()

Unnamed: 0,PostalCode,City
0,19019,Philadelphia
1,19101,Philadelphia
2,19102,"Mid City East,Middle City East,Philadelphia"
3,19103,"Mid City West,Middle City West,Philadelphia"
4,19104,Philadelphia


## 4. Clean data, removing duplicates and P.O. boxes.

In [24]:
df_lonlat = pd.read_csv('https://public.opendatasoft.com/explore/dataset/us-zip-code-latitude-and-longitude/download/?format=csv&refine.state=PA&q=philadelphia&timezone=America/New_York&use_labels_for_header=true', delimiter=';')
df_lonlat.head(48)

Unnamed: 0,Zip,City,State,Latitude,Longitude,Timezone,Daylight savings time flag,geopoint
0,17959,New Philadelphia,PA,40.731739,-76.1278,-5,1,"40.731739,-76.1278"
1,19104,Philadelphia,PA,39.961612,-75.19957,-5,1,"39.961612,-75.19957"
2,19146,Philadelphia,PA,39.938512,-75.18067,-5,1,"39.938512,-75.18067"
3,19105,Philadelphia,PA,40.001811,-75.11787,-5,1,"40.001811,-75.11787"
4,19101,Philadelphia,PA,40.001811,-75.11787,-5,1,"40.001811,-75.11787"
5,19131,Philadelphia,PA,39.981112,-75.22486,-5,1,"39.981112,-75.22486"
6,19175,Philadelphia,PA,39.990562,-75.12957,-5,1,"39.990562,-75.12957"
7,19109,Philadelphia,PA,39.949612,-75.163722,-5,1,"39.949612,-75.163722"
8,19244,Philadelphia,PA,40.001811,-75.11787,-5,1,"40.001811,-75.11787"
9,19255,Philadelphia,PA,40.001811,-75.11787,-5,1,"40.001811,-75.11787"


### 4A. Drop unnecessary columns

In [25]:
df_lonlat.drop(['City', 'State', 'Timezone', 'Daylight savings time flag', 'geopoint'], axis=1, inplace=True)
df_lonlat.head()

Unnamed: 0,Zip,Latitude,Longitude
0,17959,40.731739,-76.1278
1,19104,39.961612,-75.19957
2,19146,39.938512,-75.18067
3,19105,40.001811,-75.11787
4,19101,40.001811,-75.11787


In [26]:
df_lonlat.rename(columns={'Zip':'PostalCode'}, inplace=True)

In [27]:
df_phil.PostalCode = df_phil.PostalCode.astype(int)

## 5. Merge the Zip Code dataframe and the latitude / longitude dataframe.

In [28]:
df_phil = pd.merge(df_phil, df_lonlat, on='PostalCode', how='outer')

## 6. Generate map showing Zip codes.

In [29]:
# create map of Philadelphia using latitude and longitude values
map_phil = folium.Map(location=[39.9524152, -75.1635755], zoom_start=11)

# add markers to map
for lat, lng, label in zip(df_phil['Latitude'], df_phil['Longitude'], df_phil['City']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_phil)  
    
map_phil

it can be hard to render map images on Github, but you can see the map in question [here](http://https://github.com/saracw/Coursera---IBM-Capstone-1/blob/master/PhillyClusters1.png).

### 6A. Scrape data from Zip code data page.

In [30]:
#Gets the url and scrapes the html 
url1 = 'https://www.zipdatamaps.com/zipcodes-philadelphia-pa'
req1 = urllib.request.urlopen(url1)

soup1 = BeautifulSoup(req1)

### 6B. Load data into dataframe.

In [31]:
table1 = soup1.find('table', class_='table')

Post = []
Pop = []

for row in table1.find_all('tr'):
    cells = row.find_all('td')
    if len(cells) == 8:
        Post.append(cells[0].find(text=True))
        Pop.append(cells[5].find(text=True))

In [32]:
df_pop = pd.DataFrame(Post, columns=['PostalCode'])
df_pop['Population'] = Pop

In [33]:
df_pop.PostalCode = df_pop.PostalCode.astype(int)

### 6C. Merge the Zip code, latitude, and longitude dataframes with population dataframe.

In [34]:
df_merged = pd.merge(df_phil, df_pop, on='PostalCode', how='outer')

In [None]:
cols = [2, 3, 4, 5, 6, 7, 8, 9, 10]
df_merged.drop(df_merged.columns[cols], axis=0, inplace=True)

In [36]:
df_merged = df_merged.dropna()

In [37]:
df_merged.head()

Unnamed: 0,PostalCode,City,Latitude,Longitude,Population
2,19102,"Mid City East,Middle City East,Philadelphia",39.952962,-75.16558,Philadelphia
3,19103,"Mid City West,Middle City West,Philadelphia",39.952162,-75.17406,Philadelphia
4,19104,Philadelphia,39.961612,-75.19957,Philadelphia
6,19106,Philadelphia,39.951062,-75.14589,Philadelphia
7,19107,Philadelphia,39.952112,-75.15853,Philadelphia


## 7. Submit Foursquare credentials

In [1]:
# define Foursquare Credentials and Version
CLIENT_ID = 'Your Foursquare ID' # your Foursquare ID
CLIENT_SECRET = 'Your Foursquare Secret' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentials:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentials:
CLIENT_ID: Your Foursquare ID
CLIENT_SECRET:Your Foursquare Secret


In [39]:
first_nei = df_merged['PostalCode'][2]
first_nei

19102

## 8. Zip Code 19102 covers a large, downtown area - offering a good range of venue categories.

In [40]:
first_nei_lat = df_merged.loc[2,'Latitude']
first_nei_lon = df_merged.loc[2,'Longitude']
print('Latitude and longitude values of {} are {}, {}.'.format(first_nei, 
                                                               first_nei_lat, 
                                                               first_nei_lon))


Latitude and longitude values of 19102 are 39.952962, -75.16558.


In [41]:
radius = 500 
LIMIT = 100
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    first_nei_lat, 
    first_nei_lon, 
    radius, 
    LIMIT)

In [42]:
results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5d2e30308afbe0003adb61dd'},
 'response': {'suggestedFilters': {'header': 'Tap to show:',
   'filters': [{'name': 'Open now', 'key': 'openNow'},
    {'name': '$-$$$$', 'key': 'price'}]},
  'headerLocation': 'Center City West',
  'headerFullLocation': 'Center City West, Philadelphia',
  'headerLocationGranularity': 'neighborhood',
  'totalResults': 233,
  'suggestedBounds': {'ne': {'lat': 39.9574620045, 'lng': -75.1597206637551},
   'sw': {'lat': 39.9484619955, 'lng': -75.17143933624492}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '4bde0d566198c9b6c5cc12ff',
       'name': 'Dilworth Park',
       'location': {'address': '1 Penn Sq',
        'crossStreet': 'West Side of City Hall',
        'lat': 39.952771846343104,
        'lng': -

In [43]:
results['response']['groups'][0]['items'][0]['venue']['categories'][0]['name']

'Park'

In [44]:
venues=results['response']['groups'][0]['items']
nearby_venues = json_normalize(venues)
nearby_venues.columns

Index(['reasons.count', 'reasons.items', 'referralId', 'venue.categories',
       'venue.delivery.id', 'venue.delivery.provider.icon.name',
       'venue.delivery.provider.icon.prefix',
       'venue.delivery.provider.icon.sizes', 'venue.delivery.provider.name',
       'venue.delivery.url', 'venue.events.count', 'venue.events.summary',
       'venue.id', 'venue.location.address', 'venue.location.cc',
       'venue.location.city', 'venue.location.country',
       'venue.location.crossStreet', 'venue.location.distance',
       'venue.location.formattedAddress', 'venue.location.labeledLatLngs',
       'venue.location.lat', 'venue.location.lng',
       'venue.location.neighborhood', 'venue.location.postalCode',
       'venue.location.state', 'venue.name', 'venue.photos.count',
       'venue.photos.groups', 'venue.venuePage.id'],
      dtype='object')

## 9. Gather information about the venues in this area from Foursquare.

In [45]:
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [46]:
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues = nearby_venues.loc[:, filtered_columns]
nearby_venues

Unnamed: 0,venue.name,venue.categories,venue.location.lat,venue.location.lng
0,Dilworth Park,"[{'id': '4bf58dd8d48988d163941735', 'name': 'P...",39.952772,-75.164723
1,La Colombe Coffee Roasters,"[{'id': '4bf58dd8d48988d1e0931735', 'name': 'C...",39.951659,-75.165238
2,City Hall Courtyard,"[{'id': '4bf58dd8d48988d164941735', 'name': 'P...",39.952484,-75.163592
3,One Liberty Observation Deck,"[{'id': '4bf58dd8d48988d165941735', 'name': 'S...",39.952740,-75.168068
4,JFK Plaza / Love Park,"[{'id': '4bf58dd8d48988d164941735', 'name': 'P...",39.954123,-75.165303
5,"The Ritz-Carlton, Philadelphia","[{'id': '4bf58dd8d48988d1fa931735', 'name': 'H...",39.951446,-75.164149
6,sweetgreen,"[{'id': '4bf58dd8d48988d1bd941735', 'name': 'S...",39.953115,-75.167258
7,UNIQLO,"[{'id': '4bf58dd8d48988d103951735', 'name': 'C...",39.951426,-75.167634
8,Del Frisco's Double Eagle Steak House,"[{'id': '4bf58dd8d48988d1cc941735', 'name': 'S...",39.950795,-75.165389
9,R2L,"[{'id': '4bf58dd8d48988d14e941735', 'name': 'A...",39.951714,-75.167485


In [47]:
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues

Unnamed: 0,name,categories,lat,lng
0,Dilworth Park,Park,39.952772,-75.164723
1,La Colombe Coffee Roasters,Coffee Shop,39.951659,-75.165238
2,City Hall Courtyard,Plaza,39.952484,-75.163592
3,One Liberty Observation Deck,Scenic Lookout,39.952740,-75.168068
4,JFK Plaza / Love Park,Plaza,39.954123,-75.165303
5,"The Ritz-Carlton, Philadelphia",Hotel,39.951446,-75.164149
6,sweetgreen,Salad Place,39.953115,-75.167258
7,UNIQLO,Clothing Store,39.951426,-75.167634
8,Del Frisco's Double Eagle Steak House,Steakhouse,39.950795,-75.165389
9,R2L,American Restaurant,39.951714,-75.167485


## 10. Retrieve nearby venues for all Zip Codes

In [48]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        venue_results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in venue_results])
    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['PostalCode', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [49]:
phil_venues = getNearbyVenues(names = df_merged['PostalCode'],
                                   latitudes = df_merged['Latitude'],
                                   longitudes = df_merged['Longitude']
                                  )

19102
19103
19104
19106
19107
19111
19112
19114
19115
19116
19118
19119
19120
19121
19122
19123
19124
19125
19126
19127
19128
19129
19130
19131
19132
19133
19134
19135
19136
19137
19138
19139
19140
19141
19142
19143
19144
19145
19146
19147
19148
19149
19150
19151
19152
19153
19154


In [50]:
print(phil_venues.shape)
phil_venues.head()

(1161, 7)


Unnamed: 0,PostalCode,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,19102,39.952962,-75.16558,Dilworth Park,39.952772,-75.164723,Park
1,19102,39.952962,-75.16558,La Colombe Coffee Roasters,39.951659,-75.165238,Coffee Shop
2,19102,39.952962,-75.16558,City Hall Courtyard,39.952484,-75.163592,Plaza
3,19102,39.952962,-75.16558,One Liberty Observation Deck,39.95274,-75.168068,Scenic Lookout
4,19102,39.952962,-75.16558,JFK Plaza / Love Park,39.954123,-75.165303,Plaza


In [51]:
phil_venues.groupby('PostalCode').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
PostalCode,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
19102,100,100,100,100,100,100
19103,100,100,100,100,100,100
19104,16,16,16,16,16,16
19106,100,100,100,100,100,100
19107,100,100,100,100,100,100
19111,12,12,12,12,12,12
19112,1,1,1,1,1,1
19114,7,7,7,7,7,7
19115,3,3,3,3,3,3
19116,12,12,12,12,12,12


In [52]:
print('There are {} unique categories.'.format(len(phil_venues['Venue Category'].unique())))

There are 224 unique categories.


In [53]:
phil_onehot = pd.get_dummies(phil_venues[['Venue Category']], prefix="", prefix_sep="")

phil_onehot['PostalCode'] = phil_venues['PostalCode'] 

fixed_columns = [phil_onehot.columns[-1]] + list(phil_onehot.columns[:-1])
phil_onehot = phil_onehot[fixed_columns]
phil_onehot.head()

Unnamed: 0,PostalCode,Accessories Store,Adult Boutique,African Restaurant,American Restaurant,Art Gallery,Art Museum,Arts & Crafts Store,Arts & Entertainment,Asian Restaurant,...,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Whisky Bar,Wine Bar,Wine Shop,Women's Store,Yoga Studio,Zoo Exhibit
0,19102,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,19102,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,19102,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,19102,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,19102,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [54]:
phil_grouped = phil_onehot.groupby('PostalCode').mean().reset_index()
phil_grouped

Unnamed: 0,PostalCode,Accessories Store,Adult Boutique,African Restaurant,American Restaurant,Art Gallery,Art Museum,Arts & Crafts Store,Arts & Entertainment,Asian Restaurant,...,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Whisky Bar,Wine Bar,Wine Shop,Women's Store,Yoga Studio,Zoo Exhibit
0,19102,0.01,0.0,0.0,0.04,0.0,0.01,0.01,0.0,0.0,...,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.03,0.0
1,19103,0.0,0.0,0.0,0.05,0.0,0.0,0.0,0.0,0.01,...,0.03,0.0,0.0,0.01,0.01,0.01,0.0,0.01,0.02,0.0
2,19104,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,19106,0.0,0.01,0.0,0.04,0.03,0.0,0.0,0.0,0.01,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,19107,0.0,0.0,0.0,0.01,0.01,0.0,0.0,0.0,0.01,...,0.01,0.01,0.0,0.01,0.0,0.01,0.01,0.0,0.0,0.0
5,19111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,19112,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,19114,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,19115,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,19116,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [55]:
phil_grouped.PostalCode = phil_grouped.PostalCode.astype(str)

## 11. Identify the most common venues for each Zip code.

In [56]:
num_top_venues = 5
for code in phil_grouped['PostalCode']:
    print("----"+code+"----")
    temp = phil_grouped[phil_grouped['PostalCode'] == code].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----19102----
                 venue  freq
0          Coffee Shop  0.07
1                Hotel  0.06
2  American Restaurant  0.04
3   Seafood Restaurant  0.03
4          Yoga Studio  0.03


----19103----
                     venue  freq
0      American Restaurant  0.05
1              Coffee Shop  0.05
2            Deli / Bodega  0.04
3         Sushi Restaurant  0.03
4  New American Restaurant  0.03


----19104----
                  venue  freq
0    Light Rail Station  0.12
1           Pizza Place  0.12
2         Deli / Bodega  0.06
3        Sandwich Place  0.06
4  Caribbean Restaurant  0.06


----19106----
                 venue  freq
0       History Museum  0.08
1        Historic Site  0.06
2          Coffee Shop  0.05
3  American Restaurant  0.04
4             Boutique  0.04


----19107----
                venue  freq
0              Bakery  0.07
1      Sandwich Place  0.05
2        Burger Joint  0.03
3  Chinese Restaurant  0.03
4               Hotel  0.03


----19111----
            

In [57]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [58]:
phil_grouped.PostalCode = phil_grouped.PostalCode.astype(int)

num_top_venues = 10

indicators = ['st', 'nd', 'rd']

columns = ['PostalCode']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['PostalCode'] = phil_grouped['PostalCode']

for ind in np.arange(phil_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(phil_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted

Unnamed: 0,PostalCode,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,19102,Coffee Shop,Hotel,American Restaurant,Salad Place,Seafood Restaurant,Cosmetics Shop,Spa,Yoga Studio,Italian Restaurant,Gourmet Shop
1,19103,American Restaurant,Coffee Shop,Deli / Bodega,Sushi Restaurant,Bar,Vegetarian / Vegan Restaurant,New American Restaurant,Japanese Restaurant,Comedy Club,Taco Place
2,19104,Pizza Place,Light Rail Station,Cosmetics Shop,Park,Caribbean Restaurant,Chinese Restaurant,Spa,Toy / Game Store,Coffee Shop,Sandwich Place
3,19106,History Museum,Historic Site,Coffee Shop,American Restaurant,Boutique,Hotel,Gastropub,Art Gallery,New American Restaurant,Bar
4,19107,Bakery,Sandwich Place,Burger Joint,Chinese Restaurant,Hotel,Pharmacy,Concert Hall,Bubble Tea Shop,Snack Place,Pub
5,19111,Deli / Bodega,Pizza Place,Convenience Store,Construction & Landscaping,Snack Place,Diner,Coffee Shop,Chinese Restaurant,Park,Baseball Field
6,19112,Food Truck,Yoga Studio,Food,Flower Shop,Fish Market,Filipino Restaurant,Field,Fast Food Restaurant,Farmers Market,Falafel Restaurant
7,19114,Golf Course,Donut Shop,Construction & Landscaping,Discount Store,Gym / Fitness Center,Sporting Goods Shop,English Restaurant,Fish Market,Filipino Restaurant,Field
8,19115,Pool,Italian Restaurant,Bakery,Zoo Exhibit,Eastern European Restaurant,Flower Shop,Fish Market,Filipino Restaurant,Field,Fast Food Restaurant
9,19116,Indian Restaurant,Donut Shop,Caucasian Restaurant,Chinese Restaurant,Beer Store,Beer Garden,Liquor Store,Bank,Grocery Store,Spa


## 12. Identify clusters within data.

In [59]:
from sklearn.cluster import KMeans

kclusters = 3

phil_grouped_clustering = phil_grouped.drop('PostalCode', 1)

kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(phil_grouped_clustering)

kmeans.labels_



array([1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1,
       1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0,
       1, 1, 0], dtype=int32)

In [60]:
phil_merged = df_merged[0:47]

phil_merged['Cluster Labels'] = kmeans.labels_

phil_merged = phil_merged.join(neighborhoods_venues_sorted.set_index('PostalCode'), on='PostalCode')

phil_merged.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  This is separate from the ipykernel package so we can avoid doing imports until


Unnamed: 0,PostalCode,City,Latitude,Longitude,Population,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,19102,"Mid City East,Middle City East,Philadelphia",39.952962,-75.16558,Philadelphia,1,Coffee Shop,Hotel,American Restaurant,Salad Place,Seafood Restaurant,Cosmetics Shop,Spa,Yoga Studio,Italian Restaurant,Gourmet Shop
3,19103,"Mid City West,Middle City West,Philadelphia",39.952162,-75.17406,Philadelphia,1,American Restaurant,Coffee Shop,Deli / Bodega,Sushi Restaurant,Bar,Vegetarian / Vegan Restaurant,New American Restaurant,Japanese Restaurant,Comedy Club,Taco Place
4,19104,Philadelphia,39.961612,-75.19957,Philadelphia,1,Pizza Place,Light Rail Station,Cosmetics Shop,Park,Caribbean Restaurant,Chinese Restaurant,Spa,Toy / Game Store,Coffee Shop,Sandwich Place
6,19106,Philadelphia,39.951062,-75.14589,Philadelphia,1,History Museum,Historic Site,Coffee Shop,American Restaurant,Boutique,Hotel,Gastropub,Art Gallery,New American Restaurant,Bar
7,19107,Philadelphia,39.952112,-75.15853,Philadelphia,1,Bakery,Sandwich Place,Burger Joint,Chinese Restaurant,Hotel,Pharmacy,Concert Hall,Bubble Tea Shop,Snack Place,Pub


## 13. Generate map with clusters superimposed. 

In [61]:
map_clusters = folium.Map(location=[39.9524152, -75.1635755], zoom_start=11)

x = np.arange(kclusters)
colors_array = cm.rainbow(np.linspace(0, 1, kclusters))
rainbow = [colors.rgb2hex(i) for i in colors_array]
print(rainbow)

markers_colors = []
for lat, lon, nei , cluster in zip(phil_merged['Latitude'], phil_merged['Longitude'], phil_merged['PostalCode'], phil_merged['Cluster Labels']):
    label = folium.Popup(str(nei) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

['#8000ff', '#80ffb4', '#ff0000']


[Philadelphia, clustered by venues](http://https://github.com/saracw/Coursera---IBM-Capstone-1/blob/master/PhillyClusters2.png)

In [62]:
phil_merged.loc[phil_merged['Cluster Labels'] == 0, phil_merged.columns[[0] + list(range(5, phil_merged.shape[1]))]]

Unnamed: 0,PostalCode,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
24,19126,0,Convenience Store,Pizza Place,Pharmacy,Deli / Bodega,Plaza,Intersection,Chinese Restaurant,Japanese Restaurant,Donut Shop,Korean Restaurant
33,19135,0,Food,Pizza Place,Playground,Fast Food Restaurant,Donut Shop,Pharmacy,Pub,Convenience Store,Intersection,Deli / Bodega
34,19136,0,Intersection,Pizza Place,Discount Store,Zoo Exhibit,Pharmacy,Clothing Store,Bakery,Gym,Coffee Shop,Convenience Store
35,19137,0,Convenience Store,Pizza Place,Pharmacy,Bus Station,Clothing Store,Food & Drink Shop,American Restaurant,Speakeasy,Café,Gym / Fitness Center
40,19142,0,Ice Cream Shop,African Restaurant,Pizza Place,Health & Beauty Service,Pharmacy,Eastern European Restaurant,Fish Market,Filipino Restaurant,Field,Fast Food Restaurant
41,19143,0,Intersection,Supermarket,Southern / Soul Food Restaurant,Pharmacy,Discount Store,Zoo Exhibit,English Restaurant,Fish Market,Filipino Restaurant,Field
49,19151,0,Supermarket,Food & Drink Shop,Bus Station,Pizza Place,Pharmacy,Seafood Restaurant,Zoo Exhibit,English Restaurant,Filipino Restaurant,Field
52,19154,0,Convenience Store,Deli / Bodega,Pizza Place,Pharmacy,Zoo Exhibit,Eastern European Restaurant,Fish Market,Filipino Restaurant,Field,Fast Food Restaurant


In [63]:
phil_merged.loc[phil_merged['Cluster Labels'] == 1, phil_merged.columns[[0] + list(range(5, phil_merged.shape[1]))]]

Unnamed: 0,PostalCode,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,19102,1,Coffee Shop,Hotel,American Restaurant,Salad Place,Seafood Restaurant,Cosmetics Shop,Spa,Yoga Studio,Italian Restaurant,Gourmet Shop
3,19103,1,American Restaurant,Coffee Shop,Deli / Bodega,Sushi Restaurant,Bar,Vegetarian / Vegan Restaurant,New American Restaurant,Japanese Restaurant,Comedy Club,Taco Place
4,19104,1,Pizza Place,Light Rail Station,Cosmetics Shop,Park,Caribbean Restaurant,Chinese Restaurant,Spa,Toy / Game Store,Coffee Shop,Sandwich Place
6,19106,1,History Museum,Historic Site,Coffee Shop,American Restaurant,Boutique,Hotel,Gastropub,Art Gallery,New American Restaurant,Bar
7,19107,1,Bakery,Sandwich Place,Burger Joint,Chinese Restaurant,Hotel,Pharmacy,Concert Hall,Bubble Tea Shop,Snack Place,Pub
11,19111,1,Deli / Bodega,Pizza Place,Convenience Store,Construction & Landscaping,Snack Place,Diner,Coffee Shop,Chinese Restaurant,Park,Baseball Field
13,19114,1,Golf Course,Donut Shop,Construction & Landscaping,Discount Store,Gym / Fitness Center,Sporting Goods Shop,English Restaurant,Fish Market,Filipino Restaurant,Field
14,19115,1,Pool,Italian Restaurant,Bakery,Zoo Exhibit,Eastern European Restaurant,Flower Shop,Fish Market,Filipino Restaurant,Field,Fast Food Restaurant
15,19116,1,Indian Restaurant,Donut Shop,Caucasian Restaurant,Chinese Restaurant,Beer Store,Beer Garden,Liquor Store,Bank,Grocery Store,Spa
16,19118,1,Bakery,Boutique,American Restaurant,Ice Cream Shop,Coffee Shop,Clothing Store,Grocery Store,Brewery,Bank,Caribbean Restaurant


In [64]:
phil_merged.loc[phil_merged['Cluster Labels'] == 2, phil_merged.columns[[0] + list(range(5, phil_merged.shape[1]))]]

Unnamed: 0,PostalCode,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
12,19112,2,Food Truck,Yoga Studio,Food,Flower Shop,Fish Market,Filipino Restaurant,Field,Fast Food Restaurant,Farmers Market,Falafel Restaurant


## 14. Overview of results

## 15. Discussion

## 16. Implications and Conclusions