In [1]:
import numpy as np
import requests
from bs4 import BeautifulSoup
import urllib.request
import pandas as pd
from pandas.io.json import json_normalize

!conda install -c conda-forge geopy --yes

import matplotlib.cm as cm
import matplotlib.colors as colors

!conda install -c conda-forge folium=0.5.0 --yes
import folium

Collecting package metadata (repodata.json): done
Solving environment: done

# All requested packages already installed.

Collecting package metadata (repodata.json): done
Solving environment: done

# All requested packages already installed.



In [2]:
#Gets the url and scrapes the html 
url = 'http://ciclt.net/sn/clt/capitolimpact/gw_ziplist.aspx?ClientCode=capitolimpact&State=pa&StName=Pennsylvania&StFIPS=42&FIPS=42101'
req = urllib.request.urlopen(url)

soup = BeautifulSoup(req)

In [3]:
#Finds the table to scrape
table = soup.find('table')

#Provides the empty arrays for the html tags that are being grabbed and assigned to the headings
P = []
C = []

for row in table.find_all('tr'):
    cells = row.find_all('td')
    if len(cells) == 3:
        P.append(cells[0].find(text=True))
        C.append(cells[1].find(text=True))

In [4]:
#Creates the dataframe and places the data in its respective columns
df_phil = pd.DataFrame(P, columns=['PostalCode'])
df_phil['City'] = C
df_phil.head(0)

Unnamed: 0,PostalCode,City


In [5]:
#A specialized function that joins the neighborhoods with the same postalcode
foo = lambda a: ','.join(a) 
df_phil = df_phil.groupby(['PostalCode']).agg({
                                'City': foo}).reset_index()

In [6]:
df_phil.head(84)

Unnamed: 0,PostalCode,City
0,19019,Philadelphia
1,19101,Philadelphia
2,19102,"Mid City East,Middle City East,Philadelphia"
3,19103,"Mid City West,Middle City West,Philadelphia"
4,19104,Philadelphia
5,19105,Philadelphia
6,19106,Philadelphia
7,19107,Philadelphia
8,19108,Philadelphia
9,19109,Philadelphia


In [7]:
df_lonlat = pd.read_csv('https://public.opendatasoft.com/explore/dataset/us-zip-code-latitude-and-longitude/download/?format=csv&refine.state=PA&q=philadelphia&timezone=America/New_York&use_labels_for_header=true', delimiter=';')
df_lonlat.head()

Unnamed: 0,Zip,City,State,Latitude,Longitude,Timezone,Daylight savings time flag,geopoint
0,19109,Philadelphia,PA,39.949612,-75.163722,-5,1,"39.949612,-75.163722"
1,19175,Philadelphia,PA,39.990562,-75.12957,-5,1,"39.990562,-75.12957"
2,19019,Philadelphia,PA,40.001811,-75.11787,-5,1,"40.001811,-75.11787"
3,19173,Philadelphia,PA,40.001811,-75.11787,-5,1,"40.001811,-75.11787"
4,19134,Philadelphia,PA,39.991712,-75.11116,-5,1,"39.991712,-75.11116"


In [15]:
df_lonlat = pd.read_csv('https://public.opendatasoft.com/explore/dataset/us-zip-code-latitude-and-longitude/download/?format=csv&refine.state=PA&q=philadelphia&timezone=America/New_York&use_labels_for_header=true', delimiter=';')
df_lonlat.head(48)

Unnamed: 0,Zip,City,State,Latitude,Longitude,Timezone,Daylight savings time flag,geopoint
0,19175,Philadelphia,PA,39.990562,-75.12957,-5,1,"39.990562,-75.12957"
1,19109,Philadelphia,PA,39.949612,-75.163722,-5,1,"39.949612,-75.163722"
2,19142,Philadelphia,PA,39.922612,-75.23453,-5,1,"39.922612,-75.23453"
3,19162,Philadelphia,PA,40.001811,-75.11787,-5,1,"40.001811,-75.11787"
4,19099,Philadelphia,PA,40.001811,-75.11787,-5,1,"40.001811,-75.11787"
5,19149,Philadelphia,PA,40.037711,-75.06658,-5,1,"40.037711,-75.06658"
6,19152,Philadelphia,PA,40.059611,-75.04837,-5,1,"40.059611,-75.04837"
7,19108,Philadelphia,PA,39.959662,-75.1605,-5,1,"39.959662,-75.1605"
8,19173,Philadelphia,PA,40.001811,-75.11787,-5,1,"40.001811,-75.11787"
9,19134,Philadelphia,PA,39.991712,-75.11116,-5,1,"39.991712,-75.11116"


In [9]:
df_lonlat.rename(columns={'Zip':'PostalCode'}, inplace=True)

In [10]:
df_phil.PostalCode = df_phil.PostalCode.astype(int)

In [11]:
df_phil = pd.merge(df_phil, df_lonlat, on='PostalCode', how='outer')

In [12]:
!pip install folium



In [None]:
address = 'Philadelphia, Pennsylvania'

geolocator = Nominatim(user_agent="foursquare_agent")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print(latitude, longitude)

In [17]:
# create map of Philadelphia using latitude and longitude values
map_phil = folium.Map(location=[39.9524152, -75.1635755], zoom_start=11)

# add markers to map
for lat, lng, label in zip(df_phil['Latitude'], df_phil['Longitude'], df_phil['City']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_phil)  
    
map_phil

In [18]:
#Gets the url and scrapes the html 
url1 = 'https://www.zipdatamaps.com/zipcodes-philadelphia-pa'
req1 = urllib.request.urlopen(url1)

soup1 = BeautifulSoup(req1)

In [19]:
table1 = soup1.find('table', class_='table')

Post = []
Pop = []

for row in table1.find_all('tr'):
    cells = row.find_all('td')
    if len(cells) == 8:
        Post.append(cells[0].find(text=True))
        Pop.append(cells[5].find(text=True))

In [20]:
df_pop = pd.DataFrame(Post, columns=['PostalCode'])
df_pop['Population'] = Pop

In [21]:
df_pop.PostalCode = df_pop.PostalCode.astype(int)

In [22]:
df_merged = pd.merge(df_phil, df_pop, on='PostalCode', how='outer')

In [23]:
df_merged = df_merged.dropna()

In [24]:
df_merged.head()

Unnamed: 0,PostalCode,City,Latitude,Longitude,Population
2,19102,"Mid City East,Middle City East,Philadelphia",39.952962,-75.16558,Philadelphia
3,19103,"Mid City West,Middle City West,Philadelphia",39.952162,-75.17406,Philadelphia
4,19104,Philadelphia,39.961612,-75.19957,Philadelphia
6,19106,Philadelphia,39.951062,-75.14589,Philadelphia
7,19107,Philadelphia,39.952112,-75.15853,Philadelphia


In [25]:
# define Foursquare Credentials and Version
CLIENT_ID = 'PKVKP4RSK1UDWBSX241ZZ2BFGZ2055XNG13MU1OZ5G4HO0FY' # your Foursquare ID
CLIENT_SECRET = 'IGWQRC4QT0EDQ0SSRIB4MEURVPRJ2TYL2LJP1BK55TZVGDIU' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: PKVKP4RSK1UDWBSX241ZZ2BFGZ2055XNG13MU1OZ5G4HO0FY
CLIENT_SECRET:IGWQRC4QT0EDQ0SSRIB4MEURVPRJ2TYL2LJP1BK55TZVGDIU


In [26]:
first_nei = df_merged['PostalCode'][2]
first_nei

19102

In [27]:
first_nei_lat = df_merged.loc[2,'Latitude']
first_nei_lon = df_merged.loc[2,'Longitude']
print('Latitude and longitude values of {} are {}, {}.'.format(first_nei, 
                                                               first_nei_lat, 
                                                               first_nei_lon))


Latitude and longitude values of 19102 are 39.952962, -75.16558.


In [28]:
radius = 500 
LIMIT = 100
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    first_nei_lat, 
    first_nei_lon, 
    radius, 
    LIMIT)

In [29]:
results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5d224eb3a6ec98002c71451b'},
 'response': {'suggestedFilters': {'header': 'Tap to show:',
   'filters': [{'name': '$-$$$$', 'key': 'price'},
    {'name': 'Open now', 'key': 'openNow'}]},
  'headerLocation': 'Center City West',
  'headerFullLocation': 'Center City West, Philadelphia',
  'headerLocationGranularity': 'neighborhood',
  'totalResults': 235,
  'suggestedBounds': {'ne': {'lat': 39.9574620045, 'lng': -75.1597206637551},
   'sw': {'lat': 39.9484619955, 'lng': -75.17143933624492}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '4bde0d566198c9b6c5cc12ff',
       'name': 'Dilworth Park',
       'location': {'address': '1 Penn Sq',
        'crossStreet': 'West Side of City Hall',
        'lat': 39.952771846343104,
        'lng': -

In [30]:
results['response']['groups'][0]['items'][0]['venue']['categories'][0]['name']

'Park'

In [31]:
venues=results['response']['groups'][0]['items']
nearby_venues = json_normalize(venues)
nearby_venues.columns

Index(['reasons.count', 'reasons.items', 'referralId', 'venue.categories',
       'venue.delivery.id', 'venue.delivery.provider.icon.name',
       'venue.delivery.provider.icon.prefix',
       'venue.delivery.provider.icon.sizes', 'venue.delivery.provider.name',
       'venue.delivery.url', 'venue.events.count', 'venue.events.summary',
       'venue.id', 'venue.location.address', 'venue.location.cc',
       'venue.location.city', 'venue.location.country',
       'venue.location.crossStreet', 'venue.location.distance',
       'venue.location.formattedAddress', 'venue.location.labeledLatLngs',
       'venue.location.lat', 'venue.location.lng',
       'venue.location.neighborhood', 'venue.location.postalCode',
       'venue.location.state', 'venue.name', 'venue.photos.count',
       'venue.photos.groups', 'venue.venuePage.id'],
      dtype='object')

In [32]:
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [33]:
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues = nearby_venues.loc[:, filtered_columns]
nearby_venues

Unnamed: 0,venue.name,venue.categories,venue.location.lat,venue.location.lng
0,Dilworth Park,"[{'id': '4bf58dd8d48988d163941735', 'name': 'P...",39.952772,-75.164723
1,La Colombe Coffee Roasters,"[{'id': '4bf58dd8d48988d1e0931735', 'name': 'C...",39.951659,-75.165238
2,City Hall Courtyard,"[{'id': '4bf58dd8d48988d164941735', 'name': 'P...",39.952484,-75.163592
3,One Liberty Observation Deck,"[{'id': '4bf58dd8d48988d165941735', 'name': 'S...",39.952740,-75.168068
4,JFK Plaza / Love Park,"[{'id': '4bf58dd8d48988d164941735', 'name': 'P...",39.954123,-75.165303
5,"The Ritz-Carlton, Philadelphia","[{'id': '4bf58dd8d48988d1fa931735', 'name': 'H...",39.951446,-75.164149
6,sweetgreen,"[{'id': '4bf58dd8d48988d1bd941735', 'name': 'S...",39.953115,-75.167258
7,UNIQLO,"[{'id': '4bf58dd8d48988d103951735', 'name': 'C...",39.951426,-75.167634
8,Del Frisco's Double Eagle Steak House,"[{'id': '4bf58dd8d48988d1cc941735', 'name': 'S...",39.950795,-75.165389
9,R2L,"[{'id': '4bf58dd8d48988d14e941735', 'name': 'A...",39.951714,-75.167485


In [34]:
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues

Unnamed: 0,name,categories,lat,lng
0,Dilworth Park,Park,39.952772,-75.164723
1,La Colombe Coffee Roasters,Coffee Shop,39.951659,-75.165238
2,City Hall Courtyard,Plaza,39.952484,-75.163592
3,One Liberty Observation Deck,Scenic Lookout,39.952740,-75.168068
4,JFK Plaza / Love Park,Plaza,39.954123,-75.165303
5,"The Ritz-Carlton, Philadelphia",Hotel,39.951446,-75.164149
6,sweetgreen,Salad Place,39.953115,-75.167258
7,UNIQLO,Clothing Store,39.951426,-75.167634
8,Del Frisco's Double Eagle Steak House,Steakhouse,39.950795,-75.165389
9,R2L,American Restaurant,39.951714,-75.167485


In [35]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        venue_results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in venue_results])
    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['PostalCode', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [36]:
phil_venues = getNearbyVenues(names = df_merged['PostalCode'],
                                   latitudes = df_merged['Latitude'],
                                   longitudes = df_merged['Longitude']
                                  )

19102
19103
19104
19106
19107
19111
19112
19114
19115
19116
19118
19119
19120
19121
19122
19123
19124
19125
19126
19127
19128
19129
19130
19131
19132
19133
19134
19135
19136
19137
19138
19139
19140
19141
19142
19143
19144
19145
19146
19147
19148
19149
19150
19151
19152
19153
19154


In [37]:
print(phil_venues.shape)
phil_venues.head()

(1163, 7)


Unnamed: 0,PostalCode,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,19102,39.952962,-75.16558,Dilworth Park,39.952772,-75.164723,Park
1,19102,39.952962,-75.16558,La Colombe Coffee Roasters,39.951659,-75.165238,Coffee Shop
2,19102,39.952962,-75.16558,City Hall Courtyard,39.952484,-75.163592,Plaza
3,19102,39.952962,-75.16558,One Liberty Observation Deck,39.95274,-75.168068,Scenic Lookout
4,19102,39.952962,-75.16558,JFK Plaza / Love Park,39.954123,-75.165303,Plaza


In [38]:
phil_venues.groupby('PostalCode').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
PostalCode,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
19102,100,100,100,100,100,100
19103,100,100,100,100,100,100
19104,19,19,19,19,19,19
19106,100,100,100,100,100,100
19107,100,100,100,100,100,100
19111,14,14,14,14,14,14
19112,1,1,1,1,1,1
19114,6,6,6,6,6,6
19115,2,2,2,2,2,2
19116,11,11,11,11,11,11


In [41]:
print('There are {} unique categories.'.format(len(phil_venues['Venue Category'].unique())))

There are 229 unique categories.


In [42]:
phil_onehot = pd.get_dummies(phil_venues[['Venue Category']], prefix="", prefix_sep="")

phil_onehot['PostalCode'] = phil_venues['PostalCode'] 

fixed_columns = [phil_onehot.columns[-1]] + list(phil_onehot.columns[:-1])
phil_onehot = phil_onehot[fixed_columns]
phil_onehot.head()

Unnamed: 0,PostalCode,Accessories Store,Adult Boutique,African Restaurant,American Restaurant,Art Gallery,Art Museum,Arts & Crafts Store,Arts & Entertainment,Asian Restaurant,...,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Whisky Bar,Wine Bar,Wine Shop,Women's Store,Yoga Studio,Zoo Exhibit
0,19102,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,19102,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,19102,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,19102,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,19102,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [43]:
phil_grouped = phil_onehot.groupby('PostalCode').mean().reset_index()
phil_grouped

Unnamed: 0,PostalCode,Accessories Store,Adult Boutique,African Restaurant,American Restaurant,Art Gallery,Art Museum,Arts & Crafts Store,Arts & Entertainment,Asian Restaurant,...,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Whisky Bar,Wine Bar,Wine Shop,Women's Store,Yoga Studio,Zoo Exhibit
0,19102,0.0,0.0,0.0,0.04,0.0,0.01,0.01,0.0,0.0,...,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.03,0.0
1,19103,0.0,0.0,0.0,0.06,0.0,0.0,0.0,0.0,0.0,...,0.04,0.0,0.0,0.01,0.01,0.01,0.0,0.01,0.02,0.0
2,19104,0.0,0.0,0.0,0.052632,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,19106,0.0,0.01,0.0,0.04,0.03,0.01,0.0,0.0,0.01,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,19107,0.0,0.0,0.0,0.01,0.01,0.0,0.01,0.0,0.01,...,0.01,0.01,0.0,0.01,0.0,0.01,0.01,0.0,0.0,0.0
5,19111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,19112,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,19114,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,19115,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,19116,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [44]:
phil_grouped.PostalCode = phil_grouped.PostalCode.astype(str)

In [45]:
num_top_venues = 5
for code in phil_grouped['PostalCode']:
    print("----"+code+"----")
    temp = phil_grouped[phil_grouped['PostalCode'] == code].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----19102----
                 venue  freq
0          Coffee Shop  0.07
1                Hotel  0.06
2  American Restaurant  0.04
3          Salad Place  0.03
4          Yoga Studio  0.03


----19103----
                           venue  freq
0            American Restaurant  0.06
1                    Coffee Shop  0.05
2                  Deli / Bodega  0.04
3  Vegetarian / Vegan Restaurant  0.04
4                            Bar  0.03


----19104----
                    venue  freq
0             Pizza Place  0.11
1            Intersection  0.05
2  Thrift / Vintage Store  0.05
3              Hookah Bar  0.05
4       Mobile Phone Shop  0.05


----19106----
                 venue  freq
0       History Museum  0.09
1   Italian Restaurant  0.05
2        Historic Site  0.05
3          Coffee Shop  0.05
4  American Restaurant  0.04


----19107----
                venue  freq
0              Bakery  0.07
1      Sandwich Place  0.05
2  Chinese Restaurant  0.04
3               Hotel  0.03
4       

In [46]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [47]:
phil_grouped.PostalCode = phil_grouped.PostalCode.astype(int)

num_top_venues = 10

indicators = ['st', 'nd', 'rd']

columns = ['PostalCode']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['PostalCode'] = phil_grouped['PostalCode']

for ind in np.arange(phil_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(phil_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted

Unnamed: 0,PostalCode,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,19102,Coffee Shop,Hotel,American Restaurant,Yoga Studio,Seafood Restaurant,Italian Restaurant,Cosmetics Shop,Salad Place,Clothing Store,Chinese Restaurant
1,19103,American Restaurant,Coffee Shop,Vegetarian / Vegan Restaurant,Deli / Bodega,New American Restaurant,Sushi Restaurant,Bar,Clothing Store,Bakery,Restaurant
2,19104,Pizza Place,Intersection,Thrift / Vintage Store,Piano Bar,Photography Studio,Cosmetics Shop,Dive Bar,Coffee Shop,Sandwich Place,Chinese Restaurant
3,19106,History Museum,Historic Site,Coffee Shop,Italian Restaurant,American Restaurant,New American Restaurant,Art Gallery,Boutique,Hotel,Bar
4,19107,Bakery,Sandwich Place,Chinese Restaurant,Burger Joint,Hotel,Deli / Bodega,Korean Restaurant,Concert Hall,Pizza Place,Dessert Shop
5,19111,Deli / Bodega,Bar,Dance Studio,Diner,Pizza Place,Convenience Store,Park,Coffee Shop,Sandwich Place,Bus Station
6,19112,Food Truck,Zoo Exhibit,Food & Drink Shop,Food,Flower Shop,Fish Market,Filipino Restaurant,Field,Fast Food Restaurant,Farmers Market
7,19114,Golf Course,Sporting Goods Shop,Discount Store,Donut Shop,Gym / Fitness Center,Eye Doctor,Food & Drink Shop,Food,Flower Shop,Fish Market
8,19115,Italian Restaurant,Pool,Zoo Exhibit,Event Space,Food & Drink Shop,Food,Flower Shop,Fish Market,Filipino Restaurant,Field
9,19116,Indian Restaurant,Donut Shop,Spa,Chinese Restaurant,Beer Store,Beer Garden,Liquor Store,Bank,Grocery Store,Pizza Place


In [48]:
from sklearn.cluster import KMeans

kclusters = 3

phil_grouped_clustering = phil_grouped.drop('PostalCode', 1)

kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(phil_grouped_clustering)

kmeans.labels_

array([0, 0, 0, 0, 0, 0, 1, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0], dtype=int32)

In [50]:
%%debug 
phil_merged = df_merged[0:47]

phil_merged['Cluster Labels'] = kmeans.labels_

phil_merged = phil_merged.join(neighborhoods_venues_sorted.set_index('PostalCode'), on='PostalCode')

phil_merged.head()

NOTE: Enter 'c' at the ipdb>  prompt to continue execution.
> [0;32m<string>[0m(2)[0;36m<module>[0;34m()[0m

ipdb> C
['Philadelphia', 'Philadelphia', 'Mid City East', 'Middle City East', 'Philadelphia', 'Mid City West', 'Middle City West', 'Philadelphia', 'Philadelphia', 'Philadelphia', 'Philadelphia', 'Philadelphia', 'Philadelphia', 'Philadelphia', 'Philadelphia', 'Philadelphia', 'Philadelphia', 'Philadelphia', 'Philadelphia', 'Philadelphia', 'Philadelphia', 'Philadelphia', 'Philadelphia', 'Philadelphia', 'Philadelphia', 'Philadelphia', 'Philadelphia', 'Philadelphia', 'Philadelphia', 'Manayunk', 'Philadelphia', 'Philadelphia', 'Philadelphia', 'Philadelphia', 'Philadelphia', 'Philadelphia', 'Philadelphia', 'Philadelphia', 'Philadelphia', 'Philadelphia', 'Philadelphia', 'Philadelphia', 'Philadelphia', 'Philadelphia', 'Philadelphia', 'Philadelphia', 'Philadelphia', 'Philadelphia', 'Philadelphia', 'Philadelphia', 'Philadelphia', 'Philadelphia', 'Philadelphia', 'Philadelphia', 'Overbr

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


In [51]:
map_clusters = folium.Map(location=[39.9524152, -75.1635755], zoom_start=11)

x = np.arange(kclusters)
colors_array = cm.rainbow(np.linspace(0, 1, kclusters))
rainbow = [colors.rgb2hex(i) for i in colors_array]
print(rainbow)

markers_colors = []
for lat, lon, nei , cluster in zip(phil_merged['Latitude'], phil_merged['Longitude'], phil_merged['PostalCode'], phil_merged['Cluster Labels']):
    label = folium.Popup(str(nei) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

['#8000ff', '#80ffb4', '#ff0000']


In [52]:
phil_merged.loc[phil_merged['Cluster Labels'] == 0, phil_merged.columns[[0] + list(range(5, phil_merged.shape[1]))]]

Unnamed: 0,PostalCode,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,19102,0,Coffee Shop,Hotel,American Restaurant,Yoga Studio,Seafood Restaurant,Italian Restaurant,Cosmetics Shop,Salad Place,Clothing Store,Chinese Restaurant
3,19103,0,American Restaurant,Coffee Shop,Vegetarian / Vegan Restaurant,Deli / Bodega,New American Restaurant,Sushi Restaurant,Bar,Clothing Store,Bakery,Restaurant
4,19104,0,Pizza Place,Intersection,Thrift / Vintage Store,Piano Bar,Photography Studio,Cosmetics Shop,Dive Bar,Coffee Shop,Sandwich Place,Chinese Restaurant
6,19106,0,History Museum,Historic Site,Coffee Shop,Italian Restaurant,American Restaurant,New American Restaurant,Art Gallery,Boutique,Hotel,Bar
7,19107,0,Bakery,Sandwich Place,Chinese Restaurant,Burger Joint,Hotel,Deli / Bodega,Korean Restaurant,Concert Hall,Pizza Place,Dessert Shop
11,19111,0,Deli / Bodega,Bar,Dance Studio,Diner,Pizza Place,Convenience Store,Park,Coffee Shop,Sandwich Place,Bus Station
13,19114,0,Golf Course,Sporting Goods Shop,Discount Store,Donut Shop,Gym / Fitness Center,Eye Doctor,Food & Drink Shop,Food,Flower Shop,Fish Market
15,19116,0,Indian Restaurant,Donut Shop,Spa,Chinese Restaurant,Beer Store,Beer Garden,Liquor Store,Bank,Grocery Store,Pizza Place
16,19118,0,American Restaurant,Boutique,Brewery,Ice Cream Shop,Flower Shop,Grocery Store,Bank,Bakery,Cosmetics Shop,Snack Place
17,19119,0,Pizza Place,Chinese Restaurant,Pharmacy,Train Station,Rental Car Location,Moving Target,Supermarket,Bakery,Gym,Café
