# Tokyo in the Heart of NYC

## Importing Libraries & Structuring the Data

#### The first step is to import the libraries I will use throughout the project.

In [1]:
import pandas as pd
import numpy as np 
import json # library to handle JSON files
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe
!conda install -c conda-forge geopy --yes 
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values
import requests
!conda install -c conda-forge folium=0.5.0 --yes 
import folium # map rendering library
from sklearn.cluster import KMeans # import k-means from clustering stage
import random # library for random number generation
# libraries for displaying images
from IPython.display import Image 
from IPython.core.display import HTML 
print('Folium installed')
print('Libraries imported.')

Solving environment: done

## Package Plan ##

  environment location: /opt/conda/envs/Python36

  added / updated specs: 
    - geopy


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    ca-certificates-2019.11.28 |       hecc5488_0         145 KB  conda-forge
    certifi-2019.11.28         |   py36h9f0ad1d_1         149 KB  conda-forge
    openssl-1.1.1f             |       h516909a_0         2.1 MB  conda-forge
    geographiclib-1.50         |             py_0          34 KB  conda-forge
    python_abi-3.6             |          1_cp36m           4 KB  conda-forge
    geopy-1.21.0               |             py_0          58 KB  conda-forge
    ------------------------------------------------------------
                                           Total:         2.5 MB

The following NEW packages will be INSTALLED:

    geographiclib:   1.50-py_0         conda-forge
    geopy:           1

In [3]:
from sklearn.cluster import KMeans 
# make_blobs is exported from sklearn.datasets itself; the old
# sklearn.datasets.samples_generator path is deprecated and missing from newer scikit-learn.
from sklearn.datasets import make_blobs
import matplotlib.cm as cm
import matplotlib.colors as colors
# Disable pandas' column/row truncation so DataFrames are displayed in full.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

###### To reach a concrete answer, my analysis is based on an online dataset of New York City divided into neighborhoods. I will combine this data with Foursquare API calls to gather further information about the venues in each neighborhood, in order to answer the investor's main concern, which is presented below:

### Is investing in a Sushi restaurant in New York a smart investment? And if so, where exactly?

##### First I will download the data points for New York city neighborhoods and boroughs. Then the data will be displayed in a dataframe of 4 features: 'Borough', 'Neighborhood', 'Latitude', 'Longitude'.

In [4]:
# Download the New York neighborhoods dataset to 'newyork_data.json'.
# A Python download (instead of `!wget -q`) works without a wget binary and, thanks to
# raise_for_status(), stops with an error instead of printing a false success message.
_response = requests.get('https://cocl.us/new_york_dataset', timeout=60)
_response.raise_for_status()
with open('newyork_data.json', 'wb') as _out_file:
    _out_file.write(_response.content)
print('Data downloaded!')

Data downloaded!


In [5]:
# Parse the downloaded JSON file into a Python object (newyork_data).
with open('newyork_data.json') as json_data:
    newyork_data = json.load(json_data)

In [6]:
# Each entry under 'features' describes one neighborhood (its 'properties' and 'geometry' are read later).
neighborhoods_data = newyork_data['features']

In [7]:
# Column layout of the neighborhoods table, and an empty frame with those columns.
column_names = ['Borough', 'Neighborhood', 'Latitude', 'Longitude'] 
neighborhoods = pd.DataFrame(columns=column_names)

#### Using a for loop, the data will now be filled in a data frame to be well displayed for further analysis.

In [9]:
# Collect one record per feature and build the frame in a single step.
# Appending row by row is quadratic, relies on DataFrame.append (removed in pandas 2.0),
# and would duplicate every row if this cell were executed twice.
neighborhood_records = []
for data in neighborhoods_data:
    borough = data['properties']['borough']
    neighborhood_name = data['properties']['name']

    # coordinates are stored as [longitude, latitude]
    neighborhood_latlon = data['geometry']['coordinates']
    neighborhood_lat = neighborhood_latlon[1]
    neighborhood_lon = neighborhood_latlon[0]

    neighborhood_records.append({'Borough': borough,
                                 'Neighborhood': neighborhood_name,
                                 'Latitude': neighborhood_lat,
                                 'Longitude': neighborhood_lon})

neighborhoods = pd.DataFrame(neighborhood_records, columns=column_names)

#### The first 5 neighborhoods(one in each row) will be displayed below.

In [10]:
# Preview the first rows of the neighborhoods table.
neighborhoods.head()

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude
0,Bronx,Wakefield,40.894705,-73.847201
1,Bronx,Co-op City,40.874294,-73.829939
2,Bronx,Eastchester,40.887556,-73.827806
3,Bronx,Fieldston,40.895437,-73.905643
4,Bronx,Riverdale,40.890834,-73.912585


#### Five boroughs are available. The number of neighborhoods in each borough is displayed below.

In [11]:
# Number of neighborhood rows per borough.
neighborhoods['Borough'].value_counts()

Queens           162
Brooklyn         140
Staten Island    126
Bronx            104
Manhattan         80
Name: Borough, dtype: int64

#### Next I will use the geopy library to get the coordinates of a specific location or address. As an example, I will find the longitude and latitude of the address 'New York City, NY'.

In [12]:
address = 'New York City, NY'

# Resolve the address to coordinates with the Nominatim geocoder.
geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
# geocode() returns None when nothing matches; fail with a readable message
# instead of an AttributeError on the attribute access below.
if location is None:
    raise ValueError('Could not geocode address: {!r}'.format(address))
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of New York City are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of New York City are 40.7127281, -74.0060152.


In [13]:
# Keep the New York City coordinates under dedicated names; the maps below are centred on them.
nylatitude=latitude
nylongitude=longitude

print(nylatitude,nylongitude)

40.7127281 -74.0060152


#### This is a visualization of all the neighborhoods and boroughs available in our dataset displayed on a map of New York.

In [14]:
# Base map centred on New York City.
map_newyork = folium.Map(location=[nylatitude, nylongitude], zoom_start=10)

# Appearance shared by every neighborhood marker.
marker_style = dict(
    radius=5,
    color='blue',
    fill=True,
    fill_color='#3186cc',
    fill_opacity=0.7,
    parse_html=False,
)

# One circle marker per neighborhood, with "<neighborhood>, <borough>" as popup text.
for lat, lng, borough, neighborhood in zip(neighborhoods['Latitude'], neighborhoods['Longitude'], neighborhoods['Borough'], neighborhoods['Neighborhood']):
    label = folium.Popup('{}, {}'.format(neighborhood, borough), parse_html=True)
    folium.CircleMarker([lat, lng], popup=label, **marker_style).add_to(map_newyork)

map_newyork

## Explore Neighborhoods in New York

#### Let's define my Foursquare credentials.

In [15]:
import os
from getpass import getpass

# Credentials must not be stored in (or printed by) the notebook: read them from the
# environment, falling back to an interactive prompt whose input is not echoed.
# NOTE(review): the key pair previously committed here should be treated as leaked and rotated.
client_id = os.environ.get('FOURSQUARE_CLIENT_ID') or getpass('Foursquare CLIENT_ID: ')
client_secret = os.environ.get('FOURSQUARE_CLIENT_SECRET') or getpass('Foursquare CLIENT_SECRET: ')
version = '20200331'

print('Foursquare credentials loaded.')

My Credentails:
CLIENT_ID: <redacted>
CLIENT_SECRET:<redacted>


#### Let's create a function to explore all available venues within a specified radius of our neighborhoods in New York.

In [16]:
limit=500
version='20200401'
# NOTE: this module-level `radius` is not what getNearbyVenues uses; the function's own
# `radius` parameter (default 200) takes precedence unless the caller passes a value.
radius=10000000000
def getNearbyVenues(names, latitudes, longitudes, radius=200):
    """Query the Foursquare "explore" endpoint around each neighborhood.

    Reads the module-level ``client_id``, ``client_secret``, ``version`` and
    ``limit`` values when building each request, and prints every
    neighborhood name as it is processed.

    Parameters
    ----------
    names, latitudes, longitudes : iterables of equal length
        Neighborhood name and coordinates; iterated together with ``zip``.
    radius : int, optional
        Search radius sent to the API as the ``radius`` query parameter.

    Returns
    -------
    pandas.DataFrame
        One row per returned venue with the columns 'Neighborhood',
        'Neighborhood Latitude', 'Neighborhood Longitude', 'Venue',
        'Venue Latitude', 'Venue Longitude', 'Venue Category'.
        The frame is empty (but has these columns) when nothing is returned.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status code.
    """
    columns = ['Neighborhood',
               'Neighborhood Latitude',
               'Neighborhood Longitude',
               'Venue',
               'Venue Latitude',
               'Venue Longitude',
               'Venue Category']

    venue_rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)

        response = requests.get(
            'https://api.foursquare.com/v2/venues/explore',
            params={
                'client_id': client_id,
                'client_secret': client_secret,
                'v': version,
                'll': '{},{}'.format(lat, lng),
                'radius': radius,
                'limit': limit,
            },
            timeout=30,  # never hang forever on a stalled connection
        )
        # Surface API errors (bad credentials, quota, ...) as an HTTP error
        # rather than as a KeyError on the missing 'groups' key.
        response.raise_for_status()

        groups = response.json()['response'].get('groups') or []
        results = groups[0]['items'] if groups else []

        for v in results:
            venue = v['venue']
            # A venue may come back without any category; record None instead of crashing.
            categories = venue.get('categories') or []
            venue_rows.append((
                name,
                lat,
                lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                categories[0]['name'] if categories else None))

    # Passing the columns to the constructor keeps the schema even when no venue was found.
    return pd.DataFrame(venue_rows, columns=columns)

#### Now we write the code to run the above function on each neighborhood and create a new dataframe called *newyork_venue*.

In [17]:
# Fetch the venues around every neighborhood (uses the function's default radius).
newyork_venue = getNearbyVenues(
    neighborhoods['Neighborhood'],
    neighborhoods['Latitude'],
    neighborhoods['Longitude'],
)


Wakefield
Co-op City
Eastchester
Fieldston
Riverdale
Kingsbridge
Marble Hill
Woodlawn
Norwood
Williamsbridge
Baychester
Pelham Parkway
City Island
Bedford Park
University Heights
Morris Heights
Fordham
East Tremont
West Farms
High  Bridge
Melrose
Mott Haven
Port Morris
Longwood
Hunts Point
Morrisania
Soundview
Clason Point
Throgs Neck
Country Club
Parkchester
Westchester Square
Van Nest
Morris Park
Belmont
Spuyten Duyvil
North Riverdale
Pelham Bay
Schuylerville
Edgewater Park
Castle Hill
Olinville
Pelham Gardens
Concourse
Unionport
Edenwald
Bay Ridge
Bensonhurst
Sunset Park
Greenpoint
Gravesend
Brighton Beach
Sheepshead Bay
Manhattan Terrace
Flatbush
Crown Heights
East Flatbush
Kensington
Windsor Terrace
Prospect Heights
Brownsville
Williamsburg
Bushwick
Bedford Stuyvesant
Brooklyn Heights
Cobble Hill
Carroll Gardens
Red Hook
Gowanus
Fort Greene
Park Slope
Cypress Hills
East New York
Starrett City
Canarsie
Flatlands
Mill Island
Manhattan Beach
Coney Island
Bath Beach
Borough Park
Dyker

#### Let's check the size of the resulting dataframe.

In [18]:
# (rows, columns) of the venues table.
print(newyork_venue.shape)

(5272, 7)


In [19]:
# Preview the first rows of the venues table.
newyork_venue.head()

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Wakefield,40.894705,-73.847201,Lollipops Gelato,40.894123,-73.845892,Dessert Shop
1,Wakefield,40.894705,-73.847201,Shell,40.894187,-73.845862,Gas Station
2,Wakefield,40.894705,-73.847201,Pitman Deli,40.894149,-73.845748,Food
3,Co-op City,40.874294,-73.829939,Modell's Sporting Goods,40.872584,-73.829532,Sporting Goods Shop
4,Co-op City,40.874294,-73.829939,truman track n field,40.874963,-73.830847,Baseball Field


#### Let's check how many venues were returned for each neighborhood

In [20]:
# Count the venue rows per neighborhood and show the first groups.
newyork_venue.groupby('Neighborhood').count().head()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Allerton,22,22,22,22,22,22
Annadale,2,2,2,2,2,2
Arden Heights,2,2,2,2,2,2
Arlington,2,2,2,2,2,2
Arrochar,2,2,2,2,2,2


#### Let's find out how many unique categories can be curated from all the returned venues


In [21]:
# Number of distinct values in the 'Venue Category' column.
print('There are {} uniques categories.'.format(len(newyork_venue['Venue Category'].unique())))

There are 309 uniques categories.


### Filtering the data to obtain venues related to our target investment, a dataframe of Sushi or Japanese restaurants will be shown below.

In [22]:
# Keep only the venues whose category is one of the two target categories.
target_categories = ['Sushi Restaurant', 'Japanese Restaurant']
is_target = newyork_venue['Venue Category'].isin(target_categories)
df = newyork_venue.loc[is_target].reset_index(drop=True)
print(df.shape)
df.head()

(114, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Bay Ridge,40.625801,-74.030621,Inaka,40.625141,-74.030418,Sushi Restaurant
1,Greenpoint,40.730201,-73.954241,Chiko,40.731871,-73.954369,Sushi Restaurant
2,Greenpoint,40.730201,-73.954241,Sakura 6,40.728804,-73.953683,Sushi Restaurant
3,Brighton Beach,40.576825,-73.965094,Sachiko Sushi,40.57638,-73.965797,Sushi Restaurant
4,Brighton Beach,40.576825,-73.965094,Umi Japanese Cuisine,40.576609,-73.966667,Sushi Restaurant


In [23]:
# Give the venue coordinate columns identifier-friendly names (they are read as
# attributes, e.g. df.Venue_latitude, when drawing the map). Rebinding instead of
# inplace=True avoids mutating a frame that an earlier cell already displayed.
df = df.rename(columns={"Venue Latitude": "Venue_latitude", "Venue Longitude": "Venue_longitude"})
df.head()

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue_latitude,Venue_longitude,Venue Category
0,Bay Ridge,40.625801,-74.030621,Inaka,40.625141,-74.030418,Sushi Restaurant
1,Greenpoint,40.730201,-73.954241,Chiko,40.731871,-73.954369,Sushi Restaurant
2,Greenpoint,40.730201,-73.954241,Sakura 6,40.728804,-73.953683,Sushi Restaurant
3,Brighton Beach,40.576825,-73.965094,Sachiko Sushi,40.57638,-73.965797,Sushi Restaurant
4,Brighton Beach,40.576825,-73.965094,Umi Japanese Cuisine,40.576609,-73.966667,Sushi Restaurant


### Visual of Sushi and Japanese Restaurants available

In [24]:
map_sushiplace_japanese = folium.Map(location=[nylatitude, nylongitude], zoom_start=11)

# add a red circle marker to represent New York
# (top-level folium.CircleMarker, the same public name used for the venue markers in the
# loop; the folium.features submodule path is an internal location of the class)
folium.CircleMarker(
    [nylatitude, nylongitude],
    radius=6,
    color='red',
    popup='New York',
    fill=True,
    fill_color='red',
    fill_opacity=0.6
).add_to(map_sushiplace_japanese)

# add one blue marker per Sushi / Japanese venue, using the venue name as popup text
for lat, lng, label in zip(df.Venue_latitude, df.Venue_longitude, df.Venue):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7
    ).add_to(map_sushiplace_japanese)

map_sushiplace_japanese

## Analyze Each Neighborhood

#### The aim of this section is to test the popularity & frequency of visiting different venues I am targeting.

In [25]:
# one hot encoding
newyork_onehot = pd.get_dummies(newyork_venue[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
newyork_onehot['Neighborhood'] =newyork_venue['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [newyork_onehot.columns[-1]] + list(newyork_onehot.columns[:-1])
newyork_onehot = newyork_onehot[fixed_columns]

newyork_onehot.head()

Unnamed: 0,Yoga Studio,Accessories Store,Adult Boutique,Afghan Restaurant,American Restaurant,Antique Shop,Argentinian Restaurant,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,Automotive Shop,BBQ Joint,Bagel Shop,Bakery,Bank,Bar,Baseball Field,Basketball Court,Beach,Beer Bar,Beer Garden,Beer Store,Big Box Store,Bike Rental / Bike Share,Bike Shop,Bistro,Board Shop,Boat or Ferry,Bookstore,Boutique,Bowling Alley,Boxing Gym,Breakfast Spot,Brewery,Bridal Shop,Bubble Tea Shop,Building,Burger Joint,Burrito Place,Bus Line,Bus Station,Bus Stop,Butcher,Café,Cajun / Creole Restaurant,Camera Store,Campground,Candy Store,Caribbean Restaurant,Caucasian Restaurant,Check Cashing Service,Cheese Shop,Chinese Restaurant,Chocolate Shop,Church,Circus,Climbing Gym,Clothing Store,Cocktail Bar,Coffee Shop,College Academic Building,College Gym,Comedy Club,Comfort Food Restaurant,Community Center,Concert Hall,Construction & Landscaping,Convenience Store,Cooking School,Cosmetics Shop,Creperie,Cuban Restaurant,Cultural Center,Cupcake Shop,Cycle Studio,Dance Studio,Deli / Bodega,Department Store,Design Studio,Dessert Shop,Diner,Discount Store,Distillery,Dive Bar,Doctor's Office,Dog Run,Donut Shop,Dry Cleaner,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,English Restaurant,Event Space,Exhibit,Eye Doctor,Factory,Falafel Restaurant,Farm,Farmers Market,Fast Food Restaurant,Field,Filipino Restaurant,Fish & Chips Shop,Fish Market,Flea Market,Flower Shop,Food,Food & Drink Shop,Food Court,Food Stand,Food Truck,French Restaurant,Fried Chicken Joint,Frozen Yogurt Shop,Fruit & Vegetable Store,Furniture / Home Store,Garden,Garden Center,Gas Station,Gastropub,Gay Bar,Gift Shop,Gourmet Shop,Greek Restaurant,Grocery Store,Gym,Gym / Fitness Center,Gym Pool,Gymnastics Gym,Halal Restaurant,Harbor / Marina,Hardware Store,Hawaiian Restaurant,Health & Beauty Service,Health Food Store,High School,Himalayan Restaurant,Historic Site,History Museum,Hobby Shop,Home Service,Hookah 
Bar,Hostel,Hot Dog Joint,Hotel,Hotel Bar,Hotpot Restaurant,IT Services,Ice Cream Shop,Indian Restaurant,Indie Movie Theater,Indie Theater,Indoor Play Area,Intersection,Irish Pub,Israeli Restaurant,Italian Restaurant,Japanese Curry Restaurant,Japanese Restaurant,Jazz Club,Jewelry Store,Jewish Restaurant,Juice Bar,Karaoke Bar,Kids Store,Korean Restaurant,Latin American Restaurant,Laundromat,Laundry Service,Lawyer,Library,Lingerie Store,Liquor Store,Locksmith,Lounge,Malay Restaurant,Market,Martial Arts Dojo,Massage Studio,Mattress Store,Medical Center,Mediterranean Restaurant,Men's Store,Metro Station,Mexican Restaurant,Middle Eastern Restaurant,Miscellaneous Shop,Mobile Phone Shop,Molecular Gastronomy Restaurant,Monument / Landmark,Movie Theater,Moving Target,Museum,Music Store,Music Venue,Nail Salon,Neighborhood,New American Restaurant,Nightclub,Noodle House,North Indian Restaurant,Office,Opera House,Optical Shop,Other Great Outdoors,Outdoor Sculpture,Outdoor Supply Store,Outdoors & Recreation,Paella Restaurant,Pakistani Restaurant,Paper / Office Supplies Store,Park,Pastry Shop,Performing Arts Venue,Perfume Shop,Peruvian Restaurant,Pet Café,Pet Store,Pharmacy,Photography Lab,Piano Bar,Pie Shop,Pier,Piercing Parlor,Pilates Studio,Pizza Place,Playground,Plaza,Poke Place,Polish Restaurant,Pool,Pool Hall,Post Office,Pub,Racetrack,Ramen Restaurant,Record Shop,Recording Studio,Rental Car Location,Residential Building (Apartment / Condo),Restaurant,Rock Climbing Spot,Rock Club,Roof Deck,Sake Bar,Salad Place,Salon / Barbershop,Sandwich Place,Scandinavian Restaurant,Scenic Lookout,Seafood Restaurant,Shanghai Restaurant,Shipping Store,Shoe Store,Shop & Service,Shopping Mall,Skating Rink,Smoke Shop,Smoothie Shop,Snack Place,Soup Place,South American Restaurant,Southern / Soul Food Restaurant,Spa,Spanish Restaurant,Speakeasy,Spiritual Center,Sporting Goods Shop,Sports Bar,Sports Club,Sri Lankan Restaurant,Steakhouse,Street Art,Strip Club,Supermarket,Supplement Shop,Sushi 
Restaurant,Swiss Restaurant,Szechuan Restaurant,Taco Place,Tailor Shop,Tanning Salon,Tapas Restaurant,Tattoo Parlor,Tea Room,Temple,Tennis Court,Tex-Mex Restaurant,Thai Restaurant,Theater,Thrift / Vintage Store,Tiki Bar,Tourist Information Center,Toy / Game Store,Trail,Train,Train Station,Turkish Restaurant,Vape Store,Varenyky restaurant,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Wine Bar,Wine Shop,Wings Joint,Women's Store
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,Wakefield,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,Wakefield,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,Wakefield,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,Co-op City,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,Co-op City,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


#### And let's examine the new dataframe size.

In [26]:
# (rows, columns) of the one-hot encoded table.
newyork_onehot.shape

(5272, 309)

#### Next, let's group rows by neighborhood and by taking the mean of the frequency of occurrence of each category

In [27]:
# Average the one-hot columns within each neighborhood, then turn the
# 'Neighborhood' index back into a regular column.
neighborhood_means = newyork_onehot.groupby('Neighborhood').mean()
newyork_grouped = neighborhood_means.reset_index()
newyork_grouped.head()

Unnamed: 0,Neighborhood,Yoga Studio,Accessories Store,Adult Boutique,Afghan Restaurant,American Restaurant,Antique Shop,Argentinian Restaurant,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,Automotive Shop,BBQ Joint,Bagel Shop,Bakery,Bank,Bar,Baseball Field,Basketball Court,Beach,Beer Bar,Beer Garden,Beer Store,Big Box Store,Bike Rental / Bike Share,Bike Shop,Bistro,Board Shop,Boat or Ferry,Bookstore,Boutique,Bowling Alley,Boxing Gym,Breakfast Spot,Brewery,Bridal Shop,Bubble Tea Shop,Building,Burger Joint,Burrito Place,Bus Line,Bus Station,Bus Stop,Butcher,Café,Cajun / Creole Restaurant,Camera Store,Campground,Candy Store,Caribbean Restaurant,Caucasian Restaurant,Check Cashing Service,Cheese Shop,Chinese Restaurant,Chocolate Shop,Church,Circus,Climbing Gym,Clothing Store,Cocktail Bar,Coffee Shop,College Academic Building,College Gym,Comedy Club,Comfort Food Restaurant,Community Center,Concert Hall,Construction & Landscaping,Convenience Store,Cooking School,Cosmetics Shop,Creperie,Cuban Restaurant,Cultural Center,Cupcake Shop,Cycle Studio,Dance Studio,Deli / Bodega,Department Store,Design Studio,Dessert Shop,Diner,Discount Store,Distillery,Dive Bar,Doctor's Office,Dog Run,Donut Shop,Dry Cleaner,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,English Restaurant,Event Space,Exhibit,Eye Doctor,Factory,Falafel Restaurant,Farm,Farmers Market,Fast Food Restaurant,Field,Filipino Restaurant,Fish & Chips Shop,Fish Market,Flea Market,Flower Shop,Food,Food & Drink Shop,Food Court,Food Stand,Food Truck,French Restaurant,Fried Chicken Joint,Frozen Yogurt Shop,Fruit & Vegetable Store,Furniture / Home Store,Garden,Garden Center,Gas Station,Gastropub,Gay Bar,Gift Shop,Gourmet Shop,Greek Restaurant,Grocery Store,Gym,Gym / Fitness Center,Gym Pool,Gymnastics Gym,Halal Restaurant,Harbor / Marina,Hardware Store,Hawaiian Restaurant,Health & Beauty Service,Health Food Store,High School,Himalayan Restaurant,Historic Site,History Museum,Hobby Shop,Home 
Service,Hookah Bar,Hostel,Hot Dog Joint,Hotel,Hotel Bar,Hotpot Restaurant,IT Services,Ice Cream Shop,Indian Restaurant,Indie Movie Theater,Indie Theater,Indoor Play Area,Intersection,Irish Pub,Israeli Restaurant,Italian Restaurant,Japanese Curry Restaurant,Japanese Restaurant,Jazz Club,Jewelry Store,Jewish Restaurant,Juice Bar,Karaoke Bar,Kids Store,Korean Restaurant,Latin American Restaurant,Laundromat,Laundry Service,Lawyer,Library,Lingerie Store,Liquor Store,Locksmith,Lounge,Malay Restaurant,Market,Martial Arts Dojo,Massage Studio,Mattress Store,Medical Center,Mediterranean Restaurant,Men's Store,Metro Station,Mexican Restaurant,Middle Eastern Restaurant,Miscellaneous Shop,Mobile Phone Shop,Molecular Gastronomy Restaurant,Monument / Landmark,Movie Theater,Moving Target,Museum,Music Store,Music Venue,Nail Salon,New American Restaurant,Nightclub,Noodle House,North Indian Restaurant,Office,Opera House,Optical Shop,Other Great Outdoors,Outdoor Sculpture,Outdoor Supply Store,Outdoors & Recreation,Paella Restaurant,Pakistani Restaurant,Paper / Office Supplies Store,Park,Pastry Shop,Performing Arts Venue,Perfume Shop,Peruvian Restaurant,Pet Café,Pet Store,Pharmacy,Photography Lab,Piano Bar,Pie Shop,Pier,Piercing Parlor,Pilates Studio,Pizza Place,Playground,Plaza,Poke Place,Polish Restaurant,Pool,Pool Hall,Post Office,Pub,Racetrack,Ramen Restaurant,Record Shop,Recording Studio,Rental Car Location,Residential Building (Apartment / Condo),Restaurant,Rock Climbing Spot,Rock Club,Roof Deck,Sake Bar,Salad Place,Salon / Barbershop,Sandwich Place,Scandinavian Restaurant,Scenic Lookout,Seafood Restaurant,Shanghai Restaurant,Shipping Store,Shoe Store,Shop & Service,Shopping Mall,Skating Rink,Smoke Shop,Smoothie Shop,Snack Place,Soup Place,South American Restaurant,Southern / Soul Food Restaurant,Spa,Spanish Restaurant,Speakeasy,Spiritual Center,Sporting Goods Shop,Sports Bar,Sports Club,Sri Lankan Restaurant,Steakhouse,Street Art,Strip Club,Supermarket,Supplement Shop,Sushi 
Restaurant,Swiss Restaurant,Szechuan Restaurant,Taco Place,Tailor Shop,Tanning Salon,Tapas Restaurant,Tattoo Parlor,Tea Room,Temple,Tennis Court,Tex-Mex Restaurant,Thai Restaurant,Theater,Thrift / Vintage Store,Tiki Bar,Tourist Information Center,Toy / Game Store,Trail,Train,Train Station,Turkish Restaurant,Vape Store,Varenyky restaurant,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Wine Bar,Wine Shop,Wings Joint,Women's Store
0,Allerton,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.090909,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.090909,0.0,0.0,0.0,0.0,0.090909,0.0,0.0,0.0,0.0,0.090909,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.090909,0.0,0.0,0.0,0.0,0.0,0.090909,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.090909,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.090909,0.0,0.0,0.0,0.0,0.0,0.0,0.181818,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.090909,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Annadale,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Arden Heights,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Arlington,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Arrochar,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


#### Displayed below is the mean of the frequency of occurrence of both *Sushi and Japanese Restaurants*.

In [28]:
# Narrow the grouped frame to the neighborhood name plus the two Japanese-cuisine columns.
newyork_grouped[['Neighborhood','Sushi Restaurant','Japanese Restaurant']]

Unnamed: 0,Neighborhood,Sushi Restaurant,Japanese Restaurant
0,Allerton,0.0,0.0
1,Annadale,0.0,0.0
2,Arden Heights,0.0,0.0
3,Arlington,0.0,0.0
4,Arrochar,0.0,0.0
5,Astoria,0.0,0.0
6,Astoria Heights,0.0,0.0
7,Auburndale,0.0,0.0
8,Bath Beach,0.0,0.0
9,Battery Park City,0.038462,0.0


#### Let's confirm the new size

In [29]:
# (number of rows, number of columns) of the grouped frame.
newyork_grouped.shape

(260, 309)

#### Let's print each neighborhood along with its top 3 most common venues

In [30]:
# How many of the most frequent venue categories to print per neighborhood.
num_top_venues = 3

for hood in newyork_grouped['Neighborhood']:
    print("----"+hood+"----")
    # Turn the neighborhood's row into a two-column (venue, freq) table.
    hood_profile = newyork_grouped[newyork_grouped['Neighborhood'] == hood].T.reset_index()
    hood_profile.columns = ['venue','freq']
    ranked = (
        hood_profile.iloc[1:]  # drop the first entry (the 'Neighborhood' label, assuming it is the first column)
        .assign(freq=lambda frame: frame['freq'].astype(float))
        .round({'freq': 2})
        .sort_values('freq', ascending=False)
        .reset_index(drop=True)
    )
    print(ranked.head(num_top_venues))
    print('\n')

----Allerton----
               venue  freq
0        Pizza Place  0.18
1  Martial Arts Dojo  0.09
2        Gas Station  0.09


----Annadale----
                  venue  freq
0          Cocktail Bar   1.0
1  Other Great Outdoors   0.0
2                  Park   0.0


----Arden Heights----
          venue  freq
0      Bus Stop   1.0
1   Yoga Studio   0.0
2  Optical Shop   0.0


----Arlington----
          venue  freq
0      Bus Stop   1.0
1   Yoga Studio   0.0
2  Optical Shop   0.0


----Arrochar----
         venue  freq
0  Pizza Place   1.0
1  Yoga Studio   0.0
2  Opera House   0.0


----Astoria----
                  venue  freq
0          Intersection  0.25
1          Liquor Store  0.25
2  Fast Food Restaurant  0.25


----Astoria Heights----
                venue  freq
0               Plaza  0.25
1  Chinese Restaurant  0.25
2          Laundromat  0.25


----Auburndale----
           venue  freq
0  Deli / Bodega   0.5
1  Train Station   0.5
2    Pastry Shop   0.0


----Bath Beach----
   

2           Pharmacy  0.33


----Downtown----
                venue  freq
0        Burger Joint  0.04
1    Asian Restaurant  0.04
2  Chinese Restaurant  0.04


----Dumbo----
                 venue  freq
0                  Gym  0.06
1               Bakery  0.06
2  American Restaurant  0.06


----Dyker Heights----
                           venue  freq
0                   Burger Joint   1.0
1                   Optical Shop   0.0
2  Paper / Office Supplies Store   0.0


----East Flatbush----
              venue  freq
0  Department Store  0.12
1       Supermarket  0.12
2    Hardware Store  0.12


----East Harlem----
                  venue  freq
0           Pizza Place  0.12
1  Gym / Fitness Center  0.08
2   Fried Chicken Joint  0.08


----East New York----
              venue  freq
0     Deli / Bodega   0.4
1  Asian Restaurant   0.2
2        Food Truck   0.2


----East Tremont----
        venue  freq
0    Bus Stop  0.09
1  Shoe Store  0.09
2        Café  0.09


----East Village----
      

               venue  freq
0  Health Food Store  0.17
1    Photography Lab  0.17
2      Deli / Bodega  0.17


----Lenox Hill----
               venue  freq
0  French Restaurant  0.07
1       Burger Joint  0.07
2         Taco Place  0.07


----Lighthouse Hill----
          venue  freq
0    Art Museum   1.0
1   Yoga Studio   0.0
2  Optical Shop   0.0


----Lincoln Square----
                   venue  freq
0                Theater  0.17
1  Performing Arts Venue  0.10
2    Indie Movie Theater  0.10


----Lindenwood----
         venue  freq
0   Playground   0.5
1         Park   0.5
2  Yoga Studio   0.0


----Little Italy----
                venue  freq
0  Italian Restaurant  0.11
1         Coffee Shop  0.08
2              Bakery  0.06


----Little Neck----
         venue  freq
0   Playground   0.5
1     Pharmacy   0.5
2  Yoga Studio   0.0


----Long Island City----
           venue  freq
0    Coffee Shop  0.11
1           Café  0.11
2  Deli / Bodega  0.08


----Longwood----
                

         venue  freq
0   Playground   1.0
1  Yoga Studio   0.0
2  Opera House   0.0


----Randall Manor----
         venue  freq
0   Playground   1.0
1  Yoga Studio   0.0
2  Opera House   0.0


----Ravenswood----
                  venue  freq
0         Grocery Store   1.0
1           Yoga Studio   0.0
2  Pakistani Restaurant   0.0


----Red Hook----
                     venue  freq
0               Bagel Shop  0.12
1                Wine Shop  0.12
2  New American Restaurant  0.12


----Rego Park----
                 venue  freq
0    Martial Arts Dojo  0.14
1  Japanese Restaurant  0.14
2  American Restaurant  0.14


----Remsen Village----
                           venue  freq
0           Caribbean Restaurant   1.0
1                    Yoga Studio   0.0
2  Paper / Office Supplies Store   0.0


----Richmond Hill----
                       venue  freq
0  Latin American Restaurant  0.25
1                Supermarket  0.12
2       Gym / Fitness Center  0.12


----Richmond Valley----
         

                  venue  freq
0      Sushi Restaurant  0.12
1  Gym / Fitness Center  0.06
2                  Bank  0.06




#### Let's put that into a *pandas* dataframe

##### First, let's write a function to sort the venues in descending order.

In [31]:
def return_most_common_venues(row, num_top_venues):
    """Return the labels of the largest entries of ``row``, highest first.

    Parameters
    ----------
    row : pandas.Series
        Its first element is skipped; the remaining elements are ranked
        by value in descending order.
    num_top_venues : int
        How many labels to keep from the top of the ranking.

    Returns
    -------
    numpy.ndarray
        Index labels of the top-ranked entries.
    """
    venue_freqs = row.iloc[1:]
    ranked_labels = venue_freqs.sort_values(ascending=False).index.values
    return ranked_labels[:num_top_venues]

##### Now let's create the new dataframe and display the top 5 venues for each neighborhood.

In [32]:
# Number of ranked venue columns to build for each neighborhood.
num_top_venues = 5

# Ordinal suffixes for ranks 1-3; every later rank falls back to 'th'.
indicators = ['st', 'nd', 'rd']

columns = ['Neighborhood']
for ind in range(num_top_venues):
    # Explicit bounds check instead of a bare `except:` so unrelated errors are not swallowed.
    suffix = indicators[ind] if ind < len(indicators) else 'th'
    columns.append('{}{} Most Common Venue'.format(ind+1, suffix))

# Start from an empty frame with the target columns, then fill in the names.
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = newyork_grouped['Neighborhood']

# Fill the ranked venue columns row by row from the grouped frequencies.
for ind in range(newyork_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(newyork_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head(15)

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,Allerton,Pizza Place,Bus Station,Pharmacy,Martial Arts Dojo,Supermarket
1,Annadale,Cocktail Bar,Women's Store,Fish Market,Farm,Farmers Market
2,Arden Heights,Bus Stop,Women's Store,Flea Market,Farm,Farmers Market
3,Arlington,Bus Stop,Women's Store,Flea Market,Farm,Farmers Market
4,Arrochar,Pizza Place,Women's Store,Fish Market,Falafel Restaurant,Farm
5,Astoria,Gourmet Shop,Intersection,Fast Food Restaurant,Liquor Store,Flea Market
6,Astoria Heights,Plaza,Laundromat,Chinese Restaurant,Food,Women's Store
7,Auburndale,Deli / Bodega,Train Station,Eye Doctor,Falafel Restaurant,Farm
8,Bath Beach,Rental Car Location,Ice Cream Shop,Pizza Place,Video Store,Gas Station
9,Battery Park City,Park,Food Court,Sandwich Place,Coffee Shop,Cupcake Shop


## 4. Cluster Neighborhoods

#### Run *k*-means to cluster the neighborhood into 5 clusters.

In [33]:
# Number of clusters requested from k-means.
kclusters = 5

# Cluster on everything except the name column.
# `columns=` replaces the positional axis argument (`drop('Neighborhood', 1)`),
# which newer pandas releases no longer accept.
newyork_grouped_clustering = newyork_grouped.drop(columns='Neighborhood')

# Fixed random_state so the label assignment is repeatable across runs.
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(newyork_grouped_clustering)

# Peek at the labels assigned to the first ten rows.
kmeans.labels_[0:10]

array([4, 2, 0, 0, 4, 2, 4, 3, 2, 2], dtype=int32)

#### Let's create a new dataframe that includes the cluster as well as the top 5 venues for each neighborhood.

In [34]:
# Attach the k-means labels as the first column. `insert` raises if the column
# already exists, so overwrite it on a re-run to keep this cell idempotent.
if 'Cluster Labels' in neighborhoods_venues_sorted.columns:
    neighborhoods_venues_sorted['Cluster Labels'] = kmeans.labels_
else:
    neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

# Join the label and ranked-venue columns onto the neighborhood table by name.
# `join` returns a new frame, so `neighborhoods` itself is left untouched.
# NOTE(review): labels display as floats (e.g. 2.0) — presumably because some
# neighborhoods have no match and receive NaN; confirm before casting to int.
neighborhoods_merged = neighborhoods.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

neighborhoods_merged.head()


Unnamed: 0,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,Bronx,Wakefield,40.894705,-73.847201,2.0,Dessert Shop,Gas Station,Food,Women's Store,Fish Market
1,Bronx,Co-op City,40.874294,-73.829939,2.0,Sporting Goods Shop,Baseball Field,Bus Station,Fish Market,Farm
2,Bronx,Eastchester,40.887556,-73.827806,2.0,Bus Stop,Diner,Automotive Shop,Women's Store,Flea Market
3,Bronx,Fieldston,40.895437,-73.905643,2.0,Plaza,Cosmetics Shop,Women's Store,Fish Market,Falafel Restaurant
4,Bronx,Riverdale,40.890834,-73.912585,2.0,Moving Target,Women's Store,Fish Market,Falafel Restaurant,Farm


In [35]:
# An identifier-safe column name lets later cells read the labels through
# attribute access (e.g. `cl1.Cluster_labels`). Reassigning instead of using
# `inplace=True` avoids mutating the frame the previous cell displayed.
neighborhoods_merged = neighborhoods_merged.rename(columns={'Cluster Labels':'Cluster_labels'})
neighborhoods_merged.head()

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude,Cluster_labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,Bronx,Wakefield,40.894705,-73.847201,2.0,Dessert Shop,Gas Station,Food,Women's Store,Fish Market
1,Bronx,Co-op City,40.874294,-73.829939,2.0,Sporting Goods Shop,Baseball Field,Bus Station,Fish Market,Farm
2,Bronx,Eastchester,40.887556,-73.827806,2.0,Bus Stop,Diner,Automotive Shop,Women's Store,Flea Market
3,Bronx,Fieldston,40.895437,-73.905643,2.0,Plaza,Cosmetics Shop,Women's Store,Fish Market,Falafel Restaurant
4,Bronx,Riverdale,40.890834,-73.912585,2.0,Moving Target,Women's Store,Fish Market,Falafel Restaurant,Farm


## 5. Examine Clusters

##### Now, you can examine each cluster and determine the discriminating venue categories that distinguish each cluster. Based on the defining categories, you can then assign a name to each cluster. I will leave this exercise to you.

#### Cluster 1

In [36]:
# Rows labelled 0 by k-means, keeping every column in its existing order.
in_cluster_1 = neighborhoods_merged['Cluster_labels'] == 0
cl1 = neighborhoods_merged.loc[in_cluster_1, :]
cl1.head()

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude,Cluster_labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
198,Staten Island,New Brighton,40.640615,-74.087017,0.0,Bus Stop,Bowling Alley,Women's Store,Flea Market,Farmers Market
206,Staten Island,Mariner's Harbor,40.632546,-74.150085,0.0,Baseball Field,Bus Stop,Women's Store,Fish Market,Farm
227,Staten Island,Arlington,40.635325,-74.165104,0.0,Bus Stop,Women's Store,Flea Market,Farm,Farmers Market
229,Staten Island,Grasmere,40.598268,-74.076674,0.0,Park,Bus Stop,Fish Market,Falafel Restaurant,Farm
241,Staten Island,Arden Heights,40.549286,-74.185887,0.0,Bus Stop,Women's Store,Flea Market,Farm,Farmers Market


#### Cluster 2

In [37]:
# Rows labelled 1 by k-means, keeping every column in its existing order.
in_cluster_2 = neighborhoods_merged['Cluster_labels'] == 1
cl2 = neighborhoods_merged.loc[in_cluster_2, :]
cl2.head()

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude,Cluster_labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
27,Bronx,Clason Point,40.806551,-73.854144,1.0,Pool,Park,Playground,Filipino Restaurant,Factory
35,Bronx,Spuyten Duyvil,40.881395,-73.91719,1.0,Park,Bus Line,Fish Market,Falafel Restaurant,Farm
39,Bronx,Edgewater Park,40.821986,-73.813885,1.0,Park,Bus Station,Fish Market,Falafel Restaurant,Farm
52,Brooklyn,Sheepshead Bay,40.58689,-73.943186,1.0,Park,Fish & Chips Shop,Factory,Falafel Restaurant,Farm
166,Queens,Rochdale,40.675211,-73.772588,1.0,Park,Fish & Chips Shop,Factory,Falafel Restaurant,Farm


#### Cluster 3

In [38]:
# Rows labelled 2 by k-means, keeping every column in its existing order.
in_cluster_3 = neighborhoods_merged['Cluster_labels'] == 2
cl3 = neighborhoods_merged.loc[in_cluster_3, :]
cl3.head()

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude,Cluster_labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,Bronx,Wakefield,40.894705,-73.847201,2.0,Dessert Shop,Gas Station,Food,Women's Store,Fish Market
1,Bronx,Co-op City,40.874294,-73.829939,2.0,Sporting Goods Shop,Baseball Field,Bus Station,Fish Market,Farm
2,Bronx,Eastchester,40.887556,-73.827806,2.0,Bus Stop,Diner,Automotive Shop,Women's Store,Flea Market
3,Bronx,Fieldston,40.895437,-73.905643,2.0,Plaza,Cosmetics Shop,Women's Store,Fish Market,Falafel Restaurant
4,Bronx,Riverdale,40.890834,-73.912585,2.0,Moving Target,Women's Store,Fish Market,Falafel Restaurant,Farm


#### Cluster 4

In [39]:
# Rows labelled 3 by k-means, keeping every column in its existing order.
in_cluster_4 = neighborhoods_merged['Cluster_labels'] == 3
cl4 = neighborhoods_merged.loc[in_cluster_4, :]
cl4.head()

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude,Cluster_labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
8,Bronx,Norwood,40.877224,-73.879391,3.0,Deli / Bodega,Park,History Museum,Factory,Falafel Restaurant
65,Brooklyn,Cobble Hill,40.68792,-73.998561,3.0,Deli / Bodega,Coffee Shop,Pilates Studio,Argentinian Restaurant,Fish Market
72,Brooklyn,East New York,40.669926,-73.880699,3.0,Deli / Bodega,Asian Restaurant,Food Truck,Chinese Restaurant,Farmers Market
78,Brooklyn,Coney Island,40.574293,-73.988683,3.0,Deli / Bodega,Music Venue,Historic Site,Fish & Chips Shop,Falafel Restaurant
89,Brooklyn,Ocean Hill,40.678403,-73.913068,3.0,Deli / Bodega,Coffee Shop,Dry Cleaner,Falafel Restaurant,Farmers Market


#### Cluster 5

In [47]:
# Rows labelled 4 by k-means, keeping every column in its existing order.
in_cluster_5 = neighborhoods_merged['Cluster_labels'] == 4
cl5 = neighborhoods_merged.loc[in_cluster_5, :]
cl5.head()

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude,Cluster_labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
6,Manhattan,Marble Hill,40.876551,-73.91066,4.0,Pharmacy,Women's Store,Fish & Chips Shop,Falafel Restaurant,Farm
13,Bronx,Bedford Park,40.870185,-73.885512,4.0,Bus Station,Chinese Restaurant,Women's Store,Flea Market,Farmers Market
19,Bronx,High Bridge,40.836623,-73.926102,4.0,Playground,Pharmacy,Market,Supermarket,Asian Restaurant
21,Bronx,Mott Haven,40.806239,-73.9161,4.0,Department Store,Pizza Place,Grocery Store,Discount Store,Peruvian Restaurant
26,Bronx,Soundview,40.821012,-73.865746,4.0,Chinese Restaurant,Mobile Phone Shop,Discount Store,Grocery Store,Women's Store


## Optimal Location for a Sushi Place Investment 

#### Let's list the Neighborhoods in New York where Sushi or Japanese Restaurants are among the top 3 most common venues in their Neighborhoods.


In [48]:
# Keep neighborhoods where a Sushi or Japanese restaurant appears in any of
# the three highest-ranked venue columns.
top_three_columns = ['1st Most Common Venue', '2nd Most Common Venue', '3rd Most Common Venue']
japanese_categories = ['Sushi Restaurant', 'Japanese Restaurant']
has_japanese_top_three = neighborhoods_merged[top_three_columns].isin(japanese_categories).any(axis=1)

Tokyo_top_three = neighborhoods_merged[has_japanese_top_three].reset_index(drop=True)
print(Tokyo_top_three.shape)
Tokyo_top_three.head()

(12, 10)


Unnamed: 0,Borough,Neighborhood,Latitude,Longitude,Cluster_labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,Brooklyn,Brighton Beach,40.576825,-73.965094,2.0,Restaurant,Sushi Restaurant,Gourmet Shop,Playground,Coffee Shop
1,Brooklyn,Fort Greene,40.688527,-73.972906,2.0,Japanese Restaurant,New American Restaurant,Playground,Yoga Studio,Café
2,Manhattan,Yorkville,40.77593,-73.947118,2.0,Sushi Restaurant,Pizza Place,Hobby Shop,Liquor Store,Café
3,Manhattan,Murray Hill,40.748303,-73.978332,2.0,Korean Restaurant,Coffee Shop,Japanese Restaurant,Bank,Jewish Restaurant
4,Manhattan,Tribeca,40.721522,-74.010683,2.0,Spa,Yoga Studio,Sushi Restaurant,Hotel,Italian Restaurant


In [49]:
# Location and cluster columns only, without the ranked-venue columns.
Tokyo_top_three[['Borough','Neighborhood','Latitude','Longitude','Cluster_labels']]

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude,Cluster_labels
0,Brooklyn,Brighton Beach,40.576825,-73.965094,2.0
1,Brooklyn,Fort Greene,40.688527,-73.972906,2.0
2,Manhattan,Yorkville,40.77593,-73.947118,2.0
3,Manhattan,Murray Hill,40.748303,-73.978332,2.0
4,Manhattan,Tribeca,40.721522,-74.010683,2.0
5,Queens,Murray Hill,40.764126,-73.812763,2.0
6,Brooklyn,Brighton Beach,40.576825,-73.965094,2.0
7,Brooklyn,Fort Greene,40.688527,-73.972906,2.0
8,Manhattan,Yorkville,40.77593,-73.947118,2.0
9,Manhattan,Murray Hill,40.748303,-73.978332,2.0


In [50]:
# Count how many of the selected neighborhoods fall in each borough.
Tokyo_top_three['Borough'].value_counts()

Manhattan    6
Brooklyn     4
Queens       2
Name: Borough, dtype: int64

### Finally, let's visualize the resulting clusters

In [54]:
def add_neighborhood_markers(frame, target_map, radius, color, fill_color):
    """Draw one circle marker per row of ``frame`` on ``target_map``.

    Parameters
    ----------
    frame : pandas.DataFrame
        Must expose ``Latitude``, ``Longitude``, ``Borough``, ``Neighborhood``
        and ``Cluster_labels`` columns.
    target_map : folium.Map
        Map that receives the markers.
    radius : int
        Marker radius passed to folium.
    color : str
        Outline colour of each marker.
    fill_color : str
        Fill colour of each marker.
    """
    rows = zip(frame.Latitude, frame.Longitude, frame.Borough, frame.Neighborhood, frame.Cluster_labels)
    for lat, lng, borough, neighborhood, cluster_label in rows:
        # Popup text: "<neighborhood>, <borough>,<cluster label>".
        label = '{}, {},{}'.format(neighborhood, borough, cluster_label)
        folium.features.CircleMarker(
            [lat, lng],
            radius=radius,
            color=color,
            popup=folium.Popup(label, parse_html=True),
            fill=True,
            fill_color=fill_color,
            fill_opacity=0.6
        ).add_to(target_map)


# Generate the map centred on the New York coordinates (nylatitude, nylongitude).
Tokyo_top_three_map = folium.Map(location=[nylatitude, nylongitude], zoom_start=10.5)

# Large red marker for the map centre.
folium.features.CircleMarker(
    [nylatitude, nylongitude],
    radius=12,
    color='red',
    popup='New York',
    fill=True,
    fill_color='red',
    fill_opacity=0.6
).add_to(Tokyo_top_three_map)

# Large yellow markers: neighborhoods with Sushi/Japanese restaurants in their top three.
add_neighborhood_markers(Tokyo_top_three, Tokyo_top_three_map, radius=12, color='black', fill_color='yellow')

# Small markers for the cluster samples, one colour per cluster, drawn in cluster order.
cluster_colors = [
    (cl1, 'green'),
    (cl2, 'blue'),
    (cl3, 'orange'),
    (cl4, 'white'),
    (cl5, 'purple'),
]
for cluster_frame, cluster_color in cluster_colors:
    add_neighborhood_markers(cluster_frame, Tokyo_top_three_map, radius=5, color=cluster_color, fill_color=cluster_color)

Tokyo_top_three_map