## Battle of Neighborhoods

In [114]:
import pandas as pd
import numpy as np

### First, lets take the dataset we build in the previous section, where we used Foursquare to get the venues from Toronto. After we grouped according to it's neighborhood and finally we list the 10 most popular venues for each neighborhood.

In [115]:
data = pd.read_csv("nb_venues_sorted.csv")
data.head()

Unnamed: 0.1,Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,0,Berczy Park,Coffee Shop,Cocktail Bar,Bakery,Beer Bar,Seafood Restaurant,Farmers Market,Cheese Shop,Steakhouse,Café,Greek Restaurant
1,1,"Brockton, Parkdale Village, Exhibition Place",Coffee Shop,Breakfast Spot,Café,Nightclub,Pet Store,Stadium,Burrito Place,Restaurant,Climbing Gym,Performing Arts Venue
2,2,Business Reply Mail Processing Centre 969 Eastern,Light Rail Station,Yoga Studio,Auto Workshop,Garden Center,Garden,Fast Food Restaurant,Farmers Market,Comic Shop,Pizza Place,Butcher
3,3,"CN Tower, Bathurst Quay, South Niagara, Railwa...",Airport Lounge,Airport Service,Airport Terminal,Sculpture Garden,Boutique,Plane,Coffee Shop,Boat or Ferry,Harbor / Marina,Bar
4,4,Central Bay Street,Coffee Shop,Ice Cream Shop,Italian Restaurant,Juice Bar,Burger Joint,Café,Japanese Restaurant,Sandwich Place,Bakery,Salad Place


In [116]:
venues_listed = list()
for i in range(2,len(data.columns)):
    venues_listed.extend( list(data.iloc[:,i].unique() ) )
venues_listed = set(venues_listed)

In [117]:
venues_listed

{'Airport Lounge',
 'Airport Service',
 'Airport Terminal',
 'American Restaurant',
 'Aquarium',
 'Asian Restaurant',
 'Auto Workshop',
 'BBQ Joint',
 'Baby Store',
 'Bakery',
 'Bank',
 'Bar',
 'Beer Bar',
 'Boat or Ferry',
 'Bookstore',
 'Boutique',
 'Brazilian Restaurant',
 'Breakfast Spot',
 'Brewery',
 'Burger Joint',
 'Burrito Place',
 'Bus Line',
 'Butcher',
 'Café',
 'Cajun / Creole Restaurant',
 'Candy Store',
 'Cheese Shop',
 'Chinese Restaurant',
 'Climbing Gym',
 'Clothing Store',
 'Cocktail Bar',
 'Coffee Shop',
 'Comic Shop',
 'Cosmetics Shop',
 'Cuban Restaurant',
 'Dance Studio',
 'Deli / Bodega',
 'Department Store',
 'Dessert Shop',
 'Dim Sum Restaurant',
 'Diner',
 'Discount Store',
 'Dog Run',
 'Doner Restaurant',
 'Donut Shop',
 'Dumpling Restaurant',
 'Eastern European Restaurant',
 'Electronics Store',
 'Ethiopian Restaurant',
 'Farmers Market',
 'Fast Food Restaurant',
 'Fish & Chips Shop',
 'Flower Shop',
 'Food & Drink Shop',
 'Fried Chicken Joint',
 'Fruit & V

### Lets get user preferences and then normalize it

In [118]:
user_preferences = dict()

In [119]:
user_preferences['Rental Car Location'] = 7
user_preferences['Italian Restaurant'] = 10
user_preferences['Sports Bar'] = 3
user_preferences['Indian Restaurant'] = 8
user_preferences['Recording Studio'] = 10
user_preferences['Japanese Restaurant'] = 10
user_preferences['Dance Studio'] = 10
user_preferences['Harbor / Marina'] = 10
total =  np.sum(list(user_preferences.values() ) )
for key in user_preferences.keys():
    user_preferences[key] = user_preferences[key]/float(total)

In [120]:
user_preferences

{'Rental Car Location': 0.10294117647058823,
 'Italian Restaurant': 0.14705882352941177,
 'Sports Bar': 0.04411764705882353,
 'Indian Restaurant': 0.11764705882352941,
 'Recording Studio': 0.14705882352941177,
 'Japanese Restaurant': 0.14705882352941177,
 'Dance Studio': 0.14705882352941177,
 'Harbor / Marina': 0.14705882352941177}

In [121]:
len(venues_listed)

121

### Lets now create a new dataframe, where the columns are Neighborhood and all other venues that appear at the 10 most common venues dataframe

In [122]:
cols = list(['Neighborhood'])
cols.extend(venues_listed)
weighted_data = pd.DataFrame(columns=cols)
weighted_data.columns

Index(['Neighborhood', 'Rental Car Location', 'Sports Bar', 'Women's Store',
       'Recording Studio', 'Greek Restaurant', 'Pet Store',
       'Ethiopian Restaurant', 'Vegetarian / Vegan Restaurant', 'Dance Studio',
       ...
       'Sculpture Garden', 'Climbing Gym', 'Farmers Market', 'Restaurant',
       'Gay Bar', 'Indian Restaurant', 'Italian Restaurant', 'Cosmetics Shop',
       'Harbor / Marina', 'Japanese Restaurant'],
      dtype='object', length=122)

In [123]:
for column in data.columns:
    print(column)

Unnamed: 0
Neighborhood
1st Most Common Venue
2nd Most Common Venue
3rd Most Common Venue
4th Most Common Venue
5th Most Common Venue
6th Most Common Venue
7th Most Common Venue
8th Most Common Venue
9th Most Common Venue
10th Most Common Venue


#### Lets put some weights according to the place where the venue appear. For example, if at Berczy Park the most common venue is coffe shop and the second Cocktail Bar, coffee shop will get the highest weight and cocktail bar the second, and so on. To do that I got Formula's 1 points according to racer position, but it could be any other arbitrary weighting.

In [124]:
weighted_data.Neighborhood = data.Neighborhood
weights = [25,18,15,12,10,8,6,4,2,1]
weight_index = 0
for col in range(2,len(data.columns)):
    for i in range(data.shape[0]):
        weighted_data.iloc[i].loc[data.iloc[i,col]] = weights[weight_index]
    weight_index += 1
weighted_data.replace(to_replace=np.nan,value=0,inplace=True)
weighted_data.head()

Unnamed: 0,Neighborhood,Rental Car Location,Sports Bar,Women's Store,Recording Studio,Greek Restaurant,Pet Store,Ethiopian Restaurant,Vegetarian / Vegan Restaurant,Dance Studio,...,Sculpture Garden,Climbing Gym,Farmers Market,Restaurant,Gay Bar,Indian Restaurant,Italian Restaurant,Cosmetics Shop,Harbor / Marina,Japanese Restaurant
0,Berczy Park,0,0,0,0,1,0,0,0,0,...,0,0,8,0,0,0,0,0,0,0
1,"Brockton, Parkdale Village, Exhibition Place",0,0,0,0,0,10,0,0,0,...,0,2,0,4,0,0,0,0,0,0
2,Business Reply Mail Processing Centre 969 Eastern,0,0,0,0,0,0,0,0,0,...,0,0,6,0,0,0,0,0,0,0
3,"CN Tower, Bathurst Quay, South Niagara, Railwa...",0,0,0,0,0,0,0,0,0,...,12,0,0,0,0,0,0,0,2,0
4,Central Bay Street,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,15,0,0,6


### Multiply each column by user preferences

In [125]:
for key in user_preferences.keys():
    weighted_data[key] = weighted_data[key].apply(lambda x: x*user_preferences[key])

### Create a new column with the total of points and then sort the data according to it's values. Now we can list the top neighborhoods to stay

In [126]:
weighted_data['TOTAL'] = weighted_data.sum(axis=1)
weighted_data.sort_values(by='TOTAL',inplace=True,ascending=False)
weighted_data['Neighborhood'].head()

0                                   Berczy Park
13    Grange Park, Chinatown, Kensington Market
36                The Junction South, High Park
34               The Beaches West, India Bazaar
33                                  The Beaches
Name: Neighborhood, dtype: object

In [141]:
weighted_data[['Neighborhood','TOTAL']].head()

Unnamed: 0,Neighborhood,TOTAL
0,Berczy Park,101.0
13,"Grange Park, Chinatown, Kensington Market",101.0
36,"The Junction South, High Park",101.0
34,"The Beaches West, India Bazaar",101.0
33,The Beaches,101.0
