In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# 1. Read Datasets

In [2]:
city_hotels_df = pd.read_csv('data/city_hotel_features.txt', delimiter='\t')
city_hotels_df.head()

Unnamed: 0,Hotel_Name,City_Name,Features
0,Tanner's,atlanta,100 253 250 178 174 063 059 036 008 074 204 05...
1,Frijoleros,atlanta,250 062 132 174 063 197 071 142 234 243 075 20...
2,Indian Delights,atlanta,253 250 150 174 083 059 036 117 243 076 205 05...
3,Great Wall,atlanta,253 191 192 174 036 039 075 204 052 163
4,The Brickery,atlanta,100 253 086 231 250 191 192 059 036 215 005 00...


In [3]:
print city_hotels_df.info()
city_hotels_df.describe()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4160 entries, 0 to 4159
Data columns (total 3 columns):
Hotel_Name    4160 non-null object
City_Name     4160 non-null object
Features      4160 non-null object
dtypes: object(3)
memory usage: 97.6+ KB
None


Unnamed: 0,Hotel_Name,City_Name,Features
count,4160,4160,4160
unique,4022,8,3977
top,Hard Rock Cafe,new_york,125 075 205 053 167
freq,4,1200,14


In [4]:
features_df = pd.read_csv('data/features.txt', delimiter='\t', index_col=0)
features_df.head()

Unnamed: 0_level_0,Feature_Name
ID,Unnamed: 1_level_1
0,A
1,Authentic
2,Afghanistan
3,African
4,After Hours Dining


In [5]:
print features_df.info()
features_df.describe()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 257 entries, 0 to 256
Data columns (total 1 columns):
Feature_Name    257 non-null object
dtypes: object(1)
memory usage: 4.0+ KB
None


Unnamed: 0,Feature_Name
count,257
unique,248
top,$15-$30
freq,4


# 2. Hot-encode the features using _get_\__dummies_ method from Pandas

In [6]:
hotel_encode = pd.concat([city_hotels_df, city_hotels_df.Features.str.get_dummies(sep=' ')], axis=1)

In [7]:
hotel_encode.head()

Unnamed: 0,Hotel_Name,City_Name,Features,000,001,002,003,004,005,006,...,246,247,248,249,250,251,252,253,254,256
0,Tanner's,atlanta,100 253 250 178 174 063 059 036 008 074 204 05...,0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,1,0,0
1,Frijoleros,atlanta,250 062 132 174 063 197 071 142 234 243 075 20...,0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0
2,Indian Delights,atlanta,253 250 150 174 083 059 036 117 243 076 205 05...,0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,1,0,0
3,Great Wall,atlanta,253 191 192 174 036 039 075 204 052 163,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
4,The Brickery,atlanta,100 253 086 231 250 191 192 059 036 215 005 00...,0,0,0,0,0,1,0,...,0,0,0,0,1,0,0,1,0,0


In [8]:
hotel_hot_encode = hotel_encode.drop(['Features'], axis=1).copy()
hotel_hot_encode.head()

Unnamed: 0,Hotel_Name,City_Name,000,001,002,003,004,005,006,007,...,246,247,248,249,250,251,252,253,254,256
0,Tanner's,atlanta,0,0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,1,0,0
1,Frijoleros,atlanta,0,0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0
2,Indian Delights,atlanta,0,0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,1,0,0
3,Great Wall,atlanta,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
4,The Brickery,atlanta,0,0,0,0,0,1,0,0,...,0,0,0,0,1,0,0,1,0,0


In [9]:
hotel_hot_encode.describe()

Unnamed: 0,000,001,002,003,004,005,006,007,008,009,...,246,247,248,249,250,251,252,253,254,256
count,4160.0,4160.0,4160.0,4160.0,4160.0,4160.0,4160.0,4160.0,4160.0,4160.0,...,4160.0,4160.0,4160.0,4160.0,4160.0,4160.0,4160.0,4160.0,4160.0,4160.0
mean,0.000481,0.030048,0.002163,0.000962,0.077644,0.069471,0.010817,0.016346,0.092788,0.015625,...,0.01226,0.004327,0.098798,0.182452,0.297596,0.051683,0.125721,0.280288,0.016106,0.00024
std,0.021924,0.17074,0.046468,0.030997,0.267643,0.254284,0.103455,0.126818,0.290171,0.124035,...,0.110056,0.065645,0.298427,0.386263,0.457256,0.221412,0.331575,0.449194,0.125898,0.015504
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0
max,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


# Data : 
1. __features.txt__ - A dictionary mapping from feature ID to descriptive text. 
For example, feature 072 corresponds to "Ethiopian" cuisine.

2. __city_hotel_features.txt__ - A file containing restaurant data.
Format : restaurant name [TAB] city [TAB] restaurant features (3 digits ids separated by spaces)

# Problems :
1. Given feature_x1, feature_x2, feature_x3 .... feature_xn find best 10 hotels.
2. Find top 5 important features for given city.
3. Generate a model to find similar hotels for given hotel x.
4. Generate a model to find similar feature for given feature y.
5. How to scale if number of hotels OR number of features are more.
6. Segment/Cluster, visualize ... features or hotels based on given data to get the insights.



## 1. Given feature_x1, feature_x2, .... feature_xn find best 10 hotels

In [10]:
hotel_hot_encode.head()

Unnamed: 0,Hotel_Name,City_Name,000,001,002,003,004,005,006,007,...,246,247,248,249,250,251,252,253,254,256
0,Tanner's,atlanta,0,0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,1,0,0
1,Frijoleros,atlanta,0,0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0
2,Indian Delights,atlanta,0,0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,1,0,0
3,Great Wall,atlanta,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
4,The Brickery,atlanta,0,0,0,0,0,1,0,0,...,0,0,0,0,1,0,0,1,0,0


In [11]:
xyz = (hotel_hot_encode.iloc[261] == hotel_hot_encode.iloc[262])
xyz[xyz == False]

052    False
053    False
162    False
163    False
dtype: bool

In [12]:
# hotel_hot_encode.groupby(['City_Name', 'Hotel_Name']).sum().loc['atlanta'].sort_values('Total', ascending=False).head(10)#.loc['Soul Vegetarian']

Our aim is to get the total number of features a hotel has. So hotels with most number of features are considered as better than hotels with fewer features. We consider all features as equal and bias towards no one feature.

Now the first hurdle is many hotels are duplicated with different feature list. So the first thing to do is to remove duplicates. if hotel A has [001,002] features in first occurrence while [001,003] in another than its occurences should be merged into a single row with features [001,002,003]. This is accomplished using _Pandas_ _groupby_ method and then summing the duplicate rows. Now features present in both the duplicate occurences will give sum > 1 so we make the data frame to fill all values > 1 with 1.

This is a lot but in reality its done in 2 lines using Pandas so win win :D

In [13]:
df = hotel_hot_encode.groupby(['Hotel_Name', 'City_Name']).sum().copy()
df[df != 0] = 1

In [14]:
df['Total'] = df.sum(axis=1)

Below is just for a side idea we had for top 10 hotels for a city:
We listed the top 10 hotels based on total no. of features in Atlanta city

In [15]:
df.groupby(['City_Name', 'Hotel_Name']).sum().loc['atlanta'].sort_values('Total', ascending=False).head(10)#.loc['Soul Vegetarian']

Unnamed: 0_level_0,000,001,002,003,004,005,006,007,008,009,...,247,248,249,250,251,252,253,254,256,Total
Hotel_Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Dante's Down the Hatch,0,0,0,0,0,0,0,0,0,0,...,0,1,0,1,0,0,1,0,0,22
Anthony's,0,0,0,0,0,0,0,0,0,0,...,0,1,0,0,0,0,1,0,0,21
East Village Grille,0,0,0,0,1,0,0,0,1,0,...,0,1,0,1,0,0,0,0,0,21
Carbo's Cafe,0,0,0,0,0,0,0,0,0,0,...,0,1,0,1,0,0,1,0,0,21
Aunt Fanny's Cabin,0,1,0,0,0,0,0,0,0,0,...,0,1,0,1,0,0,0,0,0,20
57th Fighter Group,0,0,0,0,0,0,0,0,1,0,...,0,1,0,1,0,0,1,0,0,20
Taco Mac,0,0,0,0,1,0,0,0,0,0,...,0,0,0,1,0,0,1,0,0,20
Ray's on the River,0,0,0,0,0,0,0,0,1,0,...,0,0,0,1,0,0,1,0,0,20
Cafe Intermezzo,0,0,0,0,0,0,0,0,0,0,...,0,0,0,1,0,0,1,0,0,19
Imperial Fez,0,0,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,1,0,0,19


In [16]:
df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,000,001,002,003,004,005,006,007,008,009,...,247,248,249,250,251,252,253,254,256,Total
Hotel_Name,City_Name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
Carnegie Deli,washington_dc,0,0,0,0,0,0,0,0,1,0,...,0,0,0,1,0,1,0,0,0,13
Le Gaulois,washington_dc,0,0,0,0,0,0,0,0,0,0,...,0,1,0,0,0,1,0,0,0,13
101,new_york,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,5
101 Seafood,new_york,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,6
103 NYC,new_york,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,5


_top_\__hotels_ gives the best 10 hotels from a list of given features

In [17]:
def top_hotels(features):
    dic = {}
    for index, row in df.iterrows():
        features_present = [feature for feature in features if row[feature]]
        if len(features_present) == len(features):
            dic[df.loc[index]['Total']] = index
    return [[key,value] for (key, value) in sorted(dic.items(), reverse=True)][:10]
top_hotels(['005'])

[[28, ('SEASONS', 'washington_dc')],
 [27, ("KINKEAD'S", 'washington_dc')],
 [26, ('Jefferson Restaurant', 'washington_dc')],
 [25, ('THE COLONNADE', 'washington_dc')],
 [24, ('Melrose', 'washington_dc')],
 [23, ('West End Cafe', 'washington_dc')],
 [22, ('ROWES WHARF RESTAURANT & CAFE', 'boston')],
 [21, ('ST. CLOUD', 'boston')],
 [20, ('Willow Grove Inn', 'washington_dc')],
 [19, ('THE IVY', 'los_angeles')]]

# 2. Find top 5 important features for given city.

For this we calculate the features that most frequently occur in a city and sort them according to that. We return a series in which values are named with name of the city, index as the features and values as their frequency

In [18]:
def top_features(city):
    return df.groupby('City_Name').sum().loc[city].sort_values(ascending=False).head(6)[1:]
    
top_features('atlanta')

174    223
253    197
205    163
250    157
192    154
Name: atlanta, dtype: int64

_get_\__fname_ is just a function we created to get list of feature name tuples for a given list of feature IDs

In [19]:
def get_fname(id_list):
    return [(id, features_df.loc[int(id)]['Feature_Name']) for id in id_list]

top_freq_ids = top_features('atlanta')
get_fname(top_freq_ids.index)

[('174', 'Parking/Valet'),
 ('253', 'Wheelchair Access'),
 ('205', 'Excellent Service'),
 ('250', 'Weekend Dining'),
 ('192', 'Private Rooms Available')]

# Generate a model to find similar hotels for given hotel x
We found out correlation is the wrong approach here. It just doesn't work here. We tried to find similar hotel to the 'Carnegie Dell' hotel of 'washington_dc' but the very first in the list has only a single feature common. So we knew something is wrong here. There is got to be more commons among hotels. Something fishy is going on here... :(

In [20]:
%matplotlib inline
df.iloc[0:].corrwith(df.iloc[0],axis=1).sort_values()

Hotel_Name                    City_Name    
PATSY'S PIZZA (Bklyn)         new_york         0.860047
Taste of Hong Kong            new_york         0.860047
Taci International            new_york         0.860047
Perretti Italian Cafe         new_york         0.860047
BISTRO AT MAISON DE VILLE     new_orleans      0.860047
Cornelia Street Cafe          new_york         0.860047
Brasserie des Theatres        new_york         0.860047
UNION SQUARE CAFE             new_york         0.860047
Wurzburg-Haus                 washington_dc    0.860047
MIKE'S ON THE AVENUE          new_orleans      0.860047
Gramercy Watering Hole        new_york         0.860047
Swing Street Cafe             new_york         0.860047
Benito I                      new_york         0.860047
Restaurant Two Two Two        new_york         0.860047
Manganaro Grosseria Italiana  new_york         0.860047
Caffe Bond!                   new_york         0.860047
Delmonico's Seafood Grille    los_angeles      0.860047
Cour

In [21]:
# df.iloc[0].to_csv('wdc1')
# df.loc[(' Le Gaulois', 'washington_dc')].to_csv('wdc2')
# df.loc[[(' Carnegie Deli', 'washington_dc'),('PATSY\'S PIZZA (Bklyn)', 'new_york')]].to_csv('wdc3')

_get_\__features_ is just a function we created to get the list of features a hotel has in a given dataframe. And thats how we knew our correlation analysis isn't working.

In [22]:
def get_features(hotel, df):
    hotel_features = df.loc[hotel]
    return sorted(get_fname(hotel_features[hotel_features == 1].index))
get_features(df.index[0], df)

[('008', 'American (Traditional)'),
 ('036', 'Catering for Special Events'),
 ('040', 'Classic Hotel Dining'),
 ('051', 'Fair Decor'),
 ('058', 'Deli'),
 ('074', 'Good Food'),
 ('086', 'For the Young and Young at Heart'),
 ('100', 'Good for Younger Kids'),
 ('163', 'below $15'),
 ('174', 'Parking/Valet'),
 ('204', 'Good Service'),
 ('250', 'Weekend Dining'),
 ('252', 'Weekend Lunch')]

In [23]:
get_features(('PATSY\'S PIZZA (Bklyn)', 'new_york'), df)

[('052', 'Good Decor'),
 ('077', 'Near-perfect Food'),
 ('164', '$15-$30'),
 ('205', 'Excellent Service')]

In [24]:
get_features((' Le Gaulois', 'washington_dc'), df)

[('053', 'Excellent Decor'),
 ('059', 'Delivery Available'),
 ('076', 'Extraordinary Food'),
 ('092', 'French Bistro'),
 ('093', 'French Classic'),
 ('111', 'Health Conscious Menus'),
 ('166', '$15-$30'),
 ('191', 'Private Parties'),
 ('192', 'Private Rooms Available'),
 ('205', 'Excellent Service'),
 ('231', 'Takeout Available'),
 ('248', 'Warm spots by the fire'),
 ('252', 'Weekend Lunch')]

So we changed our analysis tactics. We first created a dataframe with only the features present in our given hotel(we also dropped the hotel from our new dataframe so that it doesn't create problems with further analysis). We then sorted the hotels in the new dataframe again based on max. total features and pick the top 5. Also note here that the parent dataframe is actually sorted on basis of most no. of features, so any hotels with the same total in our new dataframe will automatically be sorted in the most no. of features order.

In [66]:
def get_same_hotels(hotel, df):
    df = df[[item[0] for item in get_features(hotel, df)]]
    return df.sum(axis=1).sort_values(ascending=False).head(6).drop(hotel)
l1 = [i for i in get_same_hotels(df.index[0], df).index]

In [67]:
hotel_df = df[[item[0] for item in get_features(df.index[0], df)]]
hotel_df.loc[l1]

Unnamed: 0_level_0,Unnamed: 1_level_0,008,036,040,051,058,074,086,100,163,174,204,250,252
Hotel_Name,City_Name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
Tanner's,atlanta,1,1,0,0,0,1,0,1,1,1,1,1,0
RUBIN'S,boston,0,1,0,1,1,0,1,1,1,0,1,1,0
Bucket Shop,atlanta,1,1,0,0,0,1,0,1,1,1,1,1,0
Mel's Drive-In,san_francisco,1,0,0,0,0,1,1,1,1,1,1,1,0
Ledo Pizza,washington_dc,0,0,0,1,0,0,1,1,1,1,1,1,1


8 features in common with the given hotel. Now bob's your uncle and we got something meaningful. 

In [30]:
get_features(('Ledo Pizza', 'washington_dc'),df)

[('051', 'Fair Decor'),
 ('075', 'Excellent Food'),
 ('086', 'For the Young and Young at Heart'),
 ('100', 'Good for Younger Kids'),
 ('150', 'No Smoking Allowed'),
 ('163', 'below $15'),
 ('174', 'Parking/Valet'),
 ('182', 'Pizza'),
 ('204', 'Good Service'),
 ('250', 'Weekend Dining'),
 ('252', 'Weekend Lunch'),
 ('253', 'Wheelchair Access'),
 ('254', 'Wine and Beer')]

In [31]:
get_features(df.index[0],df)

[('008', 'American (Traditional)'),
 ('036', 'Catering for Special Events'),
 ('040', 'Classic Hotel Dining'),
 ('051', 'Fair Decor'),
 ('058', 'Deli'),
 ('074', 'Good Food'),
 ('086', 'For the Young and Young at Heart'),
 ('100', 'Good for Younger Kids'),
 ('163', 'below $15'),
 ('174', 'Parking/Valet'),
 ('204', 'Good Service'),
 ('250', 'Weekend Dining'),
 ('252', 'Weekend Lunch')]

In [56]:
df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,000,001,002,003,004,005,006,007,008,009,...,247,248,249,250,251,252,253,254,256,Total
Hotel_Name,City_Name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
Carnegie Deli,washington_dc,0,0,0,0,0,0,0,0,1,0,...,0,0,0,1,0,1,0,0,0,13
Le Gaulois,washington_dc,0,0,0,0,0,0,0,0,0,0,...,0,1,0,0,0,1,0,0,0,13
101,new_york,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,5
101 Seafood,new_york,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,6
103 NYC,new_york,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,5


# 4. Generate a model to find similar feature for given feature y.
This is similar to previous one but instead of columns you have to think in terms of rows. And do the same thing we did above the columns to the rows. So we sliced the DF with only those hotels which have the feature. Afterwards its simple to sum up and get the most frequently occuring features in those hotels and sort them according to that.

In [74]:
def get_same_feature(feature, df):
    return df[df[feature] == 1].sum(axis=0).sort_values(ascending=False).head(7).drop([feature, 'Total'])

In [75]:
get_same_feature('000',df)

145    2
253    1
074    1
087    1
086    1
dtype: int64

In [76]:
df

Unnamed: 0_level_0,Unnamed: 1_level_0,000,001,002,003,004,005,006,007,008,009,...,247,248,249,250,251,252,253,254,256,Total
Hotel_Name,City_Name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
Carnegie Deli,washington_dc,0,0,0,0,0,0,0,0,1,0,...,0,0,0,1,0,1,0,0,0,13
Le Gaulois,washington_dc,0,0,0,0,0,0,0,0,0,0,...,0,1,0,0,0,1,0,0,0,13
101,new_york,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,5
101 Seafood,new_york,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,6
103 NYC,new_york,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,5
103 WEST,atlanta,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,1,0,0,15
107 West,new_york,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,4
1789,washington_dc,0,0,0,0,0,1,0,1,0,0,...,0,1,0,1,0,0,0,0,0,22
17th Street Cafe,los_angeles,0,0,0,0,0,0,0,0,0,0,...,0,1,1,0,1,1,0,0,0,12
1848 House,atlanta,0,0,0,0,0,0,0,0,0,0,...,0,1,0,1,0,0,1,0,0,17
