In [32]:
import pandas as pd
from build_database import (default_params, load_api_key, 
                          make_request, build_database,
                          add_to_database_if_new, retrieve_database
                         )
from pymongo import MongoClient
import time
import yaml
import requests


In [33]:
we_eat_client = MongoClient()

In [34]:
we_eat_database = we_eat_client['we_eat']
restaurant_collection = we_eat_database['restaurants']

In [38]:
build_database()

0
50
100
150
200
250
300
350
400
450
500
550
600
650
700
750
800
850
900
950


In [39]:
mile_from_galvanize = retrieve_database()

In [40]:
mile_from_galvanize.shape

(1403, 17)

In [107]:
from clean_database import (add_distance_from_galvanize, change_price_nulls,
                            clean_cats, add_clean_cats, dummify_price,
                            cat_counts, add_lat_longs, separate_coords,
                            load_categories, drop_unnecessaries, 
                            add_popularity, make_cats_csv,
                            get_category_dummies
                           )


In [42]:
add_lat_longs(mile_from_galvanize)

In [43]:
add_distance_from_galvanize(mile_from_galvanize)

In [94]:
galvanize_restaurants_df = mile_from_galvanize[mile_from_galvanize['dist_from_galvanize']<1]

In [95]:
galvanize_restaurants_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 658 entries, 0 to 1400
Data columns (total 20 columns):
_id                    658 non-null object
alias                  658 non-null object
categories             658 non-null object
coordinates            658 non-null object
display_phone          658 non-null object
distance               658 non-null float64
id                     658 non-null object
image_url              658 non-null object
is_closed              658 non-null bool
location               658 non-null object
name                   658 non-null object
phone                  658 non-null object
price                  597 non-null object
rating                 658 non-null float64
review_count           658 non-null int64
transactions           658 non-null object
url                    658 non-null object
lats                   658 non-null float64
longs                  658 non-null float64
dist_from_galvanize    658 non-null float64
dtypes: bool(1), float64(5), int

In [96]:
add_clean_cats(galvanize_restaurants_df)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  df['cats'] = df['categories'].apply(clean_cats)


In [112]:
cat_counts(galvanize_restaurants_df).most_common()

[('sandwiches', 106),
 ('newamerican', 61),
 ('seafood', 60),
 ('breakfast_brunch', 60),
 ('tradamerican', 53),
 ('cafes', 48),
 ('hotdogs', 46),
 ('delis', 43),
 ('bars', 41),
 ('chinese', 37),
 ('salad', 36),
 ('vietnamese', 34),
 ('japanese', 30),
 ('noodles', 30),
 ('coffee', 29),
 ('bakeries', 28),
 ('mexican', 26),
 ('asianfusion', 26),
 ('foodtrucks', 26),
 ('pizza', 26),
 ('cocktailbars', 25),
 ('sushi', 25),
 ('italian', 24),
 ('burgers', 21),
 ('foodstands', 21),
 ('cantonese', 20),
 ('bbq', 20),
 ('korean', 19),
 ('thai', 18),
 ('dimsum', 15),
 ('soup', 14),
 ('sportsbars', 14),
 ('mediterranean', 13),
 ('hotdog', 12),
 ('indpak', 11),
 ('tapasmallplates', 10),
 ('pubs', 10),
 ('poke', 10),
 ('wine_bars', 9),
 ('grocery', 9),
 ('hotpot', 9),
 ('french', 9),
 ('buffets', 8),
 ('juicebars', 8),
 ('lounges', 8),
 ('venues', 7),
 ('steak', 7),
 ('desserts', 7),
 ('taiwanese', 7),
 ('hawaiian', 7),
 ('diners', 7),
 ('wraps', 6),
 ('russian', 6),
 ('german', 6),
 ('beer_and_wine',

In [99]:
make_cats_csv(galvanize_restaurants_df)

In [100]:
categories = load_categories()


In [101]:
dummy_cats_df = get_category_dummies(galvanize_restaurants_df['cats'])

In [102]:
dummified_restaurant_df = pd.concat([galvanize_restaurants_df, dummy_cats_df], axis=1)

In [103]:
add_popularity(dummified_restaurant_df)

In [104]:
change_price_nulls(dummified_restaurant_df)

In [105]:
dummify_price(dummified_restaurant_df)

In [110]:
cleaned_df = drop_unnecessaries(dummified_restaurant_df)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  errors=errors)


In [111]:
cleaned_df.head()

Unnamed: 0,alias,id,image_url,location,rating,review_count,transactions,url,dist_from_galvanize,cats,...,category_shanghainese,category_cafeteria,category_seafoodmarkets,category_gelato,category_panasian,popularity,$,$$,$$$,$$$$
0,amazon-go-seattle-5,XDfmz7Woxx6NkZDGBtAVAQ,https://s3-media1.fl.yelpcdn.com/bphoto/fDQhxw...,"{'address1': '920 5th Ave', 'address2': None, ...",5.0,7,[],https://www.yelp.com/biz/amazon-go-seattle-5?a...,0.523317,"convenience,salad,sandwiches",...,0,0,0,0,0,35.0,0,1,0,0
1,the-pink-door-seattle-4,VOPdG8llLPaga9iJxXcMuQ,https://s3-media1.fl.yelpcdn.com/bphoto/c_vzRF...,"{'address1': '1919 Post Alley', 'address2': ''...",4.5,4019,[],https://www.yelp.com/biz/the-pink-door-seattle...,0.877225,"italian,wine_bars,seafood",...,0,0,0,0,0,18085.5,0,1,0,0
2,nirmals-seattle,GXz21OgpWOtnCF0GDXHPhA,https://s3-media3.fl.yelpcdn.com/bphoto/LVSEXb...,"{'address1': '106 Occidental Ave S', 'address2...",4.0,424,"[restaurant_reservation, pickup]",https://www.yelp.com/biz/nirmals-seattle?adjus...,0.178899,indpak,...,0,0,0,0,0,1696.0,0,1,0,0
3,bad-bishop-seattle,KFMsgY5mV_wCYsa0XX_hEQ,https://s3-media1.fl.yelpcdn.com/bphoto/1GpjX0...,"{'address1': '704 1st Ave', 'address2': None, ...",4.5,6,[],https://www.yelp.com/biz/bad-bishop-seattle?ad...,0.266417,"cocktailbars,comfortfood",...,0,0,0,0,0,27.0,0,1,0,0
4,pike-place-chowder-seattle,6I28wDuMBR5WLMqfKxaoeg,https://s3-media3.fl.yelpcdn.com/bphoto/ijju-w...,"{'address1': '1530 Post Aly', 'address2': 'Ste...",4.5,6317,[pickup],https://www.yelp.com/biz/pike-place-chowder-se...,0.79345,"seafood,soup",...,0,0,0,0,0,28426.5,0,1,0,0


## Pictures!

In [117]:
from get_pics import get_urls, display_pics, find_most_popular

In [137]:
def top_twenty_cats(df):
    temp_list = cat_counts(df).most_common()[:19]
    top_twenty_cats = []
    for item in temp_list:
        top_twenty_cats.append('category_'+item[0])
    return top_twenty_cats

In [138]:
top_twenty = top_twenty_cats(cleaned_df)
top_twenty

['category_sandwiches',
 'category_newamerican',
 'category_seafood',
 'category_breakfast_brunch',
 'category_tradamerican',
 'category_cafes',
 'category_hotdogs',
 'category_delis',
 'category_bars',
 'category_chinese',
 'category_salad',
 'category_vietnamese',
 'category_japanese',
 'category_noodles',
 'category_coffee',
 'category_bakeries',
 'category_mexican',
 'category_asianfusion',
 'category_foodtrucks']

In [139]:
most_popular = find_most_popular(cleaned_df, top_twenty)

category_sandwiches: biscuit-bitch-seattle
category_newamerican: purple-café-and-wine-bar-seattle-3
category_seafood: pike-place-chowder-seattle
category_breakfast_brunch: biscuit-bitch-seattle
category_tradamerican: metropolitan-grill-seattle
category_cafes: café-campagne-seattle-3
category_hotdogs: mod-pizza-seattle-7
category_delis: tats-delicatessen-seattle
category_bars: ivars-acres-of-clams-seattle-7
category_chinese: mikes-noodle-house-seattle
category_salad: specialtys-café-and-bakery-seattle-34
category_vietnamese: tamarind-tree-seattle
category_japanese: japonessa-sushi-cocina-seattle
category_noodles: tamarind-tree-seattle
category_coffee: biscuit-bitch-seattle
category_bakeries: piroshky-piroshky-seattle
category_mexican: el-borracho-seattle
category_asianfusion: chan-seattle-seattle-2
category_foodtrucks: where-ya-at-matt-seattle


In [140]:
display_pics(most_popular, cleaned_df)

pike-place-chowder-seattle


tats-delicatessen-seattle


japonessa-sushi-cocina-seattle


metropolitan-grill-seattle


mikes-noodle-house-seattle


tamarind-tree-seattle


biscuit-bitch-seattle


chan-seattle-seattle-2


piroshky-piroshky-seattle


where-ya-at-matt-seattle


purple-café-and-wine-bar-seattle-3


ivars-acres-of-clams-seattle-7


el-borracho-seattle


café-campagne-seattle-3


mod-pizza-seattle-7


specialtys-café-and-bakery-seattle-34
