# Restaurants to CSV
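This notebook reads the Yelp academic business dataset, keeps the restaurants that pass a few cleaning filters, and writes two CSV files: one with descriptive restaurant information for training, and one with location fields for creating edges between restaurants.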

In [1]:
import json
import pandas as pd
import numpy as np
import math
import time

### Reading the data

In [2]:
# lines=True makes pandas parse the file as one JSON object per line.
business_json_path = './yelp/yelp_academic_dataset_business.json'
business_df = pd.read_json(business_json_path, lines=True)

In [3]:
# Preview the first two businesses to check the loaded columns.
business_df.head(n=2)

Unnamed: 0,address,attributes,business_id,categories,city,hours,is_open,latitude,longitude,name,postal_code,review_count,stars,state
0,921 Pearl St,"{'RestaurantsTableService': 'True', 'WiFi': 'u...",6iYb2HFDywm3zjuRg0shjw,"Gastropubs, Food, Beer Gardens, Restaurants, B...",Boulder,"{'Monday': '11:0-23:0', 'Tuesday': '11:0-23:0'...",1,40.017544,-105.283348,Oskar Blues Taproom,80302,86,4.0,CO
1,7000 NE Airport Way,"{'RestaurantsTakeOut': 'True', 'RestaurantsAtt...",tCbdrRPZA0oiIYSmHG3J0w,"Salad, Soup, Sandwiches, Delis, Restaurants, C...",Portland,"{'Monday': '5:0-18:0', 'Tuesday': '5:0-17:0', ...",1,45.588906,-122.593331,Flying Elephants at PDX,97218,126,4.0,OR


### Cleaning the data

In [4]:
# Discard every business row that has a missing value in any column.
business_df = business_df.dropna(axis=0, how="any")

In [5]:
# Keep only the businesses whose category string mentions "Restaurants".
is_restaurant = business_df["categories"].str.contains("Restaurants")
restaurants_df = business_df.loc[is_restaurant]

In [6]:
# Remove outliers: drop the listed states and keep only rows whose longitude
# is negative. Both conditions are combined into a single row mask.
outlier_states = ["WY", "VA", "NH", "MN", "KY", "KS", "ABE"]
in_outlier_state = restaurants_df["state"].isin(outlier_states)
has_negative_longitude = restaurants_df["longitude"] < 0
restaurants_df = restaurants_df.loc[~in_outlier_state & has_negative_longitude]
# Number of restaurants left after the outlier filter.
restaurants_df.shape[0]

42638

In [7]:
# Count restaurants per city, then discard every city with fewer than 20.
# Note: despite its name, `unique_cities` holds the cities *below* that
# threshold (the ones being removed), not a de-duplicated city list.
cities = restaurants_df.groupby("city").size()
unique_cities = cities.loc[cities < 20].index
in_small_city = restaurants_df["city"].isin(unique_cities)
restaurants_df = restaurants_df.loc[~in_small_city]

In [8]:
# Number of restaurants left after removing the small cities.
restaurants_df.shape[0]

41664

### CSV of Restaurants with Information for Training

In [10]:
# Preview the cleaned restaurant rows before splitting them into the two CSVs.
restaurants_df.head(n=5)

Unnamed: 0,address,attributes,business_id,categories,city,hours,is_open,latitude,longitude,name,postal_code,review_count,stars,state
0,921 Pearl St,"{'RestaurantsTableService': 'True', 'WiFi': 'u...",6iYb2HFDywm3zjuRg0shjw,"Gastropubs, Food, Beer Gardens, Restaurants, B...",Boulder,"{'Monday': '11:0-23:0', 'Tuesday': '11:0-23:0'...",1,40.017544,-105.283348,Oskar Blues Taproom,80302,86,4.0,CO
1,7000 NE Airport Way,"{'RestaurantsTakeOut': 'True', 'RestaurantsAtt...",tCbdrRPZA0oiIYSmHG3J0w,"Salad, Soup, Sandwiches, Delis, Restaurants, C...",Portland,"{'Monday': '5:0-18:0', 'Tuesday': '5:0-17:0', ...",1,45.588906,-122.593331,Flying Elephants at PDX,97218,126,4.0,OR
5,3755 Main St,"{'GoodForKids': 'True', 'Alcohol': 'u'none'', ...",D4JtQNTI4X3KcbzacDJsMw,"Restaurants, Thai",Vancouver,"{'Monday': '17:0-21:0', 'Tuesday': '17:0-21:0'...",1,49.251342,-123.101333,Bob Likes Thai Food,V5V,169,3.5,BC
12,474 Lowell St,"{'RestaurantsGoodForGroups': 'True', 'HasTV': ...",HPA_qyMEddpAEtFof02ixg,"Food, Pizza, Restaurants",Peabody,"{'Monday': '11:0-21:0', 'Tuesday': '11:0-21:0'...",1,42.541155,-70.973438,Mr G's Pizza & Subs,01960,39,4.0,MA
13,247 E Michigan St,"{'BusinessParking': '{'garage': False, 'street...",ufCxltuh56FF4-ZFZ6cVhg,"Restaurants, American (New), Bakeries, Dessert...",Orlando,"{'Tuesday': '11:0-18:0', 'Wednesday': '11:0-18...",1,28.513265,-81.374707,Sister Honey's,32806,135,4.5,FL


In [11]:
# Build the training table by discarding the identifier and location columns;
# every other column of restaurants_df is kept unchanged.
id_and_location_columns = [
    'address', 'postal_code', 'state', 'business_id',
    'city', 'latitude', 'longitude',
]
restaurants_df_full = restaurants_df.drop(labels=id_and_location_columns, axis="columns")

In [12]:
# Preview the first five rows of the training table.
restaurants_df_full.head(n=5)

Unnamed: 0,attributes,categories,hours,is_open,name,review_count,stars
0,"{'RestaurantsTableService': 'True', 'WiFi': 'u...","Gastropubs, Food, Beer Gardens, Restaurants, B...","{'Monday': '11:0-23:0', 'Tuesday': '11:0-23:0'...",1,Oskar Blues Taproom,86,4.0
1,"{'RestaurantsTakeOut': 'True', 'RestaurantsAtt...","Salad, Soup, Sandwiches, Delis, Restaurants, C...","{'Monday': '5:0-18:0', 'Tuesday': '5:0-17:0', ...",1,Flying Elephants at PDX,126,4.0
5,"{'GoodForKids': 'True', 'Alcohol': 'u'none'', ...","Restaurants, Thai","{'Monday': '17:0-21:0', 'Tuesday': '17:0-21:0'...",1,Bob Likes Thai Food,169,3.5
12,"{'RestaurantsGoodForGroups': 'True', 'HasTV': ...","Food, Pizza, Restaurants","{'Monday': '11:0-21:0', 'Tuesday': '11:0-21:0'...",1,Mr G's Pizza & Subs,39,4.0
13,"{'BusinessParking': '{'garage': False, 'street...","Restaurants, American (New), Bakeries, Dessert...","{'Tuesday': '11:0-18:0', 'Wednesday': '11:0-18...",1,Sister Honey's,135,4.5


In [13]:
# Dropping columns must not change the row count; show it as a sanity check.
restaurants_df_full.shape[0]

41664

In [15]:
# Write the training table to disk; index=True stores the DataFrame's row
# labels as the first CSV column.
restaurants_csv_path = "./restaurants.csv"
restaurants_df_full.to_csv(restaurants_csv_path, index=True)

### CSV for Creating Edges between Restaurants

In [6]:
# Build the location-only table by discarding the descriptive columns, leaving
# the identifier, open flag and location fields.
# NOTE(review): this cell's execution count is out of sequence with the cells
# above it; re-run the notebook top to bottom so this frame is derived from
# the fully cleaned restaurants_df.
descriptive_columns = [
    'address', 'categories', 'name', 'review_count',
    'stars', 'attributes', 'hours',
]
restaurants_df_loc = restaurants_df.drop(labels=descriptive_columns, axis="columns")

In [7]:
# Preview the first ten rows of the location-only table.
restaurants_df_loc.head(n=10)

Unnamed: 0,business_id,city,is_open,latitude,longitude,postal_code,state
0,6iYb2HFDywm3zjuRg0shjw,Boulder,1,40.017544,-105.283348,80302,CO
1,tCbdrRPZA0oiIYSmHG3J0w,Portland,1,45.588906,-122.593331,97218,OR
5,D4JtQNTI4X3KcbzacDJsMw,Vancouver,1,49.251342,-123.101333,V5V,BC
12,HPA_qyMEddpAEtFof02ixg,Peabody,1,42.541155,-70.973438,01960,MA
13,ufCxltuh56FF4-ZFZ6cVhg,Orlando,1,28.513265,-81.374707,32806,FL
16,GfWJ19Js7wX9rwaHQ7KbGw,Orlando,1,28.350498,-81.542819,32830,FL
22,ynTjh_FdhbG5hY69HsEoaA,Orlando,0,28.381945,-81.510327,32836,FL
26,hcRxdDg7DYryCxCoI8ySQA,Boston,1,42.338544,-71.106842,02215,MA
29,jGennaZUr2MsJyRhijNBfA,Boston,1,42.363442,-71.025781,02128,MA
33,QciD6FbNklaJvUbLExD4Nw,Burnaby,1,49.254436,-123.02005,V5G 1G4,BC


In [11]:
# Write the location-only table to disk; index=True stores the DataFrame's row
# labels as the first CSV column.
restaurants_loc_csv_path = "./restaurants_loc_only.csv"
restaurants_df_loc.to_csv(restaurants_loc_csv_path, index=True)