## This 

In [147]:
import pandas as pd
import csv
import os

 ## CHANGE to your API key
 Where the key string is passed to YelpAPI(): change that to your own!!

In [138]:
from yelpapi import YelpAPI 
yelp_api = YelpAPI('Your API KEY')

Complete list of parameters and output can be found here:
https://www.yelp.com/developers/documentation/v3/business_search

List and descriptions of output: https://www.yelp.com/developers/documentation/v3/business

## import your csvs:
load the folder in which they are held and iterate through that folder, appending each df to one another

In [139]:
#csv_loc = 'path to folder where your CSVs are stored'
#csv_loc = 'Users/sabinahartnett/Desktop/UChicago/CS_Winter2021/final-project-localgorithms/dummyCSV/'

csv_loc = 'CSVs/'
closed_loc = 'closed_wb/'
missing_loc = 'missing_wb/'
#change this to another name
out_file_yelp = "yelp_all.csv"
out_file_closed = "closed_all_wb.csv"
out_file_missing = "missing_all_wb.csv"

ALL_loc = 'combined/'
out_file_ALL = "all_combined_wb.csv"

In [178]:
#to clean all the data - create a folder of thefiles for closed businesses and be sure they are all listed as closed,
#then run over EVERYTHING to combine
def combine_csvs(folder_path, are_closed, ouput_filename):
    '''
    Parses through a folder in the remote location and creates
    a dataframe of all unique entries (prioritizing closed businesses),
    and outputs that DF as a CSV.
    
    Inputs:
        folder_path: path to folder with input csvs
        are_closed: whether to check that all businesses in those CSVs
            are closed ('is_closed' = True): will overwrite if not.
        output_filename: name of file to write out to (outside of CSV folder)
    
    Ouput:
        all_pd: a full pandas dataframe of all unique businesses from CSVs
        csv file: written out version of the full pandas DF.
    '''
    
    csv_os = os.listdir(folder_path)
    #note: order is whatever order the files exist in in the folder (likely alphabetic)
    all_pd= pd.DataFrame()
    
    for file_ in csv_os:
        df1 = pd.read_csv(folder_path + file_) #, index_col = 'Unnamed: 0')
        
        print("importing:", file_, "of length:", len(df1), "changing all businesses to closed = ", are_closed)
        
        if are_closed:
            df1['is_closed'] = True
        
        #note indecies won't line up so we have to overwrite them
        all_pd = pd.concat([all_pd, df1], axis = 0, join = 'outer', sort=True, ignore_index=True)
    
    #drop 'unnamed' column
    all_pd.drop(['Unnamed: 0'], axis = 1, errors = 'ignore', inplace = True)
    #businesses will be sorted by name and is_closed ('True' > 'False')
    all_pd.sort_values(by=['id', 'is_closed'], inplace = True)
    #remove the 'lower value' of the location: if one is 'True' keep that
    all_pd.drop_duplicates(subset = 'id', keep='last', inplace = True)
    
    all_pd.to_csv(ouput_filename, index = True)
    
    return all_pd

In [180]:
#combine_yelps = combine_csvs('CSVs/', False, "all_OG_yelp.csv")

In [115]:
yelp_DF = combine_csvs(csv_loc, False, out_file_yelp)

importing: egemen_all.csv of length: 8432 changing all businesses to closed =  False
importing: .DS_Store of length: 0 changing all businesses to closed =  False
importing: SH_all_business_query.csv of length: 2713 changing all businesses to closed =  False
importing: mk_complete.csv of length: 8029 changing all businesses to closed =  False
importing: all_Jade_data.csv of length: 2717 changing all businesses to closed =  False


In [181]:
closed_DF = combine_csvs(closed_loc, True, out_file_closed)

importing: yelp_results_from_now_closed_wb.csv of length: 539 changing all businesses to closed =  True
   Unnamed: 0                                alias  business type  \
0           0         one-eleven-food-hall-chicago     Restaurant   
1           1             karlas-kitchen-chicago-2     Restaurant   
2           2  lincoln-quality-meat-market-chicago  Grocery Store   
3           3         one-eleven-food-hall-chicago         Bakery   
4           4                 taco-bell-chicago-52     Restaurant   

                                          categories  \
0   [{'alias': 'food_court', 'title': 'Food Court'}]   
1  [{'alias': 'comfortfood', 'title': 'Comfort Fo...   
2  [{'alias': 'meats', 'title': 'Meat Shops'}, {'...   
3   [{'alias': 'food_court', 'title': 'Food Court'}]   
4  [{'alias': 'tacos', 'title': 'Tacos'}, {'alias...   

                                         coordinates   display_phone  \
0  {'latitude': 41.6927953199255, 'longitude': -8...             NaN   


In [182]:
missing_DF = combine_csvs(missing_loc, False, out_file_missing)

importing: .DS_Store of length: 0 changing all businesses to closed =  False
Empty DataFrame
Columns: [Unnamed: 0]
Index: []
importing: yelp_results_from_missing_after_1_wb.csv of length: 1626 changing all businesses to closed =  False
  Unnamed: 0                             alias  business type  \
0          0      carmelo-s-taco-place-chicago     Restaurant   
1          1  maggie-gyros-and-chicken-chicago     Restaurant   
2          2           morenas-kitchen-chicago     Restaurant   
3          3            northern-trust-chicago     Restaurant   
4          4             deli-and-more-chicago  Grocery Store   

                                          categories  \
0             [{'alias': 'tacos', 'title': 'Tacos'}]   
1  [{'alias': 'greek', 'title': 'Greek'}, {'alias...   
2     [{'alias': 'dominican', 'title': 'Dominican'}]   
3  [{'alias': 'banks', 'title': 'Banks & Credit U...   
4  [{'alias': 'delis', 'title': 'Delis'}, {'alias...   

                                    

In [183]:
full_DF = combine_csvs(ALL_loc, False, out_file_ALL)

importing: all_OG_yelp.csv of length: 11043 changing all businesses to closed =  False
   Unnamed: 0                                  alias  \
0        8344                   burger-king-skokie-2   
1       20907  xochimilco-mexican-restaurant-chicago   
2       14901                altos-de-jalisco-cicero   
3       21151                     los-flacos-chicago   
4       20536                      dunkin-chicago-11   

                                          categories  \
0  [{'alias': 'hotdogs', 'title': 'Fast Food'}, {...   
1         [{'alias': 'mexican', 'title': 'Mexican'}]   
2         [{'alias': 'mexican', 'title': 'Mexican'}]   
3         [{'alias': 'mexican', 'title': 'Mexican'}]   
4  [{'alias': 'donuts', 'title': 'Donuts'}, {'ali...   

                                         coordinates   display_phone  \
0      {'latitude': 42.01167, 'longitude': -87.7603}             NaN   
1  {'latitude': 41.9616665, 'longitude': -87.6802...    +17736611534   
2  {'latitude': 41.8654

In [134]:
len(full_DF) #12 465

12465