In [None]:
import pandas as pd 
import numpy as np

In [None]:
# Town to process; it is used to build both the input and the output file names.
name = 'Revere'

# Input files all follow the "<town>-google.csv" naming format.
google_csv = name + '-google.csv'
town = pd.read_csv(google_csv)

In [None]:
# Turn the comma-separated 'types' string into a list of tags for easier searching:
# first drop every space character, then split on the commas.
types_without_spaces = town['types'].str.replace(' ', '')
town['type_list'] = types_without_spaces.str.split(',')

In [None]:
# code_dict maps place-type tags to 4-digit NAICS codes.
# A few entries use 5 or 6 digits where the more specific NAICS code was
# trivially easy to find. Essentially all of these were assigned by hand.
#
# NOTE(review): 4248, 4244, 4239, 4237 and 4232 appear to be merchant-wholesaler
# codes in NAICS, while the tags describe retail stores -- confirm this is intended.
code_dict = {
    'bar': 7224,
    'liquor_store': 4248,
    'grocery_or_supermarket': 4244,
    'secondary_school': 6111,
    'school': 6111,
    'lodging': 7211,
    'car_dealer': 4411,
    'bakery': 4452,
    'car_repair': 8111,
    'jewelry_store': 4239,
    'bank': 5221,
    'department_store': 4521,
    'gym': 7139,
    'dentist': 6212,
    'hardware_store': 4237,
    'furniture_store': 4232,
    'pharmacy': 4461,
    'drugstore': 4461,
    'clothing_store': 4481,
    'pet_store': 4539,
    'electronics_store': 4431,
    'local_government_office': 9211,
    'city_hall': 9211,
    'place_of_worship': 8131,
    'electrician': 2382,
    'restaurant': 7225,
    'convenience_store': 44512,
    'shoe_store': 4482,
    'hair_care': 81211,
    'doctor': 6211,
    'insurance_agency': 5242,
    'lawyer': 5411,
    'veterinary_care': 54194,
    'book_store': 451211,
    'university': 6113,
    'funeral_home': 8122,
    'post_office': 4911,
    'library': 51912,
    'roofing_contractor': 2381,
    'storage': 4931,
    'atm': 5221,  # used for credit union
    'movie_theater': 5121,
    'florist': 4531,
    'beauty_salon': 8121,
    'spa': 8121,
    'real_estate_agency': 5312,
    'home_goods_store': 4422,
    'movie_rental': 5322,
    'hospital': 6221,
    'moving_company': 4842,
    'police': 9221,
}

In [None]:
# Assign a NAICS code to every row: the first tag in the row's type_list that
# is a key of code_dict decides the code.
# `remaining` collects the tags of every row that ended up without a match;
# use it to decide which keys to add to code_dict next.

# Make sure the column exists even if no row matches at all; otherwise any
# later access to town['naics'] raises KeyError. An existing column is kept.
if 'naics' not in town.columns:
    town['naics'] = np.nan

remaining = set()
# .items() yields (index label, value) pairs, so this also works when the
# frame does not have the default 0..n-1 index.
for row, types in town['type_list'].items():
    # A row with a missing 'types' cell has NaN here instead of a list;
    # there is nothing to look up, so it simply stays unassigned.
    if not isinstance(types, list):
        continue
    for elem in types:
        if elem in code_dict:
            town.at[row, 'naics'] = code_dict[elem]
            break
    else:
        # the inner loop finished without a break, so no tag matched
        remaining.update(types)
print(remaining) #show what tags are still remaining

In [None]:
# Display every row that did not receive a NAICS code.
# These typically carry only generic tags such as "point_of_interest" or "establishment".
naics_col = town['naics']
has_no_code = naics_col.ne(naics_col)  # a NaN cell is not equal to itself
town[has_no_code]

In [None]:
# type_list was only a working column, so remove it before saving
town.drop(columns='type_list', inplace=True)

# write the result as "<town>-naics.csv"; change the name format to your liking
town.to_csv(name + '-naics.csv', index=False)