In [1]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import gmaps
import os
import json
import seaborn as sns


# Import API key
from config import g_key

In [2]:
# Files to Load (Remember to Change These)
fast_food_restaurants_2018 = "../Resources/Fast_Food_Restaurants_2018.csv"
census_2018 ="../Resources/census_data_clean_2018.csv"
# Read files into Pandas DF
Rest_2018 = pd.read_csv(fast_food_restaurants_2018)
census_2018=pd.read_csv(census_2018)

In [3]:
# remove columns we will not need
Rest_2018=Rest_2018.drop(['id','sourceURLs','websites'], axis =1)
Rest_2018.columns

Index(['dateAdded', 'dateUpdated', 'address', 'categories', 'city', 'country',
       'keys', 'latitude', 'longitude', 'name', 'postalCode', 'province'],
      dtype='object')

In [4]:
# reorganize/rename columns and drop other columns we will not need as well
Rest_2018 = Rest_2018.rename(columns={'name':'Name', 'address': 'Address', 'city': 'City', 'country': 'Country', 'latitude': 'Latitude', 'longitude':'Longitude', 'postalCode': 'Zip Code',\
                                     'province': 'State'})
Rest_2018 = Rest_2018[['Name', 'Address', 'City','State', 'Zip Code','Country', 'Latitude', 'Longitude']]
Rest_2018.head()

Unnamed: 0,Name,Address,City,State,Zip Code,Country,Latitude,Longitude
0,SONIC Drive In,800 N Canal Blvd,Thibodaux,LA,70301,US,29.814697,-90.814742
1,SONIC Drive In,800 N Canal Blvd,Thibodaux,LA,70301,US,29.814697,-90.814742
2,Taco Bell,206 Wears Valley Rd,Pigeon Forge,TN,37863,US,35.803788,-83.580553
3,Arby's,3652 Parkway,Pigeon Forge,TN,37863,US,35.782339,-83.551408
4,Steak 'n Shake,2118 Mt Zion Parkway,Morrow,GA,30260,US,33.562738,-84.321143


In [5]:
# get unique names in list and sort alphabetically to assist with cleaning
names = Rest_2018['Name'].unique()
names.sort()
print(len(names))
print(names)

571
['7-Eleven' '90 Miles Cuban Cafe' 'A&W/Long John Silvers'
 "A's Ace Burgers" 'Abruzzi Pizza' 'Acropolis Gyro Palace'
 'Adobe Cantina Salsa' 'Ak Buffet' 'All About Burger'
 'Ameci Pizza & Pasta' "Andy's Frozen Custard" "Angelina's Pizza"
 "Ani's Chicken" "Arby's" "Arby's - Closed" 'Arbys' 'Arctic Circle'
 "Arni's Crawfordsville" "Arthur Treacher's Fish & Chips" 'Au Bon Pain'
 "Auntie Anne's" "Auntie Anne's Pretzels" 'Ay Jalisco' 'B Good'
 'B&D Burgers' "B's Pizza" 'B.GOOD' 'BURGERFI' 'Back Yard Burgers'
 'Backyard Grill' "Bad Daddy's Burger Bar" 'Baja Fresh Mexican Grill'
 "Baker's Drive Thru" "Baker's Drive-thru" 'Band Box Diner' 'Barberitos'
 "Barro's Pizza" 'Baskin-Robbins' 'Batter Up' 'BeanThru' 'Beef Villa'
 "Bell's Pizza" 'Belleria Pizzeria' "Ben & Jerry's" "Ben and Jerry's"
 "Bennidito's Pizza" 'Best Gyros' 'Big House Burgers' 'Big Island Pizza'
 'Bistro To Go' 'Blackies Hotdog Stand' 'Blackjack Pizza'
 "Blake's Lotaburger" 'Blimpie' 'Blue Coast Burrito' 'Boardwalk Grill'
 'B

In [6]:
# Rename all locations that have different spelling, punctuation etc
Rest_2018['Name'] = Rest_2018['Name'].replace({"Arby's - Closed":"Arby's",'Arbys': "Arby's", "Auntie Anne's": "Auntie Anne's Pretzels","Baker's Drive Thru":"Baker's Drive-thru","Ben and Jerry's": "Ben & Jerry's",'Bob Evans Restaurant':'Bob Evans',"Bojangles' Famous Chicken 'n Biscuits": 'Bojangles', 'Burger King¬Æ': 'Burger King','Capri Italian Restaurant':'Capri Restaurant',"Carl's Jr.":"Carl's Jr","Carl's Jr. / Green Burrito":"Carl's Jr","Carl's Jr / Green Burrito":"Carl's Jr",'Caseys Carry Out Pizza':"Casey's General Store","Charley's Grilled Subs":'Charleys Philly Steaks',"Checker's Pizza":'Checkers','Chick-fil-A':'Chick-Fil-A', 'Chipotle':'Chipotle Mexican Grill','Dairy Queen (Treat Only)':'Dairy Queen','Dunkin Donuts':"Dunkin' Donuts",'Five Guys Burgers Fries':'Five Guys', 'Five Guys Burgers And Fries':'Five Guys',"Foster's Freeze":'Fosters Freeze', "Hardee's":'Hardees',"Hardee's / Red Burrito":'Hardees', "Hardee's/red Burrito":'Hardees','Jack in the Box':'Jack In The Box', 'Jack in the Box -':'Jack In The Box'})
Rest_2018['Name'] = Rest_2018['Name'].replace({"Jimmy John's":'Jimmy Johns','KFC - Kentucky Fried Chicken':'KFC','KFC/AW':'KFC','KFC/Long John Silvers':'KFC', 'KFC/Taco Bell':'KFC'})
Rest_2018['Name'] = Rest_2018['Name'].replace({"Little Caesar's Pizza":'Little Caesars', 'Little Caesars Pizza':'Little Caesars',"Long John Silver's / AW" 'Long John Silvers':"Long John Silver's",'Long John Silvers / A&W':"Long John Silver's",'Mc Donalds':"McDonald's","McDonald's of Rolesville":"McDonald's",'McDonalds':"McDonald's","McDonalds's":"McDonald's","Mcdonald's":"McDonald's",'Mcdonalds':"McDonald's",'Mcdonalds Whitehouse':"McDonald's"})
Rest_2018['Name'] = Rest_2018['Name'].replace({'Panda Express Innovation Kitchen':'Panda Express' ,'PepperJax Grill':'Pepperjax Grill',"Popeye's Louisiana Kitchen":'Popeyes','Popeyes Chicken & Biscuits':'Popeyes','Popeyes Chicken Biscuits':'Popeyes','Popeyes Louisiana Kitchen':'Popeyes'})
Rest_2018['Name'] = Rest_2018['Name'].replace({'QDOBA Mexican Eats':'Qdoba Mexican Grill', 'Qdoba Mexican Eats':'Qdoba Mexican Grill', "Quizno's": 'Quiznos', 'Quiznos Sub':'Quiznos',"Raising Cane's":'Raising Canes', "Raising Cane's Chicken Fingers":'Raising Canes'})
Rest_2018['Name'] = Rest_2018['Name'].replace({"Rally's Hamburgers":"Rally's", 'Rallys':"Rally's",'Roma Pizza':'Romas Pizza','SONIC Drive In': 'SONIC Drive-In', 'SUBWAY¬Æ':'SUBWAY'})
Rest_2018['Name'] = Rest_2018['Name'].replace({'Sonic':'SONIC Drive-In', "Sonic America's Drive-In":'SONIC Drive-In','Sonic Drive In':'SONIC Drive-In','Sonic Drive in':'SONIC Drive-In','Sonic Drive-In':'SONIC Drive-In',"Steak 'n Shake":'Steak N Shake','Taco Bell / KFC':'Taco Bell' ,'Taco Bell/KFC':'Taco Bell','Taco Bell/Pizza Hut':'Taco Bell'})
Rest_2018['Name'] = Rest_2018['Name'].replace({ 'Wienerschitzel':'Wienerschnitzel','Wingstop Restaurant':'Wingstop',"Wolf's Dairy Queen" :'Dairy Queen',"Zaxby's Chicken Fingers & Buffalo Wings": "Zaxby's",'b.good':'B.GOOD', 'A&W/Long John Silvers':"Long John Silver's"})
# After going through team decided to rename Pizza Hut, Taco Bell and KFC to YUM Brands
Rest_2018['Name'] = Rest_2018['Name'].replace({'Pizza Hut': 'YUM Brands', 'KFC': 'YUM Brands','Taco Bell': 'YUM Brands'})

In [7]:
# Remove locations that are not food related 'Walmart Supercenter''T-Mobile''Hilton Boston Logan Airport''7-Eleven'
# https://thispointer.com/python-pandas-how-to-drop-rows-in-dataframe-by-conditions-on-column-values/
indexNames= Rest_2018[(Rest_2018['Name']=='Walmart Supercenter')].index
Rest_2018.drop(indexNames, inplace=True)

In [8]:
indexNames= Rest_2018[(Rest_2018['Name']=='T-Mobile')].index
Rest_2018.drop(indexNames, inplace=True)

In [9]:
indexNames= Rest_2018[(Rest_2018['Name']=='Hilton Boston Logan Airport')].index
Rest_2018.drop(indexNames, inplace=True)

In [10]:
indexNames= Rest_2018[(Rest_2018['Name']=='7-Eleven')].index
Rest_2018.drop(indexNames, inplace=True)
print(len(Rest_2018))
Rest_2018['Name'].nunique()

9977


491

In [11]:
# find number of entries for each location
Rest_2018['Name'].value_counts()

McDonald's           1953
YUM Brands           1442
Burger King           834
Subway                776
Arby's                669
                     ... 
China Restaurant        1
The Hotdog Shoppe       1
Mr. Chicken             1
Charo Chicken           1
Cobani Gyro Kebab       1
Name: Name, Length: 491, dtype: int64

In [12]:
# Remove all locations that have less than 50 entries
# https://www.thetopsites.net/article/58467286.shtml
threshold = 50
clean_rest_2018 = Rest_2018[Rest_2018.groupby('Name')['Name'].transform('count')>threshold].copy()
print(len(clean_rest_2018))
clean_rest_2018['Name'].value_counts()

8526


McDonald's         1953
YUM Brands         1442
Burger King         834
Subway              776
Arby's              669
Wendy's             628
Jack In The Box     332
Chick-Fil-A         228
Dairy Queen         220
Domino's Pizza      215
SONIC Drive-In      205
Carl's Jr           130
Jimmy Johns         121
Hardees             104
Krystal              82
Dunkin' Donuts       76
Panera Bread         76
SUBWAY               73
Culver's             71
Del Taco             66
Panda Express        62
Steak N Shake        56
In-N-Out Burger      54
Taco John's          53
Name: Name, dtype: int64

In [13]:
clean_rest_2018.to_csv('../Resources/clean_rest_2018.csv', encoding='utf-8', index = False)

In [14]:
# combine with chipotle csv
# first read get file then read into pandas DF
Chipotle_stores = "../Resources/chipotle_stores.csv"
chipotle = pd.read_csv(Chipotle_stores)
chipotle.head(2)
print(len(chipotle))

2629


In [15]:
clean_rest_2018= clean_rest_2018[['Name', 'Address', 'City','State', 'Zip Code','Country', 'Latitude', 'Longitude']]
clean_rest_2018.head(2)
print(len(clean_rest_2018))

8526


In [16]:
combine= [chipotle,clean_rest_2018]
all_rest_df = pd.concat(combine)
print(len(all_rest_df))
all_rest_df.tail()

11155


Unnamed: 0,Name,Address,City,State,Zip Code,Country,Latitude,Longitude
9995,YUM Brands,3460 Robinhood Rd,Winston-Salem,NC,27106,US,36.117563,-80.316553
9996,YUM Brands,3069 Kernersville Rd,Winston-Salem,NC,27107,US,36.077718,-80.176748
9997,YUM Brands,838 S Main St,Kernersville,NC,27284,US,36.111015,-80.089165
9998,YUM Brands,1702 Glendale Dr SW,Wilson,NC,27893,US,35.719981,-77.945795
9999,YUM Brands,1405 W Broad St,Elizabethtown,NC,28337,US,34.632778,-78.624615


In [20]:
all_rest_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 11155 entries, 0 to 9999
Data columns (total 8 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   Name       11155 non-null  object 
 1   Address    11155 non-null  object 
 2   City       11155 non-null  object 
 3   State      11155 non-null  object 
 4   Zip Code   11155 non-null  object 
 5   Country    11155 non-null  object 
 6   Latitude   11155 non-null  float64
 7   Longitude  11155 non-null  float64
dtypes: float64(2), object(6)
memory usage: 784.3+ KB


In [22]:
all_rest_df= all_rest_df.rename(columns={'Zip Code': 'Zipcode'})
all_rest_df['Zipcode']=all_rest_df.Zipcode.astype(float)
all_rest_df.info()

ValueError: could not convert string to float: 'e 3001'

In [18]:
census_2018=census_2018.rename(columns={'Zipcode': 'Zip Code'})
census_2018.head()

Unnamed: 0,Zip Code,Population,Median Age,Household Income,Per Capita Income,Poverty Count,Poverty Rate
0,601.0,17242.0,40.5,13092.0,6999.0,10772.0,62.475351
1,602.0,38442.0,42.3,16358.0,9277.0,19611.0,51.014515
2,603.0,48814.0,41.1,16603.0,11307.0,24337.0,49.856599
3,606.0,6437.0,43.3,12832.0,5943.0,4163.0,64.672984
4,610.0,27073.0,42.1,19309.0,10220.0,11724.0,43.305138


In [19]:
# Add census data to right of all rest data
# https://stackoverflow.com/questions/17978133/python-pandas-merge-only-certain-columns
all_rest_df = pd.merge(all_rest_df,census_2018, on="Zip Code", how='right')
print(len(all_rest_df))
all_rest_df.head(150)

ValueError: You are trying to merge on object and float64 columns. If you wish to proceed you should use pd.concat

In [None]:
# drop all Nan values in address Column
all_rest_df.dropna(subset=['Latitude'], inplace=True)
all_rest_df

In [None]:
# wound up combinging via excel, could not get to combine without duplicate columns for address, city, etc. need to talk about at class

all_rest_df = "../Resources/All_rest_2018.csv"
all_rest_df = pd.read_csv(all_rest_df)
all_rest_df.head()

In [None]:
all_rest_df['Population'].describe()


In [None]:
print(len(all_rest_df))
all_rest_df.head()

In [None]:
print(len(census_2018))

In [None]:
census_2018=census_2018.dropna(how='any')
print(len(census_2018))
census_2018.head(2)

In [None]:
# api_key = g_key
# single= requests.get("https://maps.googleapis.com/maps/api/geocode/json?key=" + api_key +"&components=postal_code:97403")
# single_response = single.json()

# print(json.dumps(single_response, indent=4, sort_keys=True))

In [None]:
# lat = single_response["results"][0]["geometry"]["location"]["lat"]
# lng = single_response["results"][0]["geometry"]["location"]["lng"]
# print(lat,lng)

Commented out the below cell, while i was able to get the correct call for the API(as above) i was not able to return any zipcodes. all went straight to Zip not found. I then found a dataset with the information and plugged it into our clean census data. https://public.opendatasoft.com/explore/dataset/us-zip-code-latitude-and-longitude/

In [None]:
# https://stackoverflow.com/questions/5585957/get-latlng-from-zip-code-google-maps-api
# Turn census zip codes into lat/long combinations for heatmap of US
# https://maps.googleapis.com/maps/api/geocode/json?key=YOUR_API_KEY&components=postal_code:97403
# Lat=[]
# Long=[]

# url = 'https://maps.googleapis.com/maps/api/geocode/json?key='
# api_key = g_key
# zipcode = census_2018['Zipcode']

# for zips in zipcode:
#     print(f'{url}{api_key}&components=postal_code:{zips}')
# #     try:
#         pop_request = requests.get(f'{url}{api_key}&components=postal_code:{zips}')
# # (url + api_key +'&components=postal_code:'+zips)
#         pop_response=pop_request.json()
#         Lat.append(pop_response["results"][0]["geometry"]["location"]["lat"])
#         Long.append(pop_response["results"][0]["geometry"]["location"]["lng"])
#         print(f'Success')
#     except IndexError:
#         print(f'Zip not found')

    


In [None]:
# configure gmaps with API key
gmaps.configure(api_key =g_key)

In [None]:
# Store lat/long into locations
locations = census_2018[['Latitude', 'Longitude']]

# Store humidity as weight
weight = census_2018['Population']

# Set Chipotle markers
chip_locations = chipotle[['Latitude', 'Longitude']]
# Set McDonalds markers
mcdon_locations = all_rest_df.loc[all_rest_df['Name']=='McDonald\'s'] 
mcdonlatlong=mcdon_locations[['Latitude', 'Longitude']]
# Set yum brand locations (Taco Bell, Pizza Hut and KFC)
taco_locations = all_rest_df.loc[all_rest_df['Name']=='Taco Bell']
tacolatlong=taco_locations[['Latitude', 'Longitude']]
pizza_locations = all_rest_df.loc[all_rest_df['Name']=='Pizza Hut']
pizzalatlong=pizza_locations[['Latitude', 'Longitude']]
kfc_locations = all_rest_df.loc[all_rest_df['Name']=='KFC']
kfclatlong=kfc_locations[['Latitude', 'Longitude']]

In [None]:
fig = gmaps.figure()

heat_layer=gmaps.heatmap_layer(locations, weight, dissipating=True, max_intensity=25, point_radius=2)

chip_layer = gmaps.symbol_layer(
    chip_locations, fill_color='black', stroke_color='black', scale=1)

taco_layer = gmaps.symbol_layer(
    tacolatlong, fill_color='green', stroke_color='green', scale=1)

pizza_layer = gmaps.symbol_layer(
    pizzalatlong, fill_color='green', stroke_color='green', scale=1)

kfc_layer = gmaps.symbol_layer(
    kfclatlong, fill_color='green', stroke_color='green', scale=1)

fig.add_layer(heat_layer)
fig.add_layer(chip_layer)
fig.add_layer(taco_layer)
fig.add_layer(pizza_layer)
fig.add_layer(kfc_layer)

fig

In [None]:
# Set up binns for population on all_rest_df
bins = [0, 20000, 40000, 60000, 80000, 100000,120000]
group_names=['0-20K', '20k - 40k', '40k - 60k', '60k - 80k', '80k - 100k', '100k - 120k']
all_rest_df['Zip Code Population Bins'] = pd.cut(all_rest_df['Population'], bins, labels = group_names, include_lowest =True)
all_rest_df['Zip Code Population Bins'].value_counts()


In [None]:
all_rest_pop = all_rest_df.groupby(['Name','Zip Code Population Bins'])
print(all_rest_pop['Zip Code Population Bins'].count())

In [None]:
chart_all_rest = all_rest_df.loc[(all_rest_df['Name']== 'Chipotle Mexican Grill')|(all_rest_df['Name']== 'YUM Brands')|(all_rest_df['Name']=='Burger King')|(all_rest_df['Name']=='McDonald\'s')]
chart_all_rest.head(2)

In [None]:
# https://stackoverflow.com/questions/53997862/pandas-groupby-two-columns-and-plot
sns_plot = sns.countplot(data=chart_all_rest,x='Zip Code Population Bins',hue='Name')
figure = sns_plot.get_figure()
figure.savefig('Population_Bins_and_Locations.png')
# sns_plot.savefig('Population_Bins_and_Locations.png')

In [None]:
census_2018['Population'].describe()

In [None]:
bins = [0, 20000, 40000, 60000, 80000, 100000,120000]
group_names=['0-20K', '20k - 40k', '40k - 60k', '60k - 80k', '80k - 100k', '100k - 120k']
census_2018['Zip Code Population Bins'] = pd.cut(census_2018['Population'], bins, labels = group_names, include_lowest =True)
census_2018['Zip Code Population Bins'].value_counts()

In [None]:
sns_plot = sns.countplot(data=census_2018,x='Zip Code Population Bins',hue='Zip Code Population Bins')
figure = sns_plot.get_figure()
figure.savefig('Census_Population_Bins_and_Locations.png')