In [None]:
# Dependencies and Setup
import json
import os

import gmaps
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
import seaborn as sns
from scipy.stats import linregress

# Import API key
from config import g_key

In [None]:
# Input files (remember to change these paths if the Resources folder moves)
fast_food_restaurants_2018 = "../Resources/Fast_Food_Restaurants_2018.csv"

# Load the restaurant listing and the cleaned census extract into DataFrames
Rest_2018 = pd.read_csv(fast_food_restaurants_2018)
census_2018 = pd.read_csv("../Resources/census_data_clean_2018.csv")

In [None]:
# Discard the identifier/URL columns; they are not used later in the notebook
Rest_2018 = Rest_2018.drop(columns=['id', 'sourceURLs', 'websites'])
Rest_2018.columns

In [None]:
# Give the columns title-cased names, then keep only the listed columns in
# this fixed order
Rest_2018 = Rest_2018.rename(columns={
    'name': 'Name',
    'address': 'Address',
    'city': 'City',
    'country': 'Country',
    'latitude': 'Latitude',
    'longitude': 'Longitude',
    'postalCode': 'Zip Code',
    'province': 'State',
})[['Name', 'Address', 'City', 'State', 'Zip Code', 'Country', 'Latitude', 'Longitude']]
Rest_2018.head()

In [None]:
# Distinct restaurant names, sorted in place, printed in full so spelling
# variants are easy to spot while cleaning
names = Rest_2018['Name'].unique()
names.sort()
print(len(names), names, sep='\n')

In [None]:
# Rename all locations that have different spelling, punctuation etc.
# Every key is matched against the whole 'Name' value (exact match, not a
# substring). All spelling fixes are applied in one replace() call; none of
# the replacement values is itself a key, so the result does not depend on
# ordering.
#
# Fix: "Long John Silver's / AW" and 'Long John Silvers' were previously
# written without a comma between them, which made Python concatenate them
# into the single key "Long John Silver's / AWLong John Silvers"; neither
# spelling was being replaced.
#
# NOTE(review): 'Burger King¬Æ' and 'SUBWAY¬Æ' look like a mis-decoded '®';
# confirm these keys match the strings actually present in the CSV.
name_fixes = {
    "Arby's - Closed": "Arby's",
    'Arbys': "Arby's",
    "Auntie Anne's": "Auntie Anne's Pretzels",
    "Baker's Drive Thru": "Baker's Drive-thru",
    "Ben and Jerry's": "Ben & Jerry's",
    'Bob Evans Restaurant': 'Bob Evans',
    "Bojangles' Famous Chicken 'n Biscuits": 'Bojangles',
    'Burger King¬Æ': 'Burger King',
    'Capri Italian Restaurant': 'Capri Restaurant',
    "Carl's Jr.": "Carl's Jr",
    "Carl's Jr. / Green Burrito": "Carl's Jr",
    "Carl's Jr / Green Burrito": "Carl's Jr",
    'Caseys Carry Out Pizza': "Casey's General Store",
    "Charley's Grilled Subs": 'Charleys Philly Steaks',
    "Checker's Pizza": 'Checkers',
    'Chick-fil-A': 'Chick-Fil-A',
    'Chipotle': 'Chipotle Mexican Grill',
    'Dairy Queen (Treat Only)': 'Dairy Queen',
    'Dunkin Donuts': "Dunkin' Donuts",
    'Five Guys Burgers Fries': 'Five Guys',
    'Five Guys Burgers And Fries': 'Five Guys',
    "Foster's Freeze": 'Fosters Freeze',
    "Hardee's": 'Hardees',
    "Hardee's / Red Burrito": 'Hardees',
    "Hardee's/red Burrito": 'Hardees',
    'Jack in the Box': 'Jack In The Box',
    'Jack in the Box -': 'Jack In The Box',
    "Jimmy John's": 'Jimmy Johns',
    'KFC - Kentucky Fried Chicken': 'KFC',
    'KFC/AW': 'KFC',
    'KFC/Long John Silvers': 'KFC',
    'KFC/Taco Bell': 'KFC',
    "Little Caesar's Pizza": 'Little Caesars',
    'Little Caesars Pizza': 'Little Caesars',
    "Long John Silver's / AW": "Long John Silver's",
    'Long John Silvers': "Long John Silver's",
    'Long John Silvers / A&W': "Long John Silver's",
    'Mc Donalds': "McDonald's",
    "McDonald's of Rolesville": "McDonald's",
    'McDonalds': "McDonald's",
    "McDonalds's": "McDonald's",
    "Mcdonald's": "McDonald's",
    'Mcdonalds': "McDonald's",
    'Mcdonalds Whitehouse': "McDonald's",
    'Panda Express Innovation Kitchen': 'Panda Express',
    'PepperJax Grill': 'Pepperjax Grill',
    "Popeye's Louisiana Kitchen": 'Popeyes',
    'Popeyes Chicken & Biscuits': 'Popeyes',
    'Popeyes Chicken Biscuits': 'Popeyes',
    'Popeyes Louisiana Kitchen': 'Popeyes',
    'QDOBA Mexican Eats': 'Qdoba Mexican Grill',
    'Qdoba Mexican Eats': 'Qdoba Mexican Grill',
    "Quizno's": 'Quiznos',
    'Quiznos Sub': 'Quiznos',
    "Raising Cane's": 'Raising Canes',
    "Raising Cane's Chicken Fingers": 'Raising Canes',
    "Rally's Hamburgers": "Rally's",
    'Rallys': "Rally's",
    'Roma Pizza': 'Romas Pizza',
    'SONIC Drive In': 'SONIC Drive-In',
    'SUBWAY¬Æ': 'SUBWAY',
    'Sonic': 'SONIC Drive-In',
    "Sonic America's Drive-In": 'SONIC Drive-In',
    'Sonic Drive In': 'SONIC Drive-In',
    'Sonic Drive in': 'SONIC Drive-In',
    'Sonic Drive-In': 'SONIC Drive-In',
    "Steak 'n Shake": 'Steak N Shake',
    'Taco Bell / KFC': 'Taco Bell',
    'Taco Bell/KFC': 'Taco Bell',
    'Taco Bell/Pizza Hut': 'Taco Bell',
    'Wienerschitzel': 'Wienerschnitzel',
    'Wingstop Restaurant': 'Wingstop',
    "Wolf's Dairy Queen": 'Dairy Queen',
    "Zaxby's Chicken Fingers & Buffalo Wings": "Zaxby's",
    'b.good': 'B.GOOD',
    'A&W/Long John Silvers': "Long John Silver's",
}
Rest_2018['Name'] = Rest_2018['Name'].replace(name_fixes)

# After going through team decided to rename Pizza Hut, Taco Bell and KFC to YUM Brands.
# This must stay a separate, later replace(): it has to see the 'KFC' and
# 'Taco Bell' values produced by the spelling fixes above.
Rest_2018['Name'] = Rest_2018['Name'].replace({'Pizza Hut': 'YUM Brands', 'KFC': 'YUM Brands', 'Taco Bell': 'YUM Brands'})

In [None]:
# Remove locations that are not food related. This cell handles
# 'Walmart Supercenter'; 'T-Mobile', 'Hilton Boston Logan Airport' and
# '7-Eleven' are handled by the cells that follow.
# https://thispointer.com/python-pandas-how-to-drop-rows-in-dataframe-by-conditions-on-column-values/
indexNames = Rest_2018.index[Rest_2018['Name'].eq('Walmart Supercenter')]
Rest_2018.drop(index=indexNames, inplace=True)

In [None]:
# Drop the rows whose Name is 'T-Mobile'
indexNames = Rest_2018.index[Rest_2018['Name'].eq('T-Mobile')]
Rest_2018.drop(index=indexNames, inplace=True)

In [None]:
# Drop the rows whose Name is 'Hilton Boston Logan Airport'
indexNames = Rest_2018.index[Rest_2018['Name'].eq('Hilton Boston Logan Airport')]
Rest_2018.drop(index=indexNames, inplace=True)

In [None]:
# Drop the rows whose Name is '7-Eleven', then report how many rows and how
# many distinct names remain
indexNames = Rest_2018.index[Rest_2018['Name'].eq('7-Eleven')]
Rest_2018.drop(index=indexNames, inplace=True)
print(Rest_2018.shape[0])
Rest_2018['Name'].nunique()

In [None]:
# Number of rows recorded under each restaurant name, most frequent first
Rest_2018.loc[:, 'Name'].value_counts()

In [None]:
# Remove all locations that have less than 50 entries
# https://www.thetopsites.net/article/58467286.shtml
# transform('count') gives every row the number of rows sharing its Name, so
# the comparison below is a row-aligned boolean mask.
# Fix: the comparison was `> threshold`, which also discarded names with
# exactly 50 entries; `>=` keeps them, matching "less than 50" above.
threshold = 50
name_counts = Rest_2018.groupby('Name')['Name'].transform('count')
clean_rest_2018 = Rest_2018[name_counts >= threshold].copy()
print(len(clean_rest_2018))
clean_rest_2018['Name'].value_counts()

In [None]:
# Save the cleaned restaurant table (without the index column) and preview it
clean_rest_2018.to_csv('../Resources/clean_rest_2018.csv', index=False, encoding='utf-8')
clean_rest_2018.iloc[:2]

In [None]:
# Load the Chipotle store list so it can be combined with the cleaned
# restaurant data
Chipotle_stores = "../Resources/chipotle_stores.csv"
chipotle = pd.read_csv(filepath_or_buffer=Chipotle_stores)
chipotle.iloc[:2]

In [None]:
# Stack the Chipotle rows on top of the cleaned restaurant rows.
# https://pandas.pydata.org/pandas-docs/stable/user_guide/merging.html
# ignore_index=True renumbers the rows 0..n-1; without it both inputs keep
# their own labels and the combined frame has duplicate index values.
combine = [chipotle, clean_rest_2018]
all_rest_df = pd.concat(combine, axis=0, ignore_index=True)
# Preview only; displaying the full frame floods the notebook output
all_rest_df.head()

In [None]:
# First two census rows, to check the available columns
census_2018.iloc[:2]

In [None]:
# Add census data to the right of the combined restaurant data.
# https://data-flair.training/blogs/pandas-merge-and-join/
# https://stackoverflow.com/questions/17978133/python-pandas-merge-only-certain-columns
# A for loop was tried because the merges below either did not populate the
# census columns correctly or dropped most of the rows:
# all_rest_df = pd.merge(all_rest_df,census_2018[['Longitude','Population', 'Median Age', 'Household Income', 'Per Capita Income', 'Poverty Count', 'Poverty Rate']], on="Longitude", how='left')
# # all_rest= pd.merge(all_rest_df,census_2018[['Longitude', 'Population']], on ='Longitude', how='left')
# all_rest_df = pd.merge(all_rest_df,census_2018, on=['Latitude', 'Longitude'])

# Six independent, initially empty lists, one per census field
(Population, Median_age, Household_Income,
 Per_Capita_Income, Poverty_Count, Poverty_Rate) = ([] for _ in range(6))







In [None]:
# export to csv
# all_rest_df.to_csv("../Resources/All_rest_2018.csv", index=False, header=True)

In [None]:
# The restaurant and census data ended up being combined in Excel: the pandas
# merge could not be made to work without duplicating the address, city, etc.
# columns (to discuss in class). Load that combined file here.
all_rest_df = pd.read_csv("../Resources/All_rest_2018.csv")
all_rest_df.head()

In [None]:
# Summary statistics of the Population column in the combined data
all_rest_df.loc[:, 'Population'].describe()


In [None]:
# Row count of the combined data, followed by a preview
print(all_rest_df.shape[0])
all_rest_df.head()

In [None]:
# Census row count before rows with missing values are removed
print(census_2018.shape[0])

In [None]:
# Drop every census row that has at least one missing value (dropna's
# default), then report the remaining row count and preview the result
census_2018 = census_2018.dropna()
print(census_2018.shape[0])
census_2018.iloc[:2]

In [None]:
# api_key = g_key
# single= requests.get("https://maps.googleapis.com/maps/api/geocode/json?key=" + api_key +"&components=postal_code:97403")
# single_response = single.json()

# print(json.dumps(single_response, indent=4, sort_keys=True))

In [None]:
# lat = single_response["results"][0]["geometry"]["location"]["lat"]
# lng = single_response["results"][0]["geometry"]["location"]["lng"]
# print(lat,lng)

The cell below is commented out. Although I was able to make a correct call to the API (as above), I was not able to get coordinates back for any zip code: every request went straight to "Zip not found". I then found a dataset with this information and added it to our clean census data: https://public.opendatasoft.com/explore/dataset/us-zip-code-latitude-and-longitude/

In [None]:
# https://stackoverflow.com/questions/5585957/get-latlng-from-zip-code-google-maps-api
# Turn census zip codes into lat/long combinations for heatmap of US
# https://maps.googleapis.com/maps/api/geocode/json?key=YOUR_API_KEY&components=postal_code:97403
# Lat=[]
# Long=[]

# url = 'https://maps.googleapis.com/maps/api/geocode/json?key='
# api_key = g_key
# zipcode = census_2018['Zipcode']

# for zips in zipcode:
#     print(f'{url}{api_key}&components=postal_code:{zips}')
# #     try:
#         pop_request = requests.get(f'{url}{api_key}&components=postal_code:{zips}')
# # (url + api_key +'&components=postal_code:'+zips)
#         pop_response=pop_request.json()
#         Lat.append(pop_response["results"][0]["geometry"]["location"]["lat"])
#         Long.append(pop_response["results"][0]["geometry"]["location"]["lng"])
#         print(f'Success')
#     except IndexError:
#         print(f'Zip not found')

    


In [None]:
# Configure gmaps with the API key imported from config (g_key); this must
# run before the map figure is built
gmaps.configure(api_key =g_key)

In [None]:
# Heatmap inputs: one point per census row, weighted by that row's population
locations = census_2018[['Latitude', 'Longitude']]
weight = census_2018['Population']

# Chipotle marker coordinates
chip_locations = chipotle[['Latitude', 'Longitude']]

# McDonald's rows and their marker coordinates
mcdon_locations = all_rest_df[all_rest_df['Name'].eq("McDonald's")]
mcdonlatlong = mcdon_locations[['Latitude', 'Longitude']]

# Yum brand rows (Taco Bell, Pizza Hut and KFC) and their marker coordinates.
# NOTE(review): the cleaning step renames these three names to 'YUM Brands';
# if All_rest_2018.csv carries the renamed values, the three selections below
# match no rows - confirm against the file.
taco_locations = all_rest_df[all_rest_df['Name'].eq('Taco Bell')]
tacolatlong = taco_locations[['Latitude', 'Longitude']]
pizza_locations = all_rest_df[all_rest_df['Name'].eq('Pizza Hut')]
pizzalatlong = pizza_locations[['Latitude', 'Longitude']]
kfc_locations = all_rest_df[all_rest_df['Name'].eq('KFC')]
kfclatlong = kfc_locations[['Latitude', 'Longitude']]

In [None]:
# Build the map: a population heatmap with restaurant markers on top.
fig = gmaps.figure()

# Census points weighted by population
heat_layer = gmaps.heatmap_layer(locations, weight, dissipating=True, max_intensity=25, point_radius=2)

# Chipotle stores (black markers)
chip_layer = gmaps.symbol_layer(
    chip_locations, fill_color='black', stroke_color='black', scale=1)

# Yum brand stores (green markers).
# Fix: the cleaning step renames Taco Bell, Pizza Hut and KFC to
# 'YUM Brands', so the separate per-brand layers were built from filters
# that match nothing once the names are consolidated. Select the rows here
# by either the consolidated name or the original brand names, so the
# markers appear whichever form the combined file uses.
yum_names = ['YUM Brands', 'Taco Bell', 'Pizza Hut', 'KFC']
yum_latlong = all_rest_df.loc[all_rest_df['Name'].isin(yum_names), ['Latitude', 'Longitude']]
yum_layer = gmaps.symbol_layer(
    yum_latlong, fill_color='green', stroke_color='green', scale=1)

for layer in (heat_layer, chip_layer, yum_layer):
    fig.add_layer(layer)

fig

In [None]:
# Bucket each restaurant row by its zip-code population: 20,000-wide bins
# from 0 to 120,000 (include_lowest=True puts a population of exactly 0 in
# the first bin)
bins = list(range(0, 120001, 20000))
group_names = ['0-20K', '20k - 40k', '40k - 60k', '60k - 80k', '80k - 100k', '100k - 120k']
all_rest_df['Zip Code Population Bins'] = pd.cut(
    all_rest_df['Population'],
    bins=bins,
    labels=group_names,
    include_lowest=True,
)
all_rest_df['Zip Code Population Bins'].value_counts()


In [None]:
# Number of rows for every (restaurant name, population bin) pair
all_rest_pop = all_rest_df.groupby(['Name', 'Zip Code Population Bins'])
bin_counts = all_rest_pop['Zip Code Population Bins'].count()
print(bin_counts)

In [None]:
# Keep only the four brands that are compared in the chart
chart_all_rest = all_rest_df.loc[
    all_rest_df['Name'].isin(['Chipotle Mexican Grill', 'YUM Brands', 'Burger King', "McDonald's"])
]
chart_all_rest.head(2)

In [None]:
# Count of locations per population bin, one bar colour per brand
# https://stackoverflow.com/questions/53997862/pandas-groupby-two-columns-and-plot
sns_plot = sns.countplot(x='Zip Code Population Bins', hue='Name', data=chart_all_rest)
# Save through the Figure that owns the plot; calling savefig directly on
# sns_plot was tried and abandoned
figure = sns_plot.figure
figure.savefig('Population_Bins_and_Locations.png')

In [None]:
# Summary statistics of the census Population column
census_2018.loc[:, 'Population'].describe()

In [None]:
# Apply the same 20,000-wide population bins to the census rows
bins = [0, 20000, 40000, 60000, 80000, 100000, 120000]
group_names = ['0-20K', '20k - 40k', '40k - 60k', '60k - 80k', '80k - 100k', '100k - 120k']
population_bins = pd.cut(census_2018['Population'], bins, labels=group_names, include_lowest=True)
census_2018['Zip Code Population Bins'] = population_bins
census_2018['Zip Code Population Bins'].value_counts()

In [None]:
# Number of census rows in each population bin; the bin is also used as the
# hue, so each bin gets its own colour
sns_plot = sns.countplot(
    x='Zip Code Population Bins',
    hue='Zip Code Population Bins',
    data=census_2018,
)
figure = sns_plot.figure
figure.savefig('Census_Population_Bins_and_Locations.png')

In [None]:
# Scatter plot for Chipotle locations vs population
# NOTE(review): this binds chipbyzip to the same object as all_rest_df (no
# copy and no filter on 'Name'); confirm whether a Chipotle-only selection
# was intended here.
chipbyzip= all_rest_df

In [None]:
# Linear regression of Temperature on Latitude for rows with Latitude >= 0,
# drawn as a scatter plot with the fitted line and saved to Outputs/.
#
# NOTE(review): `city_data` is not created anywhere in the visible notebook,
# and the title refers to city weather rather than restaurants; this cell
# appears to have been carried over as a template. It will raise NameError
# until `city_data` (with 'Latitude' and 'Temperature' columns) is provided -
# confirm whether it should be adapted to the restaurant data.
#
# linregress comes from scipy.stats (imported in the setup cell).
northern_hem = city_data.loc[city_data['Latitude'] >= 0]

x_values = northern_hem['Latitude']
y_values = northern_hem['Temperature']

(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = "y = " + str(round(slope, 2)) + "x + " + str(round(intercept, 2))

plt.scatter(x_values, y_values, marker="o", facecolors="green", edgecolors="black",
            s=30, alpha=0.75)
plt.plot(x_values, regress_values, "r-")
plt.annotate(line_eq, (-9, 30), fontsize=15, color="red")
plt.xlim(-10, 85)
plt.ylim(10, 95)
plt.title('City Northern Hemisphere Latitude vs Temperature (10/10/2020)')
plt.xlabel('Latitude')
# Fix: axis label was misspelled 'Tempurature'
plt.ylabel('Temperature (F)')
# savefig does not create missing directories, so make sure the target exists
os.makedirs('Outputs', exist_ok=True)
plt.savefig('Outputs/Lat_vs_Temp_Northern.png')

print(f'The r-value is: {rvalue}')

plt.show()


