In [31]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import scipy.stats as st
import numpy as np
import requests
import json
from census import Census

In [33]:
# Import the Fast Food dataset, drop missing data
fast_food_data = pd.read_csv('resources/fast_food_data_us.csv')
fast_food_data = fast_food_data.dropna()
fast_food_data.head()

Unnamed: 0,index,address,categories,city,country,latitude,longitude,name,postalCode,province
0,0,800 N Canal Blvd,American Restaurant and Fast Food Restaurant,Thibodaux,US,29.814697,-90.814742,SONIC Drive In,70301,LA
1,1,800 N Canal Blvd,Fast Food Restaurants,Thibodaux,US,29.814697,-90.814742,SONIC Drive In,70301,LA
2,2,206 Wears Valley Rd,Fast Food Restaurant,Pigeon Forge,US,35.803788,-83.580553,Taco Bell,37863,TN
3,3,3652 Parkway,Fast Food,Pigeon Forge,US,35.782339,-83.551408,Arby's,37863,TN
4,4,2118 Mt Zion Parkway,Fast Food Restaurant,Morrow,US,33.562738,-84.321143,Steak 'n Shake,30260,GA


In [13]:
#find duplicate resturants by address
duplicate_fast_food = fast_food_data[fast_food_data.duplicated(['address', 'name'])]
duplicate_fast_food

Unnamed: 0,index,address,categories,city,country,latitude,longitude,name,postalCode,province
1,1,800 N Canal Blvd,Fast Food Restaurants,Thibodaux,US,29.814697,-90.814742,SONIC Drive In,70301,LA
31,31,105 Jericho Tpke,Fast Food,Jericho,US,40.780233,-73.559340,McDonald's,11753,NY
51,51,59 W Main St,Fast Food,Spencer,US,42.233410,-72.012516,McDonald's,1562,MA
72,72,706 Fletcher Pkwy,Fast Food Restaurant,El Cajon,US,32.808270,-116.971370,Boston Market,92020,CA
82,82,7011 Barker Cypress Rd,Fast Food Restaurant and Burger Joint,Cypress,US,29.880810,-95.683922,Burger King,77433,TX
...,...,...,...,...,...,...,...,...,...,...
9899,9899,9515 Candelaria Rd NE,Fast Food Restaurants,Albuquerque,US,35.116500,-106.534030,McDonalds,87112,NM
9929,9929,4117 N Josey Ln,Fried Chicken Joint and Fast Food Restaurant,Carrollton,US,33.023748,-96.885564,KFC,75007,TX
9938,9938,28670 Northwestern Hwy,Fast Food Restaurant,Southfield,US,42.499260,-83.303460,McDonald's,48034,MI
9940,9940,205 Back River Neck Rd,Fast Food,Essex,US,39.307620,-76.443300,McDonald's,21221,MD


In [14]:
#remove duplicates and save as new df
clean_fast_food = fast_food_data.drop_duplicates('address')
clean_fast_food.head()

Unnamed: 0,index,address,categories,city,country,latitude,longitude,name,postalCode,province
0,0,800 N Canal Blvd,American Restaurant and Fast Food Restaurant,Thibodaux,US,29.814697,-90.814742,SONIC Drive In,70301,LA
2,2,206 Wears Valley Rd,Fast Food Restaurant,Pigeon Forge,US,35.803788,-83.580553,Taco Bell,37863,TN
3,3,3652 Parkway,Fast Food,Pigeon Forge,US,35.782339,-83.551408,Arby's,37863,TN
4,4,2118 Mt Zion Parkway,Fast Food Restaurant,Morrow,US,33.562738,-84.321143,Steak 'n Shake,30260,GA
5,5,9768 Grand River Ave,Fast Food Restaurant,Detroit,US,42.368823,-83.138251,Wendy's,48204,MI


In [15]:
## We need to convert the states to state codes in order to work with them - possible solution https://pypi.org/project/us/
states = {"AL":"Alabama",
"AK":"Alaska",
"AZ":"Arizona",
"AR":"Arkansas",
"CA":"California",
"CO":"Colorado",
"CT":"Connecticut",
"DE":"Delaware",
"FL":"Florida",
"GA":"Georgia",
"HI":"Hawaii",
"ID":"Idaho",
"IL":"Illinois",
"IN":"Indiana",
"IA":"Iowa",
"KS":"Kansas",
"KY":"Kentucky",
"LA":"Louisiana",
"ME":"Maine",
"MD":"Maryland",
"MA":"Massachusetts",
"MI":"Michigan",
"MN":"Minnesota",
"MS":"Mississippi",
"MO":"Missouri",
"MT":"Montana",
"NE":"Nebraska",
"NV":"Nevada",
"NH":"New Hampshire",
"NJ":"New Jersey",
"NM":"New Mexico",
"NY":"New York",
"NC":"North Carolina",
"ND":"North Dakota",
"OH":"Ohio",
"OK":"Oklahoma",
"OR":"Oregon",
"PA":"Pennsylvania",
"RI":"Rhode Island",
"SC":"South Carolina",
"SD":"South Dakota",
"TN":"Tennessee",
"TX":"Texas",
"UT":"Utah",
"VT":"Vermont",
"VA":"Virginia",
"WA":"Washington",
"WV":"West Virginia",
"WI":"Wisconsin",
"WY":"Wyoming"}

clean_fast_food['province'].replace(states, inplace=True)
clean_fast_food.head()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().replace(


Unnamed: 0,index,address,categories,city,country,latitude,longitude,name,postalCode,province
0,0,800 N Canal Blvd,American Restaurant and Fast Food Restaurant,Thibodaux,US,29.814697,-90.814742,SONIC Drive In,70301,Louisiana
2,2,206 Wears Valley Rd,Fast Food Restaurant,Pigeon Forge,US,35.803788,-83.580553,Taco Bell,37863,Tennessee
3,3,3652 Parkway,Fast Food,Pigeon Forge,US,35.782339,-83.551408,Arby's,37863,Tennessee
4,4,2118 Mt Zion Parkway,Fast Food Restaurant,Morrow,US,33.562738,-84.321143,Steak 'n Shake,30260,Georgia
5,5,9768 Grand River Ave,Fast Food Restaurant,Detroit,US,42.368823,-83.138251,Wendy's,48204,Michigan


In [16]:
#renamed column from province to state for merge
clean_fast_food.columns = ['Index',
'Address',
'Category',
'City',
'Country',
'Latitude',
'Longitude',
'Name',
'ZipCode',
'State']
clean_fast_food.head()

Unnamed: 0,Index,Address,Category,City,Country,Latitude,Longitude,Name,ZipCode,State
0,0,800 N Canal Blvd,American Restaurant and Fast Food Restaurant,Thibodaux,US,29.814697,-90.814742,SONIC Drive In,70301,Louisiana
2,2,206 Wears Valley Rd,Fast Food Restaurant,Pigeon Forge,US,35.803788,-83.580553,Taco Bell,37863,Tennessee
3,3,3652 Parkway,Fast Food,Pigeon Forge,US,35.782339,-83.551408,Arby's,37863,Tennessee
4,4,2118 Mt Zion Parkway,Fast Food Restaurant,Morrow,US,33.562738,-84.321143,Steak 'n Shake,30260,Georgia
5,5,9768 Grand River Ave,Fast Food Restaurant,Detroit,US,42.368823,-83.138251,Wendy's,48204,Michigan


In [17]:
clean_fast_food.to_csv('resources/clean_fast_food.csv', encoding='utf-8', index=False)

In [18]:
# Import the Obesity dataset, drop missing data

obesity_data = pd.read_csv('resources/obesity_data_state_2020.csv')
obesity_data = obesity_data.dropna()
obesity_data.head()

Unnamed: 0,State,Prevalence,95% CI
0,Alabama,39.0,"(37.3, 40.8)"
1,Alaska,31.9,"(29.4, 34.4)"
2,Arizona,30.9,"(29.5, 32.3)"
3,Arkansas,36.4,"(34.5, 38.4)"
4,California,30.3,"(28.3, 32.2)"


In [19]:
#merged clean fast food df with obesity df
fast_food_obesity= pd.merge(clean_fast_food, obesity_data, how = 'inner', on = 'State')
fast_food_obesity.head()

Unnamed: 0,Index,Address,Category,City,Country,Latitude,Longitude,Name,ZipCode,State,Prevalence,95% CI
0,0,800 N Canal Blvd,American Restaurant and Fast Food Restaurant,Thibodaux,US,29.814697,-90.814742,SONIC Drive In,70301,Louisiana,38.1,"(36.2, 40.1)"
1,328,1737 N University Ave,Fast Food Restaurants,Lafayette,US,30.252668,-92.037149,McDonald's,70507,Louisiana,38.1,"(36.2, 40.1)"
2,363,1051 W Maple Ave,Fast Food Restaurants,Eunice,US,30.48697,-92.42648,Krispy Krunchy Chicken,70535,Louisiana,38.1,"(36.2, 40.1)"
3,428,3000 Maplewood Dr,Fast Food Restaurants,Sulphur,US,30.227514,-93.343155,Pizza Hut,70663,Louisiana,38.1,"(36.2, 40.1)"
4,434,6464 Siegen Ln,Fast Food Restaurants,Baton Rouge,US,30.39099,-91.06156,Arby's,70809,Louisiana,38.1,"(36.2, 40.1)"


In [20]:
fast_food_obesity.to_csv('resources/clean_fast_obesity.csv', encoding='utf-8', index=False)

In [21]:
### Data Cleanup 
# Census data
# Census API Key

url = 'http://api.census.gov/data/2020/dec/pl?get=NAME,H1_001N&for=state:*'

from config import api_key
c = Census(api_key, year=2021)

census_data = requests.get(url).json()

# Convert to DataFrame
census_pd = pd.DataFrame(census_data)
census_pd.columns = ["State", "Total Population", "State Number"]
census_pd

Unnamed: 0,State,Total Population,State Number
0,NAME,H1_001N,state
1,Alabama,2288330,01
2,Alaska,326200,02
3,Arizona,3082000,04
4,Arkansas,1365265,05
5,California,14392140,06
6,Colorado,2491404,08
7,Connecticut,1530197,09
8,Delaware,448735,10
9,District of Columbia,350364,11


In [22]:
census_pd = census_pd.drop(census_pd.index[0])
census_pd

Unnamed: 0,State,Total Population,State Number
1,Alabama,2288330,1
2,Alaska,326200,2
3,Arizona,3082000,4
4,Arkansas,1365265,5
5,California,14392140,6
6,Colorado,2491404,8
7,Connecticut,1530197,9
8,Delaware,448735,10
9,District of Columbia,350364,11
10,Idaho,751859,16


In [23]:
#merge tables by state and create data frame including state, fast food chain, obesity rate
fast_obesity_census= pd.merge(fast_food_obesity, census_pd, how = 'inner', on = 'State')
fast_obesity_census.head()

Unnamed: 0,Index,Address,Category,City,Country,Latitude,Longitude,Name,ZipCode,State,Prevalence,95% CI,Total Population,State Number
0,0,800 N Canal Blvd,American Restaurant and Fast Food Restaurant,Thibodaux,US,29.814697,-90.814742,SONIC Drive In,70301,Louisiana,38.1,"(36.2, 40.1)",2073200,22
1,328,1737 N University Ave,Fast Food Restaurants,Lafayette,US,30.252668,-92.037149,McDonald's,70507,Louisiana,38.1,"(36.2, 40.1)",2073200,22
2,363,1051 W Maple Ave,Fast Food Restaurants,Eunice,US,30.48697,-92.42648,Krispy Krunchy Chicken,70535,Louisiana,38.1,"(36.2, 40.1)",2073200,22
3,428,3000 Maplewood Dr,Fast Food Restaurants,Sulphur,US,30.227514,-93.343155,Pizza Hut,70663,Louisiana,38.1,"(36.2, 40.1)",2073200,22
4,434,6464 Siegen Ln,Fast Food Restaurants,Baton Rouge,US,30.39099,-91.06156,Arby's,70809,Louisiana,38.1,"(36.2, 40.1)",2073200,22


In [24]:
#dropped index and state number
clean_fast_obesity_census = fast_obesity_census.drop(['Index', 'State Number'], axis = 1)
clean_fast_obesity_census

Unnamed: 0,Address,Category,City,Country,Latitude,Longitude,Name,ZipCode,State,Prevalence,95% CI,Total Population
0,800 N Canal Blvd,American Restaurant and Fast Food Restaurant,Thibodaux,US,29.814697,-90.814742,SONIC Drive In,70301,Louisiana,38.1,"(36.2, 40.1)",2073200
1,1737 N University Ave,Fast Food Restaurants,Lafayette,US,30.252668,-92.037149,McDonald's,70507,Louisiana,38.1,"(36.2, 40.1)",2073200
2,1051 W Maple Ave,Fast Food Restaurants,Eunice,US,30.486970,-92.426480,Krispy Krunchy Chicken,70535,Louisiana,38.1,"(36.2, 40.1)",2073200
3,3000 Maplewood Dr,Fast Food Restaurants,Sulphur,US,30.227514,-93.343155,Pizza Hut,70663,Louisiana,38.1,"(36.2, 40.1)",2073200
4,6464 Siegen Ln,Fast Food Restaurants,Baton Rouge,US,30.390990,-91.061560,Arby's,70809,Louisiana,38.1,"(36.2, 40.1)",2073200
...,...,...,...,...,...,...,...,...,...,...,...,...
9297,850 Manton Ave,Fast Food Restaurants,Providence,US,41.832526,-71.465840,Wendy's,2909,Rhode Island,30.1,"(28.2, 32.1)",483474
9298,580 Chalkstone Ave,Fast Food Restaurants,Providence,US,41.834918,-71.426043,Pizza Hut,2908,Rhode Island,30.1,"(28.2, 32.1)",483474
9299,4000 Chapel View Boulevard,Fast Food Restaurants,Cranston,US,41.756170,-71.458551,Panera Bread,2920,Rhode Island,30.1,"(28.2, 32.1)",483474
9300,650 Bald Hill Rd,Fast Food Restaurants,Warwick,US,41.720180,-71.483700,SUBWAY,2886,Rhode Island,30.1,"(28.2, 32.1)",483474


In [25]:
clean_fast_obesity_census.to_csv('resources/clean_all_combined.csv', encoding='utf-8', index=False)