In [None]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import scipy.stats as st
import numpy as np
import requests
import json
from census import Census

In [None]:
# Load the fast food dataset and keep only the rows with no missing values
fast_food_data = pd.read_csv('resources/fast_food_data_us.csv').dropna()
fast_food_data.head()


In [None]:
# Find duplicate restaurants: rows repeating an earlier (address, name) pair
is_repeat = fast_food_data.duplicated(subset=['address', 'name'])
duplicate_fast_food = fast_food_data.loc[is_repeat]
duplicate_fast_food

In [None]:
# Remove duplicate restaurants and save the result as a new DataFrame.
# Rows are matched on the same (address, name) pair that the duplicate check
# in the previous cell uses, so the rows inspected there are exactly the rows
# removed here. Matching on 'address' alone would also discard different
# restaurants that share an address value.
# NOTE(review): presumably 'address' holds only the street part (city and
# postal code appear to be separate columns) - confirm against the CSV.
# .copy() hands later cells an independent frame to modify.
clean_fast_food = fast_food_data.drop_duplicates(subset=['address', 'name']).copy()
clean_fast_food.head()

In [None]:
# Convert the two-letter state codes in 'province' to full state names so the
# column can be matched against datasets keyed by state name.
# Possible alternative to this hand-written table: https://pypi.org/project/us/
# NOTE(review): the table has no entry for 'DC' or any territory; codes that
# are not listed are left unchanged by replace() - confirm against the data.
states = {
    "AL": "Alabama",
    "AK": "Alaska",
    "AZ": "Arizona",
    "AR": "Arkansas",
    "CA": "California",
    "CO": "Colorado",
    "CT": "Connecticut",
    "DE": "Delaware",
    "FL": "Florida",
    "GA": "Georgia",
    "HI": "Hawaii",
    "ID": "Idaho",
    "IL": "Illinois",
    "IN": "Indiana",
    "IA": "Iowa",
    "KS": "Kansas",
    "KY": "Kentucky",
    "LA": "Louisiana",
    "ME": "Maine",
    "MD": "Maryland",
    "MA": "Massachusetts",
    "MI": "Michigan",
    "MN": "Minnesota",
    "MS": "Mississippi",
    "MO": "Missouri",
    "MT": "Montana",
    "NE": "Nebraska",
    "NV": "Nevada",
    "NH": "New Hampshire",
    "NJ": "New Jersey",
    "NM": "New Mexico",
    "NY": "New York",
    "NC": "North Carolina",
    "ND": "North Dakota",
    "OH": "Ohio",
    "OK": "Oklahoma",
    "OR": "Oregon",
    "PA": "Pennsylvania",
    "RI": "Rhode Island",
    "SC": "South Carolina",
    "SD": "South Dakota",
    "TN": "Tennessee",
    "TX": "Texas",
    "UT": "Utah",
    "VT": "Vermont",
    "VA": "Virginia",
    "WA": "Washington",
    "WV": "West Virginia",
    "WI": "Wisconsin",
    "WY": "Wyoming",
}

# Build the converted column and assign it back explicitly. Calling
# replace(..., inplace=True) on clean_fast_food['province'] acts on a
# temporary column selection: pandas warns about it and, with Copy-on-Write
# enabled, the DataFrame itself is never updated.
# assign() overwrites 'province' in its existing position, so the column
# order is unchanged.
clean_fast_food = clean_fast_food.assign(
    province=clean_fast_food['province'].replace(states)
)
clean_fast_food.head()

In [None]:
# Relabel every column by position. Per the original note, the point of this
# step is that the province column ends up named 'State' for the later merge.
renamed_columns = [
    'Index', 'Address', 'Category', 'City', 'Country',
    'Latitude', 'Longitude', 'Name', 'ZipCode', 'State',
]
clean_fast_food.columns = renamed_columns
clean_fast_food.head()

In [None]:
# Write the cleaned fast food table to disk without the row index
clean_fast_food.to_csv(
    'resources/clean_fast_food.csv',
    index=False,
    encoding='utf-8',
)

In [None]:
# Load the state-level obesity dataset and keep only the rows with no missing values
obesity_data = pd.read_csv('resources/obesity_data_state_2020.csv').dropna()
obesity_data.head()

In [None]:
# Attach the obesity columns to each restaurant row; the inner join on
# 'State' keeps only states that appear in both frames
fast_food_obesity = clean_fast_food.merge(obesity_data, on='State', how='inner')
fast_food_obesity.head()

In [None]:
# Write the fast food + obesity table to disk without the row index
fast_food_obesity.to_csv(
    'resources/clean_fast_obesity.csv',
    index=False,
    encoding='utf-8',
)

In [None]:
### Data Cleanup
# Census data: one row per state from the 2020 Decennial Census
# redistricting (PL 94-171) dataset.
# Census API Key
from config import api_key

# P1_001N is the total-population count, which is what the
# "Total Population" column below is meant to hold (H1_001N, by contrast, is
# total housing units). HTTPS keeps the request off plain-text HTTP.
url = 'https://api.census.gov/data/2020/dec/pl?get=NAME,P1_001N&for=state:*'

# NOTE(review): this client is not used by the request below, and its year
# (2021) differs from the 2020 dataset named in `url` - confirm whether it is
# still needed.
c = Census(api_key, year=2021)

# Bound the wait and fail loudly on an HTTP error rather than trying to parse
# an error page as JSON.
response = requests.get(url, timeout=30)
response.raise_for_status()
census_data = response.json()

# Convert to DataFrame. The payload is a list of rows whose first entry is the
# API's own header row; it is still present here as row 0 and is removed in
# the following cell.
census_pd = pd.DataFrame(census_data)
census_pd.columns = ["State", "Total Population", "State Number"]
census_pd

In [None]:
# Remove the API's header row, which the previous cell loaded as data.
# Filtering on the header value "NAME" (instead of dropping whatever sits at
# position 0) keeps this cell safe to re-run: a second run finds no header
# row and leaves the state rows untouched.
census_pd = census_pd[census_pd["State"] != "NAME"]
census_pd

In [None]:
# Join the census columns onto the fast food + obesity table by state;
# the inner join keeps only states that appear in both frames
fast_obesity_census = fast_food_obesity.merge(census_pd, on='State', how='inner')
fast_obesity_census.head()

In [None]:
# Discard the 'Index' and 'State Number' columns from the combined table
unwanted_columns = ['Index', 'State Number']
clean_fast_obesity_census = fast_obesity_census.drop(columns=unwanted_columns)
clean_fast_obesity_census

In [None]:
# Write the fully combined table to disk without the row index
clean_fast_obesity_census.to_csv(
    'resources/clean_all_combined.csv',
    index=False,
    encoding='utf-8',
)