In [1]:
import matplotlib.pyplot as plt
import pandas as pd
import scipy.stats as st
import numpy as np
import requests
import json
from census import Census
import gmaps


**Hypothesis**: States with a higher number of fast food restaurants will have a higher obesity rate.

**Null Hypothesis**: The number of fast food restaurants in a state has no impact on the obesity rate in that state.

First, we will sort the states by the independent variable (the number of fast food restaurants in each state).

- Take the first ⅓ of the values and form a group: the highest number of fast food (FF) restaurants
- The next ⅓ form a group: the middle number of FF restaurants
- The final ⅓ form a group: the lowest number of FF restaurants

We will then use a one-way ANOVA to test our hypothesis.  
Take the 3 datasets and see if they are significantly different from one another:  
`st.f_oneway(obesity_largest_FF, obesity_middle_FF, obesity_lowest_FF)`

If p < 0.05, we will reject the null hypothesis.


In [2]:
# Load the cleaned fast food restaurant listing and preview it
fastfood_data = pd.read_csv(filepath_or_buffer="resources/clean_fast_food.csv")
fastfood_data


Unnamed: 0,Index,Address,Category,City,Country,Latitude,Longitude,Name,ZipCode,State
0,0,800 N Canal Blvd,American Restaurant and Fast Food Restaurant,Thibodaux,US,29.814697,-90.814742,SONIC Drive In,70301,Louisiana
1,2,206 Wears Valley Rd,Fast Food Restaurant,Pigeon Forge,US,35.803788,-83.580553,Taco Bell,37863,Tennessee
2,3,3652 Parkway,Fast Food,Pigeon Forge,US,35.782339,-83.551408,Arby's,37863,Tennessee
3,4,2118 Mt Zion Parkway,Fast Food Restaurant,Morrow,US,33.562738,-84.321143,Steak 'n Shake,30260,Georgia
4,5,9768 Grand River Ave,Fast Food Restaurant,Detroit,US,42.368823,-83.138251,Wendy's,48204,Michigan
...,...,...,...,...,...,...,...,...,...,...
9297,9995,3460 Robinhood Rd,Fast Food Restaurants,Winston-Salem,US,36.117563,-80.316553,Pizza Hut,27106,North Carolina
9298,9996,3069 Kernersville Rd,Fast Food Restaurants,Winston-Salem,US,36.077718,-80.176748,Pizza Hut,27107,North Carolina
9299,9997,838 S Main St,Fast Food Restaurants,Kernersville,US,36.111015,-80.089165,Pizza Hut,27284,North Carolina
9300,9998,1702 Glendale Dr SW,Fast Food Restaurants,Wilson,US,35.719981,-77.945795,Pizza Hut,27893,North Carolina


In [3]:
# Load the 2020 state-level obesity data
obesity_data = pd.read_csv(filepath_or_buffer="resources/obesity_data_state_2020.csv")


In [4]:
# Count the restaurant rows per state (non-null "Index" values), indexed by state
fast_food_totals = (
    fastfood_data[["State", "Index"]]
    .groupby("State")
    .count()
)
fast_food_totals

Unnamed: 0_level_0,Index
State,Unnamed: 1_level_1
Alabama,6
Alaska,14
Arizona,295
Arkansas,90
California,1111
Colorado,135
Connecticut,51
Delaware,38
Florida,574
Georgia,391


In [5]:
# Attach each state's restaurant count to its obesity record. The right join keeps
# every row of obesity_data; rows without a matching restaurant count get NaN.
fast_food_by_state = pd.merge(fast_food_totals, obesity_data, how='right', on='State')

# Drop any rows that are missing a value in any column
fast_food_by_state = fast_food_by_state.dropna()

# Drop Alabama since it doesn't have enough data for consideration.
# Select it by state name rather than by row label, so the correct row is removed
# regardless of the row order of the obesity file or of what dropna() removed.
fast_food_by_state = fast_food_by_state[fast_food_by_state["State"] != "Alabama"]


In [6]:
# Inspect the merged table (restaurant count and obesity prevalence per state)
fast_food_by_state

Unnamed: 0,State,Index,Prevalence,95% CI
1,Alaska,14.0,31.9,"(29.4, 34.4)"
2,Arizona,295.0,30.9,"(29.5, 32.3)"
3,Arkansas,90.0,36.4,"(34.5, 38.4)"
4,California,1111.0,30.3,"(28.3, 32.2)"
5,Colorado,135.0,24.2,"(23.1, 25.2)"
6,Connecticut,51.0,29.2,"(27.7, 30.8)"
7,Delaware,38.0,36.5,"(34.3, 38.8)"
9,Florida,574.0,28.4,"(26.7, 30.2)"
10,Georgia,391.0,34.3,"(32.5, 36.0)"
12,Hawaii,30.0,24.5,"(23.2, 25.9)"


In [7]:
# Order the states from the fewest to the most fast food restaurants
fast_food_by_state = fast_food_by_state.sort_values(by="Index")

In [8]:
# Summary statistics for the numeric columns
fast_food_by_state.describe()

Unnamed: 0,Index,Prevalence
count,49.0,49.0
mean,189.714286,32.07551
std,206.697545,3.905313
min,13.0,24.2
25%,51.0,29.2
50%,143.0,31.9
75%,235.0,35.5
max,1111.0,39.7


In [9]:
# Give the columns descriptive names. Renaming by existing name (instead of
# assigning a positional list of labels) keeps each label attached to the right
# data even if the column order changes upstream.
fast_food_by_state = fast_food_by_state.rename(
    columns={
        "Index": "Fast Food Count",
        "Prevalence": "Obesity Rate",
        "95% CI": "CI",
    }
)

In [10]:
# Inspect the table with its renamed columns
fast_food_by_state

Unnamed: 0,State,Fast Food Count,Obesity Rate,CI
47,Vermont,13.0,26.3,"(24.6, 27.9)"
41,Rhode Island,13.0,30.1,"(28.2, 32.1)"
1,Alaska,14.0,31.9,"(29.4, 34.4)"
20,Maine,23.0,31.0,"(29.6, 32.5)"
30,New Hampshire,27.0,29.9,"(28.2, 31.6)"
52,Wyoming,28.0,30.7,"(28.7, 32.7)"
12,Hawaii,30.0,24.5,"(23.2, 25.9)"
27,Montana,34.0,28.5,"(27.1, 30.0)"
35,North Dakota,34.0,33.1,"(31.1, 35.1)"
7,Delaware,38.0,36.5,"(34.3, 38.8)"


In [11]:
# Split the states into three groups by fast food restaurant count and keep each
# group's obesity rates for the ANOVA.
# The bins are contiguous (low: < 67, middle: 67 to 181, high: >= 182), so a state
# whose count sits exactly on a boundary still lands in exactly one group instead
# of being silently left out of the test.
restaurant_count = fast_food_by_state["Fast Food Count"]

group1 = fast_food_by_state.loc[restaurant_count < 67, "Obesity Rate"]
group2 = fast_food_by_state.loc[
    (restaurant_count >= 67) & (restaurant_count < 182), "Obesity Rate"
]
group3 = fast_food_by_state.loc[restaurant_count >= 182, "Obesity Rate"]

# Every state must be assigned to one of the three groups
assert len(group1) + len(group2) + len(group3) == len(fast_food_by_state)


In [12]:
# Inspect the obesity rates of the group with the highest restaurant counts
group3

19    38.1
22    24.4
15    36.8
48    32.2
44    35.6
34    33.6
2     30.9
33    26.3
23    35.2
39    31.5
14    32.4
10    34.3
36    35.5
9     28.4
45    35.8
4     30.3
Name: Obesity Rate, dtype: float64

In [13]:
# One-way ANOVA comparing the obesity rates of the three restaurant-count groups
st.f_oneway(*[group1, group2, group3])

F_onewayResult(statistic=0.3451259121937398, pvalue=0.7100746599651979)