# FOOD ACCESS

In [1]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import scipy.stats as st
from scipy import stats

# Input CSV files (paths are relative to the notebook's working directory)
foodaccess_path = "Resourses/FoodAccessResearchAtlasData2019.csv"
obesity_path = "Resourses/National_Obesity_By_State.csv"

# Load the food access data and the obesity-by-state data.
# The obesity table uses its "State" column as the index.
Foodaccess = pd.read_csv(filepath_or_buffer=foodaccess_path)
Obesity = pd.read_csv(filepath_or_buffer=obesity_path, index_col="State")

# Display the data frames.
# NOTE: a cell only renders its final expression, so Obesity is what is shown;
# the bare Foodaccess expression below produces no output.
Foodaccess
Obesity

Unnamed: 0_level_0,FID,Obesity,SHAPE_Length,SHAPE_Area
State,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Texas,1,32.4,15408320.0,7672330000000.0
California,2,24.2,14518700.0,5327810000000.0
Kentucky,3,34.6,6346699.0,1128830000000.0
Georgia,4,30.7,5795596.0,1652980000000.0
Wisconsin,5,30.7,6806782.0,1567820000000.0
Oregon,6,30.1,7976011.0,3178450000000.0
Virginia,7,29.2,7710804.0,1158800000000.0
Tennessee,8,33.8,6350377.0,1177050000000.0
Louisiana,9,36.2,7383857.0,1355090000000.0
New York,10,25.0,7981383.0,1411440000000.0


In [2]:
# Add a per-row 'FamiliesUnderPover' column: OHU2010 scaled by PovertyRate.
# PovertyRate is divided by 100, i.e. it is treated as a percentage.
# NOTE(review): presumably OHU2010 is a housing-unit count - confirm against the data dictionary.
Foodaccess['FamiliesUnderPover'] = Foodaccess['OHU2010'].mul(Foodaccess['PovertyRate']).div(100)

In [3]:
# Clean the obesity table: remove the FID and SHAPE_* columns, which are not
# used by the merges below, leaving the 'Obesity' column indexed by state.
obesity_clean = Obesity.drop(['FID', 'SHAPE_Length', 'SHAPE_Area'], axis='columns')

# Display the DataFrame
obesity_clean

Unnamed: 0_level_0,Obesity
State,Unnamed: 1_level_1
Texas,32.4
California,24.2
Kentucky,34.6
Georgia,30.7
Wisconsin,30.7
Oregon,30.1
Virginia,29.2
Tennessee,33.8
Louisiana,36.2
New York,25.0


In [4]:
# Aggregate Foodaccess to one row per state, summing Pop2010 and
# FamiliesUnderPover. The resulting frame is indexed by 'State'.
PovertyByState = Foodaccess.groupby(['State']).agg({'Pop2010': 'sum', 'FamiliesUnderPover': 'sum'})

# Export files.
# 'State' lives in the index of the grouped frame, so the index has to be
# written to the CSV (index=True). With index=False the exported rows carried
# only Pop2010 / FamiliesUnderPover and no state label.
PovertyByState.to_csv('../Proyect 3 data/PovertyByState.csv',
                      encoding='utf-8', index=True, header=True)

# The JSON export keeps the index by default, so it is left unchanged.
PovertyByState.to_json('../Proyect 3 data/PovertyByState.json')

# Display the DataFrame
PovertyByState


Unnamed: 0_level_0,Pop2010,FamiliesUnderPover
State,Unnamed: 1_level_1,Unnamed: 2_level_1
Alabama,4779736,342551.48
Alaska,710231,27456.97
Arizona,6392017,371202.424
Arkansas,2915918,207272.717
California,37253956,1735055.861
Colorado,5029196,223126.749
Connecticut,3574097,138094.723
Delaware,897934,40927.44
District of Columbia,601723,42997.948
Florida,18801310,1085352.178


In [7]:
# Combine the per-state poverty totals with the obesity table, matching rows on
# the state index of both frames (inner join: only states present in both are kept).
PovertyByState_clean = PovertyByState.merge(
    obesity_clean,
    how='inner',
    left_index=True,
    right_index=True,
)

# Move 'State' from the index into a regular column so it is exported with the data
PovertyByState_clean1_data = PovertyByState_clean.reset_index()

# Export files
PovertyByState_clean1_data.to_csv(
    '../Proyect 3 data/PovertyByState_clean1_data.csv',
    encoding='utf-8',
    index=False,
    header=True,
)
PovertyByState_clean1_data.to_json('../Proyect 3 data/PovertyByState_clean1_data.json')

# Display the DataFrame
PovertyByState_clean1_data

Unnamed: 0,State,Pop2010,FamiliesUnderPover,Obesity
0,Alabama,4779736,342551.48,35.6
1,Alaska,710231,27456.97,29.8
2,Arizona,6392017,371202.424,28.4
3,Arkansas,2915918,207272.717,34.5
4,California,37253956,1735055.861,24.2
5,Colorado,5029196,223126.749,20.2
6,Connecticut,3574097,138094.723,25.3
7,Delaware,897934,40927.44,29.7
8,District of Columbia,601723,42997.948,22.1
9,Florida,18801310,1085352.178,26.8


In [8]:
# Per-state mean of the low-access population share columns (1/2, 1, 10 and
# 20 mile variants) and of PovertyRate.
TotalAccessbydistance = Foodaccess.groupby(['State']).agg(
    dict.fromkeys(
        ['lapophalfshare', 'lapop1share', 'lapop10share', 'lapop20share', 'PovertyRate'],
        'mean',
    )
)

# Not rendered: only the final expression of a cell is displayed.
TotalAccessbydistance

# Rename the columns to readable labels and round every value to 2 decimals.
# NOTE(review): '% Pop.1 mile' uses spaces while its sibling labels use
# underscores; it is kept as-is because this name is written to the exported files.
TotalAccessbydistance_clean = (
    TotalAccessbydistance
    .rename(columns={
        'lapophalfshare': '%_Pop.1/2_mile',
        'lapop1share': '% Pop.1 mile',
        'lapop10share': '%_Pop.10_miles',
        'lapop20share': '%_Pop.20_miles',
        'PovertyRate': 'Poverty_Rate',
    })
    .round(2)
)

# Move 'State' from the index into a regular column
TotalAccessbydistance_clean1_data = TotalAccessbydistance_clean.reset_index()

# Export files
TotalAccessbydistance_clean1_data.to_csv(
    '../Proyect 3 data/TotalAccessbydistance_clean1_data.csv',
    encoding='utf-8',
    index=False,
    header=True,
)
TotalAccessbydistance_clean1_data.to_json('../Proyect 3 data/TotalAccessbydistance_clean1_data.json')

# Display the DataFrame
TotalAccessbydistance_clean1_data


Unnamed: 0,State,%_Pop.1/2_mile,% Pop.1 mile,%_Pop.10_miles,%_Pop.20_miles,Poverty_Rate
0,Alabama,85.07,65.28,19.96,2.11,20.06
1,Alaska,81.41,65.98,36.49,31.82,11.43
2,Arizona,72.6,50.11,39.38,25.17,16.67
3,Arkansas,85.93,63.8,23.69,3.16,19.0
4,California,57.0,39.6,18.84,11.23,14.55
5,Colorado,73.48,48.86,23.88,12.31,11.61
6,Connecticut,74.7,51.96,3.68,,11.12
7,Delaware,75.12,51.0,,,12.86
8,District of Columbia,46.14,25.51,,,17.5
9,Florida,71.13,46.89,16.42,4.54,15.3


In [10]:
# Combine the per-state access/poverty averages with the obesity table on the
# state index (inner join: only states present in both frames are kept).
TotalAccessbydistance_obesity = TotalAccessbydistance_clean.merge(
    obesity_clean,
    how='inner',
    left_index=True,
    right_index=True,
)

# Rename the obesity column to match the '*_Rate' naming of 'Poverty_Rate'
TotalAccessbydistance_obesity_clean = TotalAccessbydistance_obesity.rename(
    columns={'Obesity': 'Obesity_Rate'}
)

# Move 'State' from the index into a regular column
TotalAccessbydistance_obesity_clean1_data = TotalAccessbydistance_obesity_clean.reset_index()

# Export files
TotalAccessbydistance_obesity_clean1_data.to_csv(
    '../Proyect 3 data/TotalAccessbydistance_obesity_clean1_data.csv',
    encoding='utf-8',
    index=False,
    header=True,
)
TotalAccessbydistance_obesity_clean1_data.to_json(
    '../Proyect 3 data/TotalAccessbydistance_obesity_clean1_data.json'
)

# Display the DataFrame
TotalAccessbydistance_obesity_clean1_data

Unnamed: 0,State,%_Pop.1/2_mile,% Pop.1 mile,%_Pop.10_miles,%_Pop.20_miles,Poverty_Rate,Obesity_Rate
0,Alabama,85.07,65.28,19.96,2.11,20.06,35.6
1,Alaska,81.41,65.98,36.49,31.82,11.43,29.8
2,Arizona,72.6,50.11,39.38,25.17,16.67,28.4
3,Arkansas,85.93,63.8,23.69,3.16,19.0,34.5
4,California,57.0,39.6,18.84,11.23,14.55,24.2
5,Colorado,73.48,48.86,23.88,12.31,11.61,20.2
6,Connecticut,74.7,51.96,3.68,,11.12,25.3
7,Delaware,75.12,51.0,,,12.86,29.7
8,District of Columbia,46.14,25.51,,,17.5,22.1
9,Florida,71.13,46.89,16.42,4.54,15.3,26.8
