In [32]:
# import necessary libraries
import csv
import json
import os
import pprint
from datetime import datetime
from time import time

import pandas as pd
from geojson import Feature, FeatureCollection, Point
from numpy import genfromtxt
from sqlalchemy import create_engine, text

# Engine bound to the SQLite file that stores every table built in this notebook
engine = create_engine('sqlite:///depression.db')

# Source CSV file names, one per data set
npao_file_name = 'cdc_npao.csv'
depression_file_name = 'depression_data.csv'
alcohol_file_name = 'alcohol_data.csv'
income_file_name = 'income_data.csv'
poverty_file_name = 'poverty_data.csv'

# The NPAO extract is loaded with all of its columns
npao_df = pd.read_csv(npao_file_name)

# Depression data: keep only the state name and "Yes%" columns and rename
# them to the lower-case column names used by the other data sets
depression_df = (
    pd.read_csv(depression_file_name)
    .filter(items=["STATE NAME", "Yes%"])
    .rename(columns={"STATE NAME": 'state', "Yes%": "yes_percent"})
)

# Alcohol, income and poverty extracts are each trimmed to the same three columns
alcohol_df = pd.read_csv(alcohol_file_name).filter(items=["state", "factor", "yes_percent"])
income_df = pd.read_csv(income_file_name).filter(items=["state", "factor", "yes_percent"])
poverty_df = pd.read_csv(poverty_file_name).filter(items=["state", "factor", "yes_percent"])

# Persist each DataFrame as a SQLite table; an existing table of the same name is replaced.
# These three tables store the DataFrame index in a column labelled 'id'.
for table_name, frame in (
    ('npao', npao_df),
    ('depression', depression_df),
    ('alcohol', alcohol_df),
):
    frame.to_sql(name=table_name, con=engine, index_label='id', if_exists='replace')

# income and poverty pass no index_label, so their index column keeps
# pandas' default label instead of 'id'.
for table_name, frame in (('income', income_df), ('poverty', poverty_df)):
    frame.to_sql(name=table_name, con=engine, if_exists='replace')

# Connection used for the read queries
con = engine.connect()

# Obesity figures: join npao to depression on the upper-cased state name and keep
# the rows with Total = 'Total', QuestionID = 'Q036' and YearStart = '2014',
# leaving out the 'National' row.
sql_query = (
    "select LocationDesc as state,data_value as factor, yes_percent  "
    "from npao inner join depression "
    "on upper(npao.LocationDesc) = upper(depression.state) "
    "where npao.Total = 'Total' and QuestionID = 'Q036' "
    "and LocationDesc <> 'National' and YearStart = '2014'"
)

# Load the joined rows into a DataFrame and store them as their own table
obesity_df = pd.read_sql(sql_query, con)
obesity_df.to_sql(name='obesity', con=engine, index_label='id', if_exists='replace')


In [52]:
# Print the obesity DataFrame (state, factor, yes_percent) produced by the npao/depression join
print(obesity_df)

             state  factor  yes_percent
0          Alabama    33.5    68.852459
1           Alaska    29.7    63.636364
2          Arizona    28.9    66.666667
3         Arkansas    35.9    68.421053
4       California    24.7    67.032967
5         Colorado    21.3    69.473684
6      Connecticut    26.3    69.230769
7         Delaware    30.7    66.666667
8          Florida    26.2    65.745856
9          Georgia    30.5    67.543860
10          Hawaii    22.1    70.000000
11           Idaho    28.9    60.000000
12        Illinois    29.3    63.809524
13         Indiana    32.7    66.304348
14            Iowa    30.9    64.102564
15          Kansas    31.3    64.864865
16        Kentucky    31.6    72.727273
17       Louisiana    34.9    67.796610
18           Maine    28.2    64.000000
19        Maryland    29.6    69.148936
20   Massachusetts    23.3    65.765766
21        Michigan    30.7    67.469880
22       Minnesota    27.6    68.604651
23     Mississippi    35.5    70.588235


In [51]:
#Building one {UPPER-CASED STATE NAME: value} dictionary per data set
def _state_lookup(connection, query, decimals=None):
    """Run ``query`` and map each row's upper-cased state name to its value.

    Rows are read by position: column 1 is the state name and column 2 is
    the value (column 0 is the index column written by ``to_sql``).

    Args:
        connection: Open SQLAlchemy connection the query is executed on.
        query: SQL text of the SELECT statement.
        decimals: When given, values are rounded to this many decimal
            places; when ``None`` they are stored unchanged.

    Returns:
        dict: State name in upper case -> value taken from column 2.
    """
    lookup = {}
    # text() is required because SQLAlchemy 2.0 no longer accepts a plain
    # string in Connection.execute(); it also works on earlier releases.
    for row in connection.execute(text(query)):
        value = row[2]
        lookup[row[1].upper()] = value if decimals is None else round(value, decimals)
    return lookup


#Querying data from Database; depression and alcohol values are rounded to 2 decimals
depression = _state_lookup(con, "select * from depression", decimals=2)
obesity = _state_lookup(con, "select * from obesity")
alcohol = _state_lookup(con, "select * from alcohol", decimals=2)
#Rows whose state is 'United States' are excluded from income and poverty
income = _state_lookup(con, "select * from income where STATE <> 'United States'")
poverty = _state_lookup(con, "select * from poverty where STATE <> 'United States'")


def _write_factor_geojson(values, property_name, factor_label, output_path,
                          template_path='StateCoord_geojson.json'):
    """Annotate the state GeoJSON with one data set and write it to a new file.

    A fresh copy of the template is loaded on every call, so each output
    file carries only its own data set. Every feature whose
    ``properties['NAME']`` (upper-cased) is a key of ``values`` receives the
    matching value under ``property_name`` and ``factor_label`` under
    ``'FACTOR'``; features without a match are written unchanged.

    Args:
        values: dict mapping upper-cased state name -> value to attach.
        property_name: Property key that receives the value (e.g. 'DEPRESSION').
        factor_label: Text stored under the 'FACTOR' property.
        output_path: Path of the GeoJSON file to write.
        template_path: Path of the state-coordinate GeoJSON to read.

    Returns:
        dict: The annotated GeoJSON structure that was written.
    """
    with open(template_path, 'r') as template_file:
        geojson_data = json.load(template_file)

    # One pass over the features with a dict lookup, instead of rescanning
    # every feature once per state.
    for feature in geojson_data['features']:
        properties = feature['properties']
        state_key = properties['NAME'].upper()
        if state_key in values:
            properties[property_name] = values[state_key]
            properties['FACTOR'] = factor_label

    with open(output_path, 'w') as output_file:
        json.dump(geojson_data, output_file)
    return geojson_data


# Writing one GeoJSON file per data set; the annotated structures stay
# available under their original variable names.
json_load_dep = _write_factor_geojson(
    depression, 'DEPRESSION', 'Depression', 'depression_geojson.json')
json_load_alcohol = _write_factor_geojson(
    alcohol, 'ALCOHOL', 'Alcohol', 'alcohol_geojson.json')
json_load_obesity = _write_factor_geojson(
    obesity, 'OBESITY', 'Obesity', 'obesity_geojson.json')
json_load_income = _write_factor_geojson(
    income, 'INCOME', 'Income', 'income_geojson.json')
json_load_poverty = _write_factor_geojson(
    poverty, 'POVERTY', 'Poverty', 'poverty_geojson.json')



{'ALABAMA': 16.9, 'ALASKA': 11.0, 'ARIZONA': 14.9, 'ARKANSAS': 16.3, 'CALIFORNIA': 13.3, 'COLORADO': 10.3, 'CONNECTICUT': 9.7, 'DELAWARE': 13.0, 'FLORIDA': 14.1, 'GEORGIA': 15.1, 'HAWAII': 9.5, 'IDAHO': 12.6, 'ILLINOIS': 12.5, 'INDIANA': 13.3, 'IOWA': 10.8, 'KANSAS': 11.9, 'KENTUCKY': 17.1, 'LOUISIANA': 19.6, 'MAINE': 11.3, 'MARYLAND': 9.4, 'MASSACHUSETTS': 10.5, 'MICHIGAN': 14.1, 'MINNESOTA': 9.5, 'MISSISSIPPI': 19.9, 'MISSOURI': 13.4, 'MONTANA': 12.7, 'NEBRASKA': 10.7, 'NEVADA': 13.3, 'NEW HAMPSHIRE': 7.7, 'NEW JERSEY': 10.0, 'NEW MEXICO': 19.0, 'NEW YORK': 14.1, 'NORTH CAROLINA': 14.7, 'NORTH DAKOTA': 10.2, 'OHIO': 13.9, 'OKLAHOMA': 15.8, 'OREGON': 13.2, 'PENNSYLVANIA': 12.5, 'RHODE ISLAND': 12.3, 'SOUTH CAROLINA': 15.4, 'SOUTH DAKOTA': 12.8, 'TENNESSEE': 15.0, 'TEXAS': 14.7, 'UTAH': 9.7, 'VERMONT': 10.8, 'VIRGINIA': 10.7, 'WASHINGTON': 11.0, 'WEST VIRGINIA': 18.5, 'WISCONSIN': 11.3, 'WYOMING': 10.8}
