In [22]:
import pandas as pd
import censusdata
from censusdata import censusgeo
from census import Census
from us import states
import os, sys, string

# Notebook-wide pandas display settings: keep wide frames on a single line
# instead of wrapping them, and print floats with up to 10 decimal places.
pd.set_option('display.expand_frame_repr', False)
pd.set_option('display.precision', 10)
# Lowercase state/territory codes that the plotting code filters out when it
# selects county shapes.
EXCLUDED = ("ak", "hi", "pr", "gu", "vi", "mp", "as")

class CensusData:
    """Download ACS 1-year census estimates per county and flatten them to CSV.

    ``download_county_state_census_data`` writes ``sample_final.csv``;
    ``clean_file`` then rewrites that file in place and emits a header-less
    copy as ``updated_test.csv``. Both paths are relative to the current
    working directory.
    """

    @staticmethod
    def _sum_columns(frame, columns):
        """Return the element-wise sum of ``columns`` of ``frame``.

        Uses plain ``+`` so a missing value in any component keeps the total
        missing (the caller fills those with 0 afterwards).
        """
        total = frame[columns[0]]
        for column in columns[1:]:
            total = total + frame[column]
        return total

    def download_county_state_census_data(self):
        """Fetch the 2016 ACS 1-year variables for every county and export them.

        Adds the derived columns ``bachelor_degree_plus`` and
        ``between_20_44``, renames the two B20001 earnings columns, replaces
        missing values with 0, drops the raw component columns and writes the
        result to ``sample_final.csv`` via ``censusdata.export.exportcsv``.

        Side effects: performs a network download through ``censusdata`` and
        sets pandas' global ``display.max_rows`` to the number of rows fetched.
        """
        earnings_columns = ['B20001_022E', 'B20001_043E']
        # NOTE(review): B28006_014E looks like the subtotal of 015E-019E in
        # the B28006 table, which would make this sum double count - confirm
        # against the ACS variable list.
        bachelor_columns = ['B28006_014E', 'B28006_015E', 'B28006_016E',
                            'B28006_017E', 'B28006_018E', 'B28006_019E']
        # NOTE(review): the B01001G table appears to cover a single race
        # group and these cells a single sex - confirm this is the intended
        # 20-44 population.
        age_columns = ['B01001G_008E', 'B01001G_009E', 'B01001G_010E',
                       'B01001G_011E']
        # Requested but never used for a derived column; dropped again below.
        unused_columns = ['C15003_015E', 'C15003_018E', 'C15003_016E',
                          'C15003_017E']

        relevant_data = censusdata.download(
            'acs1', 2016,
            censusdata.censusgeo([('state', '*'), ('county', '*')]),
            earnings_columns + bachelor_columns + age_columns + unused_columns)

        relevant_data['bachelor_degree_plus'] = self._sum_columns(
            relevant_data, bachelor_columns)
        relevant_data['between_20_44'] = self._sum_columns(
            relevant_data, age_columns)

        # The misspelt 'femals_greater_100k' is kept on purpose: it is the
        # column name written to the exported file.
        relevant_data = relevant_data.rename(columns={
            'B20001_022E': 'male_greater_100k',
            'B20001_043E': 'femals_greater_100k',
        })

        pd.set_option('display.max_rows', len(relevant_data))
        relevant_data = relevant_data.fillna(0)
        # Keep only the renamed and derived columns.
        relevant_data = relevant_data.drop(
            bachelor_columns + age_columns + unused_columns, axis=1)
        censusdata.export.exportcsv('sample_final.csv', relevant_data)
        print('---- Census Data downloaded successfully ----')

    def clean_file(self):
        """Normalise ``sample_final.csv`` and write a header-less copy.

        Removes every occurrence of ``County``, every double quote and every
        space from ``sample_final.csv`` (rewriting it in place), then writes
        everything after the first line to ``updated_test.csv``. An empty
        input produces an empty ``updated_test.csv``. No upload is performed
        here.
        """
        with open("sample_final.csv") as raw_file:
            contents = raw_file.read()

        # Order matters: 'County' is removed before quotes and spaces.
        for unwanted in ('County', '"', ' '):
            contents = contents.replace(unwanted, '')

        with open("sample_final.csv", 'w') as cleaned_file:
            cleaned_file.write(contents)

        # Everything after the first newline is the data without its header.
        body = contents.partition('\n')[2]
        with open("updated_test.csv", 'w') as headerless_file:
            headerless_file.write(body)
        print('---- Data loaded successfully to file -----')

        
# Run the census pipeline end to end. The download step must come first:
# it writes sample_final.csv, which clean_file() then reads and rewrites.
data = CensusData()
data.download_county_state_census_data()
data.clean_file()      

---- Census Data downloaded successfully ----
---- Data loaded successfully to file -----


In [23]:
from bokeh.plotting import figure, show, output_file
from bokeh.sampledata.us_counties import data as counties
from bokeh.sampledata.us_states import data as states
from bokeh.sampledata.unemployment import data as unemployment
from bokeh.models import LogColorMapper
from bokeh.palettes import Viridis6 as palette
import pandas as pd

# Reverse the colour order for the choropleth. A reversed copy is built
# instead of calling palette.reverse(): that would mutate the object owned by
# bokeh.palettes in place (flipping the order again on every re-run of this
# cell) and is unavailable when the palette is a tuple.
palette = tuple(reversed(palette))
# Lowercase state/territory codes whose counties are left off the map.
EXCLUDED = ("ak", "hi", "pr", "gu", "vi", "mp", "as")

class VisualizeData:
    """Render a county-level choropleth of the ``bachelor_degree_plus`` values.

    Relies on names the notebook cell brings into module scope: bokeh's
    ``counties`` sample data, ``figure``/``show``/``output_file``,
    ``LogColorMapper``, ``palette``, ``EXCLUDED`` and pandas as ``pd``.
    """

    # NOTE(review): machine-specific absolute path, kept only as the
    # backward-compatible default - pass csv_path explicitly where possible.
    DEFAULT_CSV_PATH = '/Users/abhumkar/Downloads/final_output.csv'

    @staticmethod
    def _load_rates(csv_path):
        """Read ``csv_path`` into ``{(state_id, county_id): rate}``.

        The file must provide ``state``, ``county`` and
        ``bachelor_degree_plus`` columns; ids are converted to ``int`` and
        the rate to ``float``.
        """
        frame = pd.read_csv(csv_path, dtype={'state': str})
        return {
            (int(state), int(county)): float(value)
            for state, county, value in zip(
                frame['state'], frame['county'], frame['bachelor_degree_plus'])
        }

    @staticmethod
    def _align_rates(county_ids, rate_lookup):
        """Return one rate per id in ``county_ids`` (0 when the id is unknown)."""
        return [rate_lookup.get(county_id, 0) for county_id in county_ids]

    def print_bachelor_degree_graph(self, csv_path=None):
        """Plot the per-county values and write the map to ``choropleth3.html``.

        Args:
            csv_path: CSV file with ``state``, ``county`` and
                ``bachelor_degree_plus`` columns. Defaults to
                ``DEFAULT_CSV_PATH``.

        Counties whose state code is in ``EXCLUDED`` are left out of every
        column, so shapes, names and rates always have the same length and
        stay aligned. Counties missing from the CSV are plotted with rate 0.
        """
        if csv_path is None:
            csv_path = self.DEFAULT_CSV_PATH

        # Select the plotted counties once so every column shares this order.
        included_ids = [county_id for county_id in counties
                        if counties[county_id]["state"] not in EXCLUDED]

        county_xs = [counties[county_id]["lons"] for county_id in included_ids]
        county_ys = [counties[county_id]["lats"] for county_id in included_ids]
        county_names = [counties[county_id]['name'] for county_id in included_ids]
        county_rates = self._align_rates(included_ids, self._load_rates(csv_path))

        color_mapper = LogColorMapper(palette=palette)
        source_data = dict(
            x=county_xs,
            y=county_ys,
            name=county_names,
            rate=county_rates,
        )

        p = figure(title="Bachelor Degree Plus", toolbar_location="left",
                   plot_width=1100, plot_height=700, tooltips=[
                    ("Name", "@name"), ("Bachelor Degree Plus", "@rate"), ("(Long, Lat)", "($x, $y)")
                ])

        p.patches('x', 'y', source=source_data,
                  fill_color={'field': 'rate', 'transform': color_mapper}, fill_alpha=0.7,
                  line_color="white", line_width=0.5)

        output_file("choropleth3.html", title="choropleth3.py example")
        show(p)
        
# Build the visualizer and render the map; the method targets
# choropleth3.html via output_file() and then calls bokeh's show().
show_graph = VisualizeData()
show_graph.print_bachelor_degree_graph()

