## Mean Insurance Individual Rate & Census Poverty

In [17]:
import csv
import os
import pandas as pd
import json
import plotly.graph_objects as go
import numpy as np
from sklearn.preprocessing import normalize
from pyspark import SparkContext, SparkConf

### Setup Spark

In [2]:
# Reuse the SparkContext that is already registered in this process, or create one if none exists.
sc = SparkContext.getOrCreate()

In [3]:
# Input CSV files used by this notebook, keyed by dataset name (relative paths).
source = dict(
    mean_idvrate_poverty='../vizdata/mean_idvrate_poverty.csv',
    mean_zri_by_state_year='../vizdata/mean_zri_by_state_year.csv',
)

In [4]:
def csv_split(x):
    """Parse a single line of CSV text into a list of field strings.

    Fields are comma-separated; double quotes protect commas inside a field.
    An empty line yields an empty list.
    """
    row_reader = csv.reader([x], delimiter=',', quotechar='"')
    return next(row_reader)

### Plot Individual Rate by State

In [42]:
# Load mean_idvrate_poverty and keep one poverty value per state.
mean_idvrate_poverty = sc.textFile(source['mean_idvrate_poverty']).map(csv_split)
# Column 0 is the state code and column 3 the poverty value. The CSV header row is not
# removed here, so it shows up as the ('State', 'Poverty') pair in the result.
state_poverty = (
    mean_idvrate_poverty
    .map(lambda row: (row[0], row[3]))
    .groupByKey()
    # Keep only the first value seen for each state.
    .mapValues(lambda values: next(iter(values)))
)
state_poverty.collect()

[('State', 'Poverty'),
 ('AK', '13.341379310344827'),
 ('AR', '21.551999999999982'),
 ('DE', '12.5'),
 ('HI', '13.219999999999999'),
 ('KS', '12.882857142857146'),
 ('LA', '21.956250000000008'),
 ('ME', '15.1'),
 ('MO', '18.058260869565213'),
 ('NC', '19.369999999999994'),
 ('NE', '11.975268817204299'),
 ('NM', '21.121212121212125'),
 ('OH', '15.334090909090902'),
 ('SC', '21.213043478260868'),
 ('SD', '16.743939393939396'),
 ('TN', '19.450526315789467'),
 ('TX', '17.159055118110235'),
 ('VA', '14.972932330827074'),
 ('WI', '12.519444444444446'),
 ('WV', '18.79090909090909'),
 ('WY', '11.68695652173913'),
 ('AL', '21.76268656716418'),
 ('AZ', '21.3'),
 ('FL', '18.250746268656716'),
 ('GA', '22.198113207547163'),
 ('IA', '11.850505050505049'),
 ('ID', '16.193181818181817'),
 ('IL', '14.210784313725494'),
 ('IN', '13.89021739130435'),
 ('MI', '16.687951807228917'),
 ('MS', '25.723170731707313'),
 ('MT', '15.719642857142855'),
 ('ND', '11.424528301886792'),
 ('NH', '10.169999999999998'),


In [43]:
# Load mean_idvrate_poverty and build the normalised individual rate per state.
mean_idvrate_poverty = sc.textFile(source['mean_idvrate_poverty']).map(csv_split)
# Column 0 is the state code, column 2 the individual rate; keep the first value per state.
state_idvrate = mean_idvrate_poverty.map(lambda x: (x[0], x[2])).groupByKey().mapValues(lambda x: list(x)[0])
# Drop the CSV header by its key ('State') rather than by position: groupByKey shuffles the
# records, so the header is not guaranteed to be the first element of the collected list.
# Convert to float explicitly instead of relying on normalize() to coerce strings.
norm_state_idvrate = np.array(
    state_idvrate.filter(lambda kv: kv[0] != 'State').values().collect(),
    dtype=float,
)
# normalize(..., axis=0) rescales the single column to unit L2 norm; the result is then
# multiplied by 100. Note that this is not a percentage of a total.
norm_state_idvrate = normalize(norm_state_idvrate[:, np.newaxis], axis=0).ravel() * 100
norm_state_idvrate

array([27.51630938, 12.46050473,  9.0356088 , 10.41020704, 11.84168015,
       12.51853207, 18.10446148,  2.91830571, 17.44866681, 15.31245713,
       10.03702224, 16.50027364, 15.62046194, 14.66155451, 14.54259173,
       12.26943674, 16.50590023, 19.42653886, 18.45194625, 20.55248794,
       20.21583313, 13.373096  , 19.50629189, 18.69920873, 13.27231959,
       13.75372879,  2.53108042, 17.81493551, 17.75134484, 16.87370834,
       16.36497593, 16.62252133, 15.54488984, 27.49923264, 16.8129375 ,
       11.10429643, 12.21859457, 14.92425659, 13.86120627])

In [44]:
# Load the geographic boundary JSON (the "geomap") into `states`.
# The plotting cells visible in this notebook use locationmode='USA-states' and do not
# reference `states` directly.
with open('../vizdata/gz_2010_us_040_00_500k.json') as json_file:
    states = json.loads(json_file.read())

In [45]:
# Plot individual rate by state.
# NOTE(review): z holds L2-normalised rates scaled by 100 (norm_state_idvrate), so the "(%)"
# in the colorbar title may be misleading — confirm the intended unit.
fig = go.Figure(data=go.Choropleth(
    # State codes in RDD order. The CSV header is dropped by key ('State') rather than by
    # position, because after groupByKey it is not guaranteed to be the first record.
    locations=[state for state in state_poverty.keys().collect() if state != 'State'], # Spatial coordinates
    z = norm_state_idvrate, # Data to be color-coded
    locationmode = 'USA-states', # set of locations match entries in `locations`
    colorscale = 'Reds',
    colorbar_title = "Individual Rate (%) ",
))

fig.update_layout(
    title_text = 'Individual Rate by State',
    geo_scope='usa', # limit map scope to USA
)

fig.show()

In [47]:
# Plot percent under poverty level by state.
# Collect the (state, poverty) pairs once and drop the CSV header by key ('State') rather
# than by position: after groupByKey the header row is not guaranteed to come first.
poverty_by_state = [(state, pct) for state, pct in state_poverty.collect() if state != 'State']
fig = go.Figure(data=go.Choropleth(
    locations=[state for state, _ in poverty_by_state], # Spatial coordinates
    # Values are read from CSV as strings, so cast to float before colour-coding.
    z = pd.Series([pct for _, pct in poverty_by_state]).astype(float), # Data to be color-coded
    locationmode = 'USA-states', # set of locations match entries in `locations`
    colorscale = 'Reds',
    colorbar_title = "% under poverty level",
))

fig.update_layout(
    title_text = 'Percent Under Poverty Level By State',
    geo_scope='usa', # limit map scope to USA
)

fig.show()

### Plot Individual Rate vs. Percent Under Poverty Level

In [33]:
# Load mean_idvrate_poverty and pair each state with (individual rate, poverty).
# NOTE: this rebinds state_poverty to state -> (individual rate, poverty) string pairs. The
# choropleth cells earlier in the notebook expect state -> poverty strings, so run the
# notebook top to bottom.
mean_idvrate_poverty = sc.textFile(source['mean_idvrate_poverty']).map(csv_split)
state_poverty = mean_idvrate_poverty.map(lambda x: (x[0], (x[2], x[3]))).groupByKey().mapValues(lambda x: list(x)[0])
# Despite its name, norm_state_poverty holds the *individual rate* (x[1][0], CSV column 2).
# The CSV header is dropped by key ('State') rather than by position, because groupByKey
# does not guarantee the header comes first. Values are converted to float explicitly
# instead of relying on normalize() to coerce strings.
norm_state_poverty = np.array(
    state_poverty.filter(lambda kv: kv[0] != 'State').map(lambda x: x[1][0]).collect(),
    dtype=float,
)
# Rescale the single column to unit L2 norm, then multiply by 100.
norm_state_poverty = normalize(norm_state_poverty[:,np.newaxis], axis=0).ravel() * 100
norm_state_poverty

array([27.51630938, 12.46050473,  9.0356088 , 10.41020704, 11.84168015,
       12.51853207, 18.10446148,  2.91830571, 17.44866681, 15.31245713,
       10.03702224, 16.50027364, 15.62046194, 14.66155451, 14.54259173,
       12.26943674, 16.50590023, 19.42653886, 18.45194625, 20.55248794,
       20.21583313, 13.373096  , 19.50629189, 18.69920873, 13.27231959,
       13.75372879,  2.53108042, 17.81493551, 17.75134484, 16.87370834,
       16.36497593, 16.62252133, 15.54488984, 27.49923264, 16.8129375 ,
       11.10429643, 12.21859457, 14.92425659, 13.86120627])

In [34]:
# x = Individual Rate, y = Poverty
# Collect state_poverty once (it maps state -> (individual rate, poverty) here) and drop the
# CSV header by key ('State') rather than by position, since groupByKey does not guarantee
# the header comes first. Poverty values are cast to float so the y axis is numeric, matching
# the poverty choropleth.
state_points = [(state, float(pair[1])) for state, pair in state_poverty.collect() if state != 'State']
fig = go.Figure(data=go.Scatter(x=norm_state_poverty,  # normalised individual rate
                                y=[poverty for _, poverty in state_points],
                                text=[state for state, _ in state_points],  # hover label: state code
                                mode='markers', marker=go.scatter.Marker(
                                    size=15,
                                    opacity=0.6,
                                    colorscale="Viridis")))
fig.update_layout(
    title_text = 'Individual Rate vs. Percent Under Poverty Level',
    xaxis_title="Individual Rate",
    yaxis_title="Percent Under Poverty Level"
)
fig.show()

### Stop Spark

In [10]:
# sc.stop()