## Mean Insurance Individual Rate & Census Poverty

In [1]:
import csv
import json
import os

import numpy as np
import pandas as pd
import plotly.graph_objects as go
from pyspark import SparkContext, SparkConf
from sklearn.preprocessing import normalize

### Set up Spark

In [2]:
# Reuse the active SparkContext if one exists, otherwise start a new one.
sc = SparkContext.getOrCreate()

In [47]:
# Input CSV locations, keyed by the dataset name used in the cells below.
# NOTE(review): the original literal listed 'mean_zri_by_state_year' twice, so
# source['mean_idvrate_poverty'] (read in the poverty section) raised KeyError.
# The poverty path follows the naming pattern of the ZRI file -- confirm that
# it matches the actual file name under ../vizdata.
source = {
    'mean_idvrate_poverty': '../vizdata/mean_idvrate_poverty.csv',
    'mean_zri_by_state_year': '../vizdata/mean_zri_by_state_year.csv',
}

In [4]:
def csv_split(x):
    """Parse a single line of CSV text into a list of field strings.

    Fields are separated by commas; double quotes protect commas that are
    part of a field.
    """
    single_line_reader = csv.reader((x,), delimiter=',', quotechar='"')
    return next(single_line_reader)

### Plot 2015 US percent under poverty level by state

In [5]:
# Load the mean_idvrate_poverty CSV, one parsed row per input line.
mean_idvrate_poverty = (
    sc.textFile(source['mean_idvrate_poverty'])
    .map(csv_split)
)

# Key each row by column 0 (State) with column 3 (Poverty) as the value, then
# keep only the first value seen for each key. The header line is not removed,
# so it stays in the result as a ('State', 'Poverty') pair.
state_poverty = (
    mean_idvrate_poverty
    .map(lambda row: (row[0], row[3]))
    .groupByKey()
    .mapValues(lambda values: list(values)[0])
)
state_poverty.collect()

[('State', 'Poverty'),
 ('AK', '13.341379310344827'),
 ('AR', '21.551999999999982'),
 ('DE', '12.5'),
 ('HI', '13.219999999999999'),
 ('KS', '12.882857142857146'),
 ('LA', '21.956250000000008'),
 ('ME', '15.1'),
 ('MO', '18.058260869565213'),
 ('NC', '19.369999999999994'),
 ('NE', '11.975268817204299'),
 ('NM', '21.121212121212125'),
 ('OH', '15.334090909090902'),
 ('SC', '21.213043478260868'),
 ('SD', '16.743939393939396'),
 ('TN', '19.450526315789467'),
 ('TX', '17.159055118110235'),
 ('VA', '14.972932330827074'),
 ('WI', '12.519444444444446'),
 ('WV', '18.79090909090909'),
 ('WY', '11.68695652173913'),
 ('AL', '21.76268656716418'),
 ('AZ', '21.3'),
 ('FL', '18.250746268656716'),
 ('GA', '22.198113207547163'),
 ('IA', '11.850505050505049'),
 ('ID', '16.193181818181817'),
 ('IL', '14.210784313725494'),
 ('IN', '13.89021739130435'),
 ('MI', '16.687951807228917'),
 ('MS', '25.723170731707313'),
 ('MT', '15.719642857142855'),
 ('ND', '11.424528301886792'),
 ('NH', '10.169999999999998'),


In [15]:
# Load the geomap JSON (presumably US state boundaries, judging by the file
# name -- confirm) into a Python object.
with open('../vizdata/gz_2010_us_040_00_500k.json') as json_file:
    states = json.loads(json_file.read())

In [16]:
# Plot 2015 US percent under poverty level by state.
# Collect once so locations and values come from the same result, and drop the
# CSV header by its key rather than by position: groupByKey gives no guarantee
# that the ('State', 'Poverty') pair is returned first.
poverty_by_state = [
    (state, float(pct))
    for state, pct in state_poverty.collect()
    if state != 'State'
]

fig = go.Figure(data=go.Choropleth(
    locations=[state for state, pct in poverty_by_state],  # state codes
    z=[pct for state, pct in poverty_by_state],  # data to be color-coded
    locationmode='USA-states',  # set of locations match entries in `locations`
    colorscale='Reds',
    colorbar_title="% under poverty level",
))

fig.update_layout(
    title_text='2015 US percent under poverty level by State',
    geo_scope='usa',  # limit map scope to USA
)

fig.show()

### Plot 2015 US percent under poverty level vs. rent index by state

In [86]:
# Read in mean_zri_by_state_year and build the normalized 2015 rent index.
mean_zri_by_state_year = sc.textFile(source['mean_zri_by_state_year']).map(csv_split)

# Keep the rows whose column 1 is '2015', order them by column 0 (the state
# key, going by the file name) and keep column 2 as the rent index value.
zri_2015 = (
    mean_zri_by_state_year
    .filter(lambda row: row[1] == '2015')
    .map(lambda row: (row[0], row[2]))
    .sortByKey()
    .values()
)

# CSV fields arrive as strings; convert to float explicitly instead of relying
# on scikit-learn to coerce a string array.
zri = np.array(zri_2015.collect(), dtype=float)

# Scale the single column to unit L2 norm, then multiply by 100.
zri = normalize(zri[:, np.newaxis], axis=0).ravel() * 100
zri

array([17.19609784,  9.42450282,  8.68769935, 11.79256307, 18.35196346,
       16.27956718, 14.9466682 , 20.14165039, 12.88035269, 15.03881989,
       10.10860247, 22.13855245, 10.28228164, 10.4279636 , 10.98898355,
        9.40470914,  9.90813742,  9.19612017, 12.00781397, 18.97004018,
       15.0944128 , 12.25558644, 10.1972202 , 13.39753568, 37.36684633,
        9.74273299, 12.12269248, 10.6339636 , 16.22224532, 11.6304436 ,
       13.34253619, 15.74614304, 11.28843854, 12.94964563, 16.13337931,
        9.91310844,  9.24083779, 12.8303262 , 11.13923587, 13.96764932,
       11.40887189, 10.45861139,  9.96440454, 12.40682415, 14.0285066 ,
       11.82499107, 16.17645552, 12.61738598, 10.21298672, 10.23208192,
       12.4860859 ])

In [87]:
# x = rent index, y = poverty
# `zri` is ordered by state key (sortByKey), while state_poverty comes back in
# shuffle order, so pairing the two by position plots one state's rent against
# another state's poverty. Pair them by state key instead.
# NOTE(review): assumes both CSVs use the same state codes in column 0.
zri_states = sorted(
    mean_zri_by_state_year
    .filter(lambda row: row[1] == '2015')
    .map(lambda row: row[0])
    .collect()
)
if len(zri_states) != len(zri):
    raise ValueError("zri does not match the 2015 rows of mean_zri_by_state_year")
zri_by_state = {state: float(value) for state, value in zip(zri_states, zri)}

# Drop the CSV header pair by key and convert the poverty strings to numbers.
poverty_lookup = {
    state: float(pct)
    for state, pct in state_poverty.collect()
    if state != 'State'
}

# Only states present in both datasets can be plotted.
common_states = [state for state in zri_by_state if state in poverty_lookup]
if not common_states:
    raise ValueError("no state keys shared between the rent index and poverty data")

fig = go.Figure(data=go.Scatter(
    x=[zri_by_state[state] for state in common_states],
    y=[poverty_lookup[state] for state in common_states],
    text=common_states,
    mode='markers',
))
fig.update_layout(
    title_text = '2015 US percent under poverty level vs. rent index by State',
    xaxis_title="rent index",
    yaxis_title="percent under poverty level"
)
fig.show()

### Stop Spark

In [17]:
# Shut down the SparkContext once all cells above have run.
sc.stop()