# Correlation between health insurance and other indicators of poverty
This notebook checks whether the percentage of uninsured people in a state is correlated with other indicators of income and poverty, namely participation in food-assistance programs and the cost-of-living index.

In [1]:
import pandas as pd
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt

In [2]:
# Load the state-level Food Atlas CSV. The FIPS code and the state name
# are read as strings; every other column is left to pandas' inference.
STATE_DTYPES = dict(StateFIPS=str, State=str)
df = pd.read_csv(
    '../datasets/Food_Atlas_State_2013.csv',
    dtype=STATE_DTYPES,
    thousands=',',       # treat ',' as a thousands separator in numeric fields
    encoding='cp1252',
)
df.head(n=5)

Unnamed: 0,StateFIPS,State,"WIC participants, FY 2013","National School Lunch Program participants, FY 2013","School Breakfast Program participants, FY 2013","Child and Adult Care participants, FY 2013","Summer Food participants, FY 2013","State Population, 2013",Percent population uninsured,Cost of living index
0,1,Alabama,139001,539664,224173,41863,30308,4833722,13.6,92.4
1,2,Alaska,23055,52495,20741,10963,5078,735132,18.5,131.1
2,4,Arizona,182501,646767,289520,42945,23688,6626624,17.1,100.8
3,5,Arkansas,89777,332497,166376,67630,41946,2959373,16.0,91.0
4,6,California,1431881,3294580,1517341,451422,114689,38332521,17.2,128.6


In [3]:
# Rescale the two percentage-style columns by a factor of 1/100.
# NOTE: both columns are overwritten in place, so running this cell a
# second time on the same kernel divides them by 100 again.
df['Cost of living index'] = df['Cost of living index'].div(100)
df['Percent population uninsured'] = df['Percent population uninsured'].div(100)
df.head(n=5)

Unnamed: 0,StateFIPS,State,"WIC participants, FY 2013","National School Lunch Program participants, FY 2013","School Breakfast Program participants, FY 2013","Child and Adult Care participants, FY 2013","Summer Food participants, FY 2013","State Population, 2013",Percent population uninsured,Cost of living index
0,1,Alabama,139001,539664,224173,41863,30308,4833722,0.136,0.924
1,2,Alaska,23055,52495,20741,10963,5078,735132,0.185,1.311
2,4,Arizona,182501,646767,289520,42945,23688,6626624,0.171,1.008
3,5,Arkansas,89777,332497,166376,67630,41946,2959373,0.16,0.91
4,6,California,1431881,3294580,1517341,451422,114689,38332521,0.172,1.286


In [4]:
# Pearson correlation between the cost-of-living index and the uninsured share.
df['Cost of living index'].corr(
    df['Percent population uninsured'],
    method='pearson',
)

-0.37606983538341443

In [5]:
# correlation with uninsured population
# One Pearson coefficient per food-assistance program column, in the order listed.
# NOTE(review): these columns look like raw participant counts rather than
# per-capita rates -- confirm that is the intended input.
correlation = [
    df[program_column].corr(df['Percent population uninsured'])
    for program_column in (
        'WIC participants, FY 2013',
        'National School Lunch Program participants, FY 2013',
        'School Breakfast Program participants, FY 2013',
        'Child and Adult Care participants, FY 2013',
    )
]

In [6]:
# correlation with cost of living index -> as expected, little to no correlation
# Prints one coefficient per program column, in the order listed.
for program_column in (
    'WIC participants, FY 2013',
    'National School Lunch Program participants, FY 2013',
    'School Breakfast Program participants, FY 2013',
    'Child and Adult Care participants, FY 2013',
):
    print(df[program_column].corr(df['Cost of living index']))

0.09697076435186365
0.0006489072708318793
-0.034334758113415687
0.07955024831566479


In [7]:
# Display labels for the food-assistance programs, one per line.
categories = [
    'WIC participants',
    'National School Lunch Program participants',
    'School Breakfast Program participants',
    'Child and Adult Care participants',
]

In [9]:
# Pair each program label with its correlation coefficient.
# The two lists are matched purely by position.
df2 = pd.DataFrame({
    'Categories': categories,
    'Correlation': correlation,
})

In [10]:
# Show the first rows of the label/correlation summary table.
df2.head(n=5)

Unnamed: 0,Categories,Correlation
0,WIC participants,0.349029
1,National School Lunch Program participants,0.356203
2,School Breakfast Program participants,0.438156
3,Child and Adult Care participants,0.306012
