In [2]:
#import dependencies
import pandas as pd
import numpy as np

#### Extract CSVs into DataFrames

In [3]:
# Relative paths of the two source CSVs read by the next cell
# (insurance coverage first, then the CDC extract; the order is irrelevant)
ins_file = "Resources/2019-US-health-insurance-coverage.csv"
CDC_file = "Resources/2005-2019-MMR-US-CDC.csv"

In [4]:
# Load both CSVs into DataFrames.
# For the insurance file, header=0 consumes the file's own header row and the
# explicit names below are used as the column labels instead.
ins_columns = [
    'Location', 'Employer', 'Non-Group', 'Medicaid', 'Medicare',
    'Military', 'Uninsured', 'Total', 'Footnotes',
]
cdc_df = pd.read_csv(CDC_file)
ins_df = pd.read_csv(ins_file, header=0, names=ins_columns)

#### CDC Maternal Mortality (MMR) 2005-2019

In [5]:
# preview the first five rows of the CDC data
cdc_df.head(n=5)

Unnamed: 0,State,State Code,Year,Deaths,Population,Crude Rate
0,Alabama,1,2005,12,2356423,Unreliable
1,Alabama,1,2006,10,2385480,Unreliable
2,Alabama,1,2015,12,2505795,Unreliable
3,Alabama,1,2016,35,2507714,1.4
4,Alabama,1,2017,41,2514911,1.6


In [95]:
# Check the column labels as parsed by read_csv; the next cell renames the
# first column in case its header carries stray leading characters.
cdc_df.columns

Index(['State', 'State Code', 'Year', 'Deaths', 'Population', 'Crude Rate'], dtype='object')

In [9]:
# Normalise the first column header to plain "State".
# The key below looks like a UTF-8 byte-order mark that was decoded with a
# single-byte encoding -- presumably how the header once appeared; verify against the raw file.
# rename() ignores keys that are not present, so this is harmless when the header is already clean.
bom_header_fix = {"ï»¿State": "State"}
mmr_df = cdc_df.rename(columns=bom_header_fix)
mmr_df

Unnamed: 0,State,State Code,Year,Deaths,Population,Crude Rate
0,Alabama,1,2005,12,2356423,Unreliable
1,Alabama,1,2006,10,2385480,Unreliable
2,Alabama,1,2015,12,2505795,Unreliable
3,Alabama,1,2016,35,2507714,1.4
4,Alabama,1,2017,41,2514911,1.6
...,...,...,...,...,...,...
392,Wisconsin,55,2014,16,2898057,Unreliable
393,Wisconsin,55,2015,11,2903737,Unreliable
394,Wisconsin,55,2016,15,2905282,Unreliable
395,Wisconsin,55,2017,12,2912745,Unreliable


In [20]:
# Check the dtype of every column.
# "Crude Rate" comes back as object because the column mixes numeric rates
# with the text value "Unreliable" (see the value counts further down).
mmr_df.dtypes

State         object
State Code     int64
Year           int64
Deaths         int64
Population     int64
Crude Rate    object
dtype: object

In [11]:
# Count the non-missing entries per column; a column with fewer entries
# than the others would reveal missing values.
mmr_df.notna().sum()

State         397
State Code    397
Year          397
Deaths        397
Population    397
Crude Rate    397
dtype: int64

In [12]:
# Tally the distinct "Crude Rate" entries, including the "Unreliable" marker
# (these are used by the CDC for death counts < 20)
crude_rate = mmr_df["Crude Rate"]
crude_rate.value_counts()

Unreliable    160
0.7            41
0.6            38
0.5            33
0.8            22
0.4            18
0.9            17
1.1            17
1              15
1.2             8
1.4             7
1.3             5
1.5             4
1.6             3
2               2
0.3             2
1.7             2
1.8             1
1.9             1
2.3             1
Name: Crude Rate, dtype: int64

In [14]:
# Group the rows by year, then add up the deaths reported within each year
year_totals = mmr_df.groupby(by=["Year"])
yearly_deaths = year_totals["Deaths"].agg("sum")

# Series indexed by Year holding the summed deaths
yearly_deaths

Year
2005     640
2006     605
2007     666
2008     687
2009     841
2010     718
2011     832
2012     874
2013    1048
2014    1014
2015    1037
2016    1142
2017    1160
2018     874
2019    1010
Name: Deaths, dtype: int64

In [17]:
# Make a one-column DataFrame of total deaths by year.
# It is built from `yearly_deaths`, the per-year Series produced by the groupby
# cell, so this cell runs on a fresh kernel (Restart & Run All) without relying
# on a leftover variable.
year_summary_df = pd.DataFrame({"U.S. Maternal Deaths": yearly_deaths})
year_summary_df

Unnamed: 0_level_0,U.S. Maternal Deaths
Year,Unnamed: 1_level_1
2005,640
2006,605
2007,666
2008,687
2009,841
2010,718
2011,832
2012,874
2013,1048
2014,1014


In [12]:
# Group the rows by state, then add up each state's deaths across 2005-2019
state_totals = mmr_df.groupby(by=["State"])

# NOTE: despite the _df suffix this is a Series (index = State, values = summed Deaths)
state_deaths_df = state_totals["Deaths"].agg("sum")
state_deaths_df

State
Alabama            176
Arizona            240
Arkansas           207
California        1363
Colorado            87
Connecticut         25
Florida            858
Georgia            823
Hawaii              12
Idaho               10
Illinois           465
Indiana            388
Iowa                35
Kansas              86
Kentucky           197
Louisiana          331
Maryland           294
Massachusetts       47
Michigan           509
Minnesota          113
Mississippi        188
Missouri           329
Nevada              11
New Jersey         579
New Mexico          45
New York           949
North Carolina     337
Ohio               500
Oklahoma           267
Oregon              44
Pennsylvania       489
South Carolina     291
Tennessee          263
Texas             1809
Utah                86
Virginia           278
Washington         242
West Virginia       10
Wisconsin          165
Name: Deaths, dtype: int64

#### US Insurance Coverage 2019 : Health Insurance Coverage of Females 19-64 [source](https://www.kff.org/other/state-indicator/health-insurance-coverage-of-nonelderly-adult-females/?currentTimeframe=0&sortModel=%7B%22colId%22:%22Location%22,%22sort%22:%22asc%22%7D)

In [19]:
# preview the first five rows of the insurance-coverage data
ins_df.head(n=5)

Unnamed: 0,Location,Employer,Non-Group,Medicaid,Medicare,Military,Uninsured,Total,Footnotes
0,United States,0.607,0.079,0.165,0.02,0.015,0.114,1.0,1.0
1,Alabama,0.588,0.078,0.139,0.038,0.024,0.133,1.0,
2,Alaska,0.591,0.041,0.17,0.013,0.065,0.119,1.0,
3,Arizona,0.568,0.069,0.188,0.019,0.017,0.138,1.0,
4,Arkansas,0.536,0.076,0.226,0.035,0.015,0.113,1.0,


In [20]:
# Check the dtype of every column.
# Every share column parses as float64 except "Military", which is object --
# a sign that it holds at least one non-numeric entry.
ins_df.dtypes

Location      object
Employer     float64
Non-Group    float64
Medicaid     float64
Medicare     float64
Military      object
Uninsured    float64
Total        float64
Footnotes    float64
dtype: object

In [21]:
# second look at the first five rows (same preview as the earlier head() cell)
ins_df.head(n=5)

Unnamed: 0,Location,Employer,Non-Group,Medicaid,Medicare,Military,Uninsured,Total,Footnotes
0,United States,0.607,0.079,0.165,0.02,0.015,0.114,1.0,1.0
1,Alabama,0.588,0.078,0.139,0.038,0.024,0.133,1.0,
2,Alaska,0.591,0.041,0.17,0.013,0.065,0.119,1.0,
3,Arizona,0.568,0.069,0.188,0.019,0.017,0.138,1.0,
4,Arkansas,0.536,0.076,0.226,0.035,0.015,0.113,1.0,


In [22]:
# Check the column labels (an extra space was previously found in front of the
# column names; the labels shown here are the explicit `names=` passed to read_csv).
ins_df.columns

Index(['Location', 'Employer', 'Non-Group', 'Medicaid', 'Medicare', 'Military',
       'Uninsured', 'Total', 'Footnotes'],
      dtype='object')

In [23]:
# Inspect the rows whose Military value is the text placeholder " N/A"
# (note the leading space inside the string).
# The result gets its own name so that this one-off inspection cannot overwrite
# `ins_clean_df`, which the following cells use for the cleaned, full-size frame.
military_na_rows = ins_df.loc[ins_df["Military"] == " N/A"]
military_na_rows

Unnamed: 0,Location,Employer,Non-Group,Medicaid,Medicare,Military,Uninsured,Total,Footnotes
46,Vermont,0.614,0.069,0.242,0.019,,0.046,1.0,


In [26]:
# Swap the " N/A" text placeholder for 0 so the Military column can be made numeric.
# The replacement is applied to the whole frame, not only to Military.
# NOTE(review): 0 makes a missing value indistinguishable from a true zero share -- confirm this is intended.
na_placeholder = {" N/A": 0}
ins_clean_df = ins_df.replace(to_replace=na_placeholder)
ins_clean_df

Unnamed: 0,Location,Employer,Non-Group,Medicaid,Medicare,Military,Uninsured,Total,Footnotes
0,United States,0.607,0.079,0.165,0.02,0.015,0.114,1.0,1.0
1,Alabama,0.588,0.078,0.139,0.038,0.024,0.133,1.0,
2,Alaska,0.591,0.041,0.17,0.013,0.065,0.119,1.0,
3,Arizona,0.568,0.069,0.188,0.019,0.017,0.138,1.0,
4,Arkansas,0.536,0.076,0.226,0.035,0.015,0.113,1.0,
5,California,0.57,0.087,0.225,0.013,0.009,0.095,1.0,
6,Colorado,0.635,0.092,0.14,0.013,0.025,0.095,1.0,
7,Connecticut,0.638,0.068,0.204,0.017,0.007,0.066,1.0,
8,Delaware,0.635,0.064,0.184,0.02,0.021,0.075,1.0,
9,District of Columbia,0.659,0.073,0.227,0.006,0.009,0.027,1.0,


In [32]:
# Check dtypes after the " N/A" replacement.
# NOTE(review): the saved output already shows Military as float64 and this cell's
# execution count (32) is higher than the conversion cell's (28), so the output
# appears to have been captured after the pd.to_numeric() cell ran -- re-run in order to confirm.
ins_clean_df.dtypes

Location      object
Employer     float64
Non-Group    float64
Medicaid     float64
Medicare     float64
Military     float64
Uninsured    float64
Total        float64
Footnotes    float64
dtype: object

In [28]:
# Convert the Military column from object to a numeric dtype with pd.to_numeric()
# and store the converted values back under the same column label.
military_numeric = pd.to_numeric(ins_clean_df["Military"])
ins_clean_df["Military"] = military_numeric

In [31]:
# Verify the dtypes after the pd.to_numeric() conversion: Military should now be float64
ins_clean_df.dtypes

Location      object
Employer     float64
Non-Group    float64
Medicaid     float64
Medicare     float64
Military     float64
Uninsured    float64
Total        float64
Footnotes    float64
dtype: object

In [42]:
# Scale every coverage share (stored as a fraction of 1) to a percentage.
# Display formatting can then be applied with .style.format({"column name" : "{:,.2f}%"})
share_columns = ["Employer", "Non-Group", "Medicaid", "Medicare",
                 "Military", "Uninsured", "Total"]

# One Series per column, unpacked in the same order as share_columns
(employer_per, non_group_per, medicaid_per, medicare_per,
 military_per, uninsured_per, total_per) = (
    ins_clean_df[column] * 100 for column in share_columns
)

# Only the last expression of a cell is rendered, so show the Total percentages
total_per

0     100.0
1     100.0
2     100.0
3     100.0
4     100.0
5     100.0
6     100.0
7     100.0
8     100.0
9     100.0
10    100.0
11    100.0
12    100.0
13    100.0
14    100.0
15    100.0
16    100.0
17    100.0
18    100.0
19    100.0
20    100.0
21    100.0
22    100.0
23    100.0
24    100.0
25    100.0
26    100.0
27    100.0
28    100.0
29    100.0
30    100.0
31    100.0
32    100.0
33    100.0
34    100.0
35    100.0
36    100.0
37    100.0
38    100.0
39    100.0
40    100.0
41    100.0
42    100.0
43    100.0
44    100.0
45    100.0
46    100.0
47    100.0
48    100.0
49    100.0
50    100.0
51    100.0
52    100.0
Name: Total, dtype: float64

In [44]:
# Pull out the Location column (a pandas Series, not a Python list) for the new dataframe
locations = ins_clean_df.loc[:, "Location"]
locations

0             United States
1                   Alabama
2                    Alaska
3                   Arizona
4                  Arkansas
5                California
6                  Colorado
7               Connecticut
8                  Delaware
9      District of Columbia
10                  Florida
11                  Georgia
12                   Hawaii
13                    Idaho
14                 Illinois
15                  Indiana
16                     Iowa
17                   Kansas
18                 Kentucky
19                Louisiana
20                    Maine
21                 Maryland
22            Massachusetts
23                 Michigan
24                Minnesota
25              Mississippi
26                 Missouri
27                  Montana
28                 Nebraska
29                   Nevada
30            New Hampshire
31               New Jersey
32               New Mexico
33                 New York
34           North Carolina
35             North

In [48]:
# Assemble the 2019 coverage table from the percentage Series; the Footnotes
# column is deliberately left out. The scalar 'Year' is broadcast to every row.
# NOTE(review): Year is stored as the string '2019' here, while the CDC frame's
# Year column is int64 -- confirm the types agree before joining on Year.
coverage_2019 = {
    'Location': locations,
    'Year': '2019',
    'Employer': employer_per,
    'Non-Group': non_group_per,
    'Medicaid': medicaid_per,
    'Medicare': medicare_per,
    'Military': military_per,
    'Uninsured': uninsured_per,
    'Total': total_per,
}
us_ins_2019_df = pd.DataFrame(coverage_2019)
us_ins_2019_df

Unnamed: 0,Location,Year,Employer,Non-Group,Medicaid,Medicare,Military,Uninsured,Total
0,United States,2019,60.7,7.9,16.5,2.0,1.5,11.4,100.0
1,Alabama,2019,58.8,7.8,13.9,3.8,2.4,13.3,100.0
2,Alaska,2019,59.1,4.1,17.0,1.3,6.5,11.9,100.0
3,Arizona,2019,56.8,6.9,18.8,1.9,1.7,13.8,100.0
4,Arkansas,2019,53.6,7.6,22.6,3.5,1.5,11.3,100.0
5,California,2019,57.0,8.7,22.5,1.3,0.9,9.5,100.0
6,Colorado,2019,63.5,9.2,14.0,1.3,2.5,9.5,100.0
7,Connecticut,2019,63.8,6.8,20.4,1.7,0.7,6.6,100.0
8,Delaware,2019,63.5,6.4,18.4,2.0,2.1,7.5,100.0
9,District of Columbia,2019,65.9,7.3,22.7,0.6,0.9,2.7,100.0


#### US Insurance Coverage 2013 : Health Insurance Coverage of Females 19-64 [source](https://www.kff.org/other/state-indicator/health-insurance-coverage-of-nonelderly-adult-females/?currentTimeframe=6&sortModel=%7B%22colId%22:%22Location%22,%22sort%22:%22asc%22%7D)