In [30]:
from pathlib import Path

import numpy as np
import pandas as pd

In [31]:
# Input CSV locations (paths are relative to the notebook's working directory).

# CDC maternal deaths by race, 2009-2019
M_Deaths_race = "Resources_P3/Maternal_Deaths_race_2009-2019.csv"
# CDC maternal deaths using the "Single Race 15" classification, 2018-2019
M_Deaths_single_race = "Resources_P3/Single_Race_15_Maternal_Deaths_2018-2019.csv"
# CDC natality (births) counts used for the ratio calculation, 2009-2019
Births = "Resources_P3/Births_for_calculation_2009-2019.csv"

In [32]:
# Load the three input CSVs into DataFrames, in the same order as before:
# deaths by race, births, then single-race deaths.
deaths_df, births_df, single_race_deaths = (
    pd.read_csv(csv_path)
    for csv_path in (M_Deaths_race, Births, M_Deaths_single_race)
)

#### CDC Maternal Deaths 2009-2019 [source](https://wonder.cdc.gov/ucd-icd10.html)

In [36]:
# Preview the first rows of the maternal deaths table loaded from M_Deaths_race.
deaths_df.head()

Unnamed: 0,State,State Code,Year,Race,Race Code,Hispanic Origin,Hispanic Origin Code,Deaths,Population,Crude Rate
0,Alabama,1,2016,Black or African American,2054-5,Not Hispanic or Latino,2186-2,14,0,Unreliable
1,Alabama,1,2016,White,2106-3,Not Hispanic or Latino,2186-2,18,0,Unreliable
2,Alabama,1,2017,Black or African American,2054-5,Not Hispanic or Latino,2186-2,25,704201,3.6
3,Alabama,1,2017,White,2106-3,Not Hispanic or Latino,2186-2,15,1655605,Unreliable
4,Alabama,1,2018,Black or African American,2054-5,Not Hispanic or Latino,2186-2,14,706754,Unreliable


#### CDC Natality 2009-2019 (total births, not separated by race; used as the denominator in the ratio calculation) [source](https://wonder.cdc.gov/natality-current.html)

In [37]:
# Preview the first rows of the births table loaded from Births.
births_df.head()

Unnamed: 0,Year,State,State Code,Births
0,2009,Alabama,1,62475
1,2009,Alaska,2,11324
2,2009,Arizona,4,92798
3,2009,Arkansas,5,39808
4,2009,California,6,527020


In [38]:
# Merge the deaths rows with the births counts on state, state code and year.
# how="inner" keeps only the state/year combinations present in both tables.
# validate="many_to_one" makes pandas raise a MergeError if the births table
# contains duplicate (State, State Code, Year) keys; without it, duplicates
# would silently repeat the matching deaths rows in the merged frame.
CDC_MMR_data = pd.merge(
    deaths_df,
    births_df,
    how="inner",
    on=["State", "State Code", "Year"],
    validate="many_to_one",
)
CDC_MMR_data.head()

Unnamed: 0,State,State Code,Year,Race,Race Code,Hispanic Origin,Hispanic Origin Code,Deaths,Population,Crude Rate,Births
0,Alabama,1,2016,Black or African American,2054-5,Not Hispanic or Latino,2186-2,14,0,Unreliable,59151
1,Alabama,1,2016,White,2106-3,Not Hispanic or Latino,2186-2,18,0,Unreliable,59151
2,Alabama,1,2017,Black or African American,2054-5,Not Hispanic or Latino,2186-2,25,704201,3.6,58941
3,Alabama,1,2017,White,2106-3,Not Hispanic or Latino,2186-2,15,1655605,Unreliable,58941
4,Alabama,1,2018,Black or African American,2054-5,Not Hispanic or Latino,2186-2,14,706754,Unreliable,57761


In [22]:
# Deaths per 100,000 births for each merged row, rounded to two decimals.
ratio = (
    CDC_MMR_data["Deaths"]
    .div(CDC_MMR_data["Births"])
    .mul(100000)
    .round(2)
)
ratio

0      23.67
1      30.43
2      42.42
3      25.45
4      24.24
5      27.70
6      25.59
7      29.00
8      15.19
9      13.81
10     13.01
11     14.66
12     12.39
13     12.39
14     12.60
15     30.14
16     28.69
17     44.14
18     31.35
19     27.01
20      3.23
21     10.25
22      5.31
23      2.16
24      2.94
25      7.25
26      3.53
27      3.39
28      4.18
29      4.58
30      2.78
31      6.75
32      4.96
33      2.02
34      3.84
35      4.65
36      5.05
37      2.19
38      3.98
39      6.36
40      4.77
41      3.66
42      5.90
43      5.69
44      3.27
45      2.45
46      8.59
47      4.30
48      4.03
49      8.69
50      4.45
51      3.08
52      2.64
53      6.81
54      4.84
55      2.91
56      2.46
57      5.82
58      4.26
59     18.23
60     10.84
61      7.68
62     13.10
63      9.32
64      8.39
65     11.71
66      7.97
67     14.54
68     12.20
69     12.53
70     10.21
71     14.39
72     11.36
73     10.91
74     10.70
75     14.71
76      7.11

In [39]:
# Store the computed ratio on the merged frame as a new column
# (values are aligned on the row index).
CDC_MMR_data.loc[:, "Maternal Mortality Ratio"] = ratio
CDC_MMR_data.head()

Unnamed: 0,State,State Code,Year,Race,Race Code,Hispanic Origin,Hispanic Origin Code,Deaths,Population,Crude Rate,Births,Maternal Mortality Ratio
0,Alabama,1,2016,Black or African American,2054-5,Not Hispanic or Latino,2186-2,14,0,Unreliable,59151,23.67
1,Alabama,1,2016,White,2106-3,Not Hispanic or Latino,2186-2,18,0,Unreliable,59151,30.43
2,Alabama,1,2017,Black or African American,2054-5,Not Hispanic or Latino,2186-2,25,704201,3.6,58941,42.42
3,Alabama,1,2017,White,2106-3,Not Hispanic or Latino,2186-2,15,1655605,Unreliable,58941,25.45
4,Alabama,1,2018,Black or African American,2054-5,Not Hispanic or Latino,2186-2,14,706754,Unreliable,57761,24.24


In [40]:
# Select the columns to keep, in their final order.
CDC_MMR_final_df = CDC_MMR_data.loc[
    :,
    [
        "State",
        "State Code",
        "Year",
        "Race",
        "Race Code",
        "Hispanic Origin",
        "Hispanic Origin Code",
        "Deaths",
        "Births",
        "Maternal Mortality Ratio",
        "Population",
    ],
]
CDC_MMR_final_df.head()

Unnamed: 0,State,State Code,Year,Race,Race Code,Hispanic Origin,Hispanic Origin Code,Deaths,Births,Maternal Mortality Ratio,Population
0,Alabama,1,2016,Black or African American,2054-5,Not Hispanic or Latino,2186-2,14,59151,23.67,0
1,Alabama,1,2016,White,2106-3,Not Hispanic or Latino,2186-2,18,59151,30.43,0
2,Alabama,1,2017,Black or African American,2054-5,Not Hispanic or Latino,2186-2,25,58941,42.42,704201
3,Alabama,1,2017,White,2106-3,Not Hispanic or Latino,2186-2,15,58941,25.45,1655605
4,Alabama,1,2018,Black or African American,2054-5,Not Hispanic or Latino,2186-2,14,57761,24.24,706754


In [41]:
# Rename columns to lowercase snake_case names for database use.
CDC_MMR_by_race_df = CDC_MMR_final_df.rename(
    mapper={
        "State": "state",
        "State Code": "state_code",
        "Year": "year",
        "Race": "race",
        "Race Code": "race_code",
        "Hispanic Origin": "hispanic_origin",
        "Hispanic Origin Code": "hispanic_origin_code",
        "Deaths": "deaths",
        "Births": "births",
        "Maternal Mortality Ratio": "maternal_mortality_ratio",
        "Population": "population",
    },
    axis="columns",
)
CDC_MMR_by_race_df.head()

Unnamed: 0,state,state_code,year,race,race_code,hispanic_origin,hispanic_origin_code,deaths,births,maternal_mortality_ratio,population
0,Alabama,1,2016,Black or African American,2054-5,Not Hispanic or Latino,2186-2,14,59151,23.67,0
1,Alabama,1,2016,White,2106-3,Not Hispanic or Latino,2186-2,18,59151,30.43,0
2,Alabama,1,2017,Black or African American,2054-5,Not Hispanic or Latino,2186-2,25,58941,42.42,704201
3,Alabama,1,2017,White,2106-3,Not Hispanic or Latino,2186-2,15,58941,25.45,1655605
4,Alabama,1,2018,Black or African American,2054-5,Not Hispanic or Latino,2186-2,14,57761,24.24,706754


#### CDC Maternal Deaths with Single-Race Request option (only available for years 2018-2019) [source](https://wonder.cdc.gov/ucd-icd10-expanded.html)

In [42]:
# Preview the single-race deaths table instead of rendering the whole frame.
# NOTE(review): this table is not referenced by any later cell visible here.
single_race_deaths.head(10)

Unnamed: 0,State,State Code,Year,Single Race 15,Single Race 15 Code,Hispanic Origin,Hispanic Origin Code,Deaths
0,Alabama,1,2018,White,1,Not Hispanic or Latino,2186-2,16
1,Alabama,1,2018,Black,2,Not Hispanic or Latino,2186-2,14
2,Arizona,4,2018,White,1,Hispanic or Latino,2135-2,10
3,Arizona,4,2018,White,1,Not Hispanic or Latino,2186-2,10
4,Arkansas,5,2018,White,1,Not Hispanic or Latino,2186-2,10
5,California,6,2018,White,1,Hispanic or Latino,2135-2,31
6,California,6,2018,White,1,Not Hispanic or Latino,2186-2,22
7,California,6,2018,Black,2,Not Hispanic or Latino,2186-2,10
8,Florida,12,2018,White,1,Hispanic or Latino,2135-2,10
9,Florida,12,2018,White,1,Not Hispanic or Latino,2186-2,22


#### Export CSV of maternal mortality ratio (MMR) separated by race for 2009-2019

In [43]:
# export file as a CSV, without the Pandas Index, but with the header
# Create the output folder first: to_csv does not create missing directories,
# so the export would otherwise fail when Cleaned_Data/ does not exist yet.
output_path = Path("Cleaned_Data/MMR_by_race_ML.csv")
output_path.parent.mkdir(parents=True, exist_ok=True)
# float_format='%.2f' writes float columns with two decimal places.
CDC_MMR_by_race_df.to_csv(output_path, index=False, header=True, float_format='%.2f')