In [1]:
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# CSV files to import

MMR_totals = "Cleaned_Data/2009_2019_CDC_MMR.csv"
M_Deaths_race = "Resources_P3/Maternal_Deaths_race_2009-2019.csv"
Births = "Resources_P3/Births_for_calculation_2009-2019.csv"
M_race_births = "Resources_P3/Births_Maternal_race_2009-2019.csv"

In [3]:
# read csv files
MMR_totals = pd.read_csv(MMR_totals)
deaths_df = pd.read_csv(M_Deaths_race)
births_df = pd.read_csv(Births)
births_mat_race = pd.read_csv(M_race_births)

#### CDC Maternal Deaths by race 2009-2019 [source](https://wonder.cdc.gov/ucd-icd10.html)

In [13]:
deaths_df.head()

Unnamed: 0,State,State Code,Year,Race,Race Code,Hispanic Origin,Hispanic Origin Code,Deaths,Population,Crude Rate
0,Alabama,1,2016,Black or African American,2054-5,Not Hispanic or Latino,2186-2,14,0,Unreliable
1,Alabama,1,2016,White,2106-3,Not Hispanic or Latino,2186-2,18,0,Unreliable
2,Alabama,1,2017,Black or African American,2054-5,Not Hispanic or Latino,2186-2,25,704201,3.6
3,Alabama,1,2017,White,2106-3,Not Hispanic or Latino,2186-2,15,1655605,Unreliable
4,Alabama,1,2018,Black or African American,2054-5,Not Hispanic or Latino,2186-2,14,706754,Unreliable


In [23]:
# change "Population" column to "Population by Race"
deaths_race_df = deaths_df.rename(columns={"State": "state",
                                           "State Code": "state_code",
                                           "Year": "year",
                                           "Population": "population_by_race",
                                           "Deaths": "deaths_by_race"})
deaths_race_df.head(50)

Unnamed: 0,state,state_code,year,Race,Race Code,Hispanic Origin,Hispanic Origin Code,deaths_by_race,population_by_race,Crude Rate
0,Alabama,1,2016,Black or African American,2054-5,Not Hispanic or Latino,2186-2,14,0,Unreliable
1,Alabama,1,2016,White,2106-3,Not Hispanic or Latino,2186-2,18,0,Unreliable
2,Alabama,1,2017,Black or African American,2054-5,Not Hispanic or Latino,2186-2,25,704201,3.6
3,Alabama,1,2017,White,2106-3,Not Hispanic or Latino,2186-2,15,1655605,Unreliable
4,Alabama,1,2018,Black or African American,2054-5,Not Hispanic or Latino,2186-2,14,706754,Unreliable
5,Alabama,1,2018,White,2106-3,Not Hispanic or Latino,2186-2,16,1657262,Unreliable
6,Alabama,1,2019,Black or African American,2054-5,Not Hispanic or Latino,2186-2,15,709544,Unreliable
7,Alabama,1,2019,White,2106-3,Not Hispanic or Latino,2186-2,17,1660160,Unreliable
8,Arizona,4,2013,White,2106-3,Not Hispanic or Latino,2186-2,13,0,Unreliable
9,Arizona,4,2014,White,2106-3,Not Hispanic or Latino,2186-2,12,1947854,Unreliable


In [33]:
# dataframe for births by maternal race
# change race column names to match for merging
# noting that for births the race is the maternal race
births_mat_race.head(50)
births_by_race = births_mat_race.rename(columns={"Births": "births_by_race", 
                                    "Mother's Bridged Race": "Race", 
                                    "Mother's Bridged Race Code": "Race Code", 
                                    "Mother's Hispanic Origin": "Hispanic Origin",
                                    "Mother's Hispanic Origin Code": "Hispanic Origin Code",
                                    "State": "state",
                                    "State Code": "state_code",
                                    "Year": "year"})
births_by_race.head(50)
births_by_race.loc[births_by_race["year"] == 2016]

Unnamed: 0,state,state_code,year,Race,Race Code,Hispanic Origin,Hispanic Origin Code,births_by_race
3510,Alabama,1,2016,American Indian or Alaska Native,1002-5,Hispanic or Latino,2135-2,22
3511,Alabama,1,2016,American Indian or Alaska Native,1002-5,Not Hispanic or Latino,2186-5,139
3512,Alabama,1,2016,Asian or Pacific Islander,A-PI,Hispanic or Latino,2135-2,170
3513,Alabama,1,2016,Asian or Pacific Islander,A-PI,Not Hispanic or Latino,2186-5,1108
3514,Alabama,1,2016,Black or African American,2054-5,Hispanic or Latino,2135-2,494
...,...,...,...,...,...,...,...,...
4014,Wyoming,56,2016,Black or African American,2054-5,Hispanic or Latino,2135-2,33
4015,Wyoming,56,2016,Black or African American,2054-5,Not Hispanic or Latino,2186-5,91
4016,Wyoming,56,2016,White,2106-3,Hispanic or Latino,2135-2,893
4017,Wyoming,56,2016,White,2106-3,Not Hispanic or Latino,2186-5,5834


In [34]:
# merge deaths and births data stratified by race
race_birth_death_df = pd.merge(deaths_race_df, births_by_race, how="left", on=["state", "year", "Race Code", "Hispanic Origin Code"])
race_birth_death_df.head()

Unnamed: 0,state,state_code_x,year,Race_x,Race Code,Hispanic Origin_x,Hispanic Origin Code,deaths_by_race,population_by_race,Crude Rate,state_code_y,Race_y,Hispanic Origin_y,births_by_race
0,Alabama,1,2016,Black or African American,2054-5,Not Hispanic or Latino,2186-2,14,0,Unreliable,,,,
1,Alabama,1,2016,White,2106-3,Not Hispanic or Latino,2186-2,18,0,Unreliable,,,,
2,Alabama,1,2017,Black or African American,2054-5,Not Hispanic or Latino,2186-2,25,704201,3.6,,,,
3,Alabama,1,2017,White,2106-3,Not Hispanic or Latino,2186-2,15,1655605,Unreliable,,,,
4,Alabama,1,2018,Black or African American,2054-5,Not Hispanic or Latino,2186-2,14,706754,Unreliable,,,,


In [55]:
race_birth_death_df = deaths_race_df.merge(births_by_race, how="left", on=["state", "year", "Race", "Hispanic Origin"])
race_birth_death_df.head()

# clean up by selecting only the columns we need
race_birth_death_df_clean = race_birth_death_df[["state", "state_code_x", "year", "Race",\
                                                "Race Code_x", "Hispanic Origin",\
                                                "Hispanic Origin Code_x", "deaths_by_race",\
                                                "population_by_race", "births_by_race"]]
race_birth_death_df_clean.head()

# clean by renaming columns
race_b_d_df = race_birth_death_df_clean.rename(columns={"Race": "race", 
                                    "Race Code_x": "race_code", 
                                    "Hispanic Origin": "hispanic_origin",             
                                    "Hispanic Origin Code_x":"hispanic_origin_code",
                                    "state_code_x": "state_code"})
race_b_d_df

Unnamed: 0,state,state_code,year,race,race_code,hispanic_origin,hispanic_origin_code,deaths_by_race,population_by_race,births_by_race
0,Alabama,1,2016,Black or African American,2054-5,Not Hispanic or Latino,2186-2,14,0,17989
1,Alabama,1,2016,White,2106-3,Not Hispanic or Latino,2186-2,18,0,35319
2,Alabama,1,2017,Black or African American,2054-5,Not Hispanic or Latino,2186-2,25,704201,18354
3,Alabama,1,2017,White,2106-3,Not Hispanic or Latino,2186-2,15,1655605,34784
4,Alabama,1,2018,Black or African American,2054-5,Not Hispanic or Latino,2186-2,14,706754,17939
...,...,...,...,...,...,...,...,...,...,...
383,West Virginia,54,2018,White,2106-3,Not Hispanic or Latino,2186-2,10,853052,16785
384,Wisconsin,55,2009,White,2106-3,Not Hispanic or Latino,2186-2,10,0,52462
385,Wisconsin,55,2011,White,2106-3,Not Hispanic or Latino,2186-2,10,0,50500
386,Wisconsin,55,2014,White,2106-3,Not Hispanic or Latino,2186-2,13,2409803,49440


In [10]:
MMR_totals.head()

Unnamed: 0,state,state_code,year,deaths,births,maternal_mortality_ratio,population
0,Alabama,1,2015,12,59657,20.11,2505795
1,Alabama,1,2016,35,59151,59.17,2507714
2,Alabama,1,2017,41,58941,69.56,2514911
3,Alabama,1,2018,31,57761,53.67,2523756
4,Alabama,1,2019,35,58615,59.71,2533668


In [11]:
MMR_totals_us = MMR_totals.rename(columns={"population": "total_population",
                                           "maternal_mortality_ratio": "total_mmr",
                                           "deaths": "total_deaths", 
                                           "births": "total_births"})
MMR_totals_us

Unnamed: 0,state,state_code,year,total_deaths,total_births,total_mmr,total_population
0,Alabama,1,2015,12,59657,20.11,2505795
1,Alabama,1,2016,35,59151,59.17,2507714
2,Alabama,1,2017,41,58941,69.56,2514911
3,Alabama,1,2018,31,57761,53.67,2523756
4,Alabama,1,2019,35,58615,59.71,2533668
...,...,...,...,...,...,...,...
303,Wisconsin,55,2014,16,67161,23.82,2898057
304,Wisconsin,55,2015,11,67041,16.41,2903737
305,Wisconsin,55,2016,15,66615,22.52,2905282
306,Wisconsin,55,2017,12,64975,18.47,2912745


In [59]:
CDC_MMR_race_total = race_b_d_df.merge(MMR_totals_us, how="left", on=["state", "year"])
CDC_MMR_race_total.head()

Unnamed: 0,state,state_code_x,year,race,race_code,hispanic_origin,hispanic_origin_code,deaths_by_race,population_by_race,births_by_race,state_code_y,total_deaths,total_births,total_mmr,total_population
0,Alabama,1,2016,Black or African American,2054-5,Not Hispanic or Latino,2186-2,14,0,17989,1,35,59151,59.17,2507714
1,Alabama,1,2016,White,2106-3,Not Hispanic or Latino,2186-2,18,0,35319,1,35,59151,59.17,2507714
2,Alabama,1,2017,Black or African American,2054-5,Not Hispanic or Latino,2186-2,25,704201,18354,1,41,58941,69.56,2514911
3,Alabama,1,2017,White,2106-3,Not Hispanic or Latino,2186-2,15,1655605,34784,1,41,58941,69.56,2514911
4,Alabama,1,2018,Black or African American,2054-5,Not Hispanic or Latino,2186-2,14,706754,17939,1,31,57761,53.67,2523756


In [92]:
# clean up by selecting only the columns we need
CDC_MMR_race_total_clean = CDC_MMR_race_total[["state", "state_code_x", "year", "race",\
                                                "race_code", "hispanic_origin",\
                                                "hispanic_origin_code","total_births",\
                                                "births_by_race", "total_deaths",\
                                                "deaths_by_race", "total_mmr",\
                                                "total_population","population_by_race"]]
CDC_MMR_race_total_clean.head()

# rename column
MMR_race_df = CDC_MMR_race_total_clean.rename(columns={"state_code_x": "state_code"})
MMR_race_df.head()

Unnamed: 0,state,state_code,year,race,race_code,hispanic_origin,hispanic_origin_code,total_births,births_by_race,total_deaths,deaths_by_race,total_mmr,total_population,population_by_race
0,Alabama,1,2016,Black or African American,2054-5,Not Hispanic or Latino,2186-2,59151,17989,35,14,59.17,2507714,0
1,Alabama,1,2016,White,2106-3,Not Hispanic or Latino,2186-2,59151,35319,35,18,59.17,2507714,0
2,Alabama,1,2017,Black or African American,2054-5,Not Hispanic or Latino,2186-2,58941,18354,41,25,69.56,2514911,704201
3,Alabama,1,2017,White,2106-3,Not Hispanic or Latino,2186-2,58941,34784,41,15,69.56,2514911,1655605
4,Alabama,1,2018,Black or African American,2054-5,Not Hispanic or Latino,2186-2,57761,17939,31,14,53.67,2523756,706754


In [72]:
# find weighted value for births by race divided by total births
weight = MMR_race_df["births_by_race"] / MMR_race_df["total_births"]
weight

0      0.304120
1      0.597099
2      0.311396
3      0.590149
4      0.310573
         ...   
383    0.919827
384    0.740539
385    0.744728
386    0.736142
387    0.731254
Length: 388, dtype: float64

In [94]:
weighted_for_race = weight * MMR_race_df["births_by_race"]
weighted_for_race

0       5470.814035
1      21088.937820
2       5715.364789
3      20527.759217
4       5571.366857
           ...     
383    15439.293347
384    38850.153777
385    37608.759770
386    36394.836289
387    35848.996525
Length: 388, dtype: float64

In [77]:
ratio = round(MMR_race_df["deaths_by_race"] / weighted_for_race * 100000, 2)
ratio

# maternal deaths / births for same period * 100000

0      255.90
1       85.35
2      437.42
3       73.07
4      251.28
        ...  
383     64.77
384     25.74
385     26.59
386     35.72
387     27.89
Length: 388, dtype: float64

In [87]:
# add ratio as a column to our df

MMR_race_df["mmr_by_race"] = ratio
MMR_race_df.head()

# reorder columns

MMR_race_final_df = MMR_race_df[["state", "state_code", "year", "race",\
                                                "race_code", "hispanic_origin",\
                                                "hispanic_origin_code","total_births",\
                                                "births_by_race", "total_deaths",\
                                                "deaths_by_race", "total_mmr", "mmr_by_race",\
                                                "total_population","population_by_race"]]
MMR_race_final_df.head()

Unnamed: 0,state,state_code,year,race,race_code,hispanic_origin,hispanic_origin_code,total_births,births_by_race,total_deaths,deaths_by_race,total_mmr,mmr_by_race,total_population,population_by_race
0,Alabama,1,2016,Black or African American,2054-5,Not Hispanic or Latino,2186-2,59151,17989,35,14,59.17,255.9,2507714,0
1,Alabama,1,2016,White,2106-3,Not Hispanic or Latino,2186-2,59151,35319,35,18,59.17,85.35,2507714,0
2,Alabama,1,2017,Black or African American,2054-5,Not Hispanic or Latino,2186-2,58941,18354,41,25,69.56,437.42,2514911,704201
3,Alabama,1,2017,White,2106-3,Not Hispanic or Latino,2186-2,58941,34784,41,15,69.56,73.07,2514911,1655605
4,Alabama,1,2018,Black or African American,2054-5,Not Hispanic or Latino,2186-2,57761,17939,31,14,53.67,251.28,2523756,706754


#### Exploratory Data Analysis: MMR by race (weighted) for years 2009-2019

In [88]:
MMR_race_final_df.groupby(["race", "hispanic_origin"])["mmr_by_race"].mean()

race                       hispanic_origin       
Asian or Pacific Islander  Not Hispanic or Latino    112.221667
Black or African American  Not Hispanic or Latino    428.046950
White                      Hispanic or Latino         80.610789
                           Not Hispanic or Latino     54.443547
Name: mmr_by_race, dtype: float64