In [355]:
# UCSD ECE 143 Programming for Data Analysis
# Winter 2023
# Author: m3cheung@ucsd.edu
# Final Project: create morbidity by education level and marital status
#                bar graph

Import modules

In [356]:
import pandas as pd
import plotly.express as px
from bs4 import BeautifulSoup
import matplotlib.pyplot as plt
import sys

Specify year to analyze

In [357]:
year = 2021.0

Load csv files into Pandas DataFrames

In [358]:
morbidity_df = pd.read_csv(\
    "../data_csv/edu_marital-mobidity.csv").drop(columns=["Notes", "Mother's Education Code", "Year Code", "Marital Status Code", "Mother's Single Race 6 Code", "Mother's Hispanic Origin Code"])
total_births_df = pd.read_csv(\
    "../data_csv/edu_marital-total_births.csv").drop(columns=["Notes", "Mother's Education Code", "Year Code", "Marital Status Code", "Mother's Single Race 6 Code", "Mother's Hispanic Origin Code"])

display(morbidity_df)
display(total_births_df)


Unnamed: 0,Mother's Education,Marital Status,Mother's Single Race 6,Mother's Hispanic Origin,Year,Births
0,8th grade or less,Married,Asian,Not Hispanic or Latino,2018.0,95.0
1,8th grade or less,Married,Asian,Not Hispanic or Latino,2019.0,78.0
2,8th grade or less,Married,Asian,Not Hispanic or Latino,2020.0,79.0
3,8th grade or less,Married,Asian,Not Hispanic or Latino,2021.0,51.0
4,8th grade or less,Married,Black or African American,Hispanic or Latino,2018.0,11.0
...,...,...,...,...,...,...
483,,,,,,
484,,,,,,
485,,,,,,
486,,,,,,


Unnamed: 0,Mother's Education,Marital Status,Mother's Single Race 6,Mother's Hispanic Origin,Year,Births
0,8th grade or less,Married,American Indian or Alaska Native,Hispanic or Latino,2018.0,212.0
1,8th grade or less,Married,American Indian or Alaska Native,Hispanic or Latino,2019.0,256.0
2,8th grade or less,Married,American Indian or Alaska Native,Hispanic or Latino,2020.0,270.0
3,8th grade or less,Married,American Indian or Alaska Native,Hispanic or Latino,2021.0,233.0
4,8th grade or less,Married,American Indian or Alaska Native,Not Hispanic or Latino,2018.0,64.0
...,...,...,...,...,...,...
1028,,,,,,
1029,,,,,,
1030,,,,,,
1031,,,,,,


Clean up data

In [359]:
# # remove NaN rows
# morbidity_df = morbidity_df.dropna()
# total_births_df = total_births_df.dropna()

# only work on year
morbidity_df = morbidity_df[morbidity_df["Year"] == year]
total_births_df = total_births_df[total_births_df["Year"] == year]

# reset indices to start from 0
total_births_df = total_births_df.reset_index(drop=True)
morbidity_df = morbidity_df.reset_index(drop=True)

# create new df for the ratio!
ratio_df = total_births_df.merge(morbidity_df, on=["Mother's Education", "Marital Status", "Mother's Single Race 6", "Mother's Hispanic Origin", "Year"], how="left")

# rename columns
ratio_df = ratio_df.rename(columns={"Births_x":"Total Births", "Births_y":"Births w/ Morbidity"})

# remove NaN rows
ratio_df = ratio_df.dropna()

display(ratio_df)

# isolate hispanic origin rows
hisp_df = ratio_df[ratio_df["Mother's Hispanic Origin"] == "Hispanic or Latino"]
ratio_df = ratio_df[ratio_df["Mother's Hispanic Origin"] == "Not Hispanic or Latino"]

# find sum of Births and Morbidity columns by Marital Status and Edu Lvl
hisp_df["Mother\'s Single Race 6"] = "Hispanic Origin"
hisp_df = hisp_df.groupby(["Mother's Education", "Marital Status"]).aggregate({"Mother's Single Race 6":"first", "Year":"first", "Total Births": "sum", "Births w/ Morbidity": "sum"}).reset_index()

# add rows to include hispanic origin for year
ratio_df = pd.concat([ratio_df, hisp_df]).drop(columns="Mother's Hispanic Origin").reset_index(drop=True)


display(ratio_df)

Unnamed: 0,Mother's Education,Marital Status,Mother's Single Race 6,Mother's Hispanic Origin,Year,Total Births,Births w/ Morbidity
3,8th grade or less,Married,Asian,Not Hispanic or Latino,2021.0,2890.0,51.0
5,8th grade or less,Married,Black or African American,Hispanic or Latino,2021.0,1078.0,12.0
6,8th grade or less,Married,Black or African American,Not Hispanic or Latino,2021.0,3030.0,64.0
10,8th grade or less,Married,White,Hispanic or Latino,2021.0,16895.0,186.0
11,8th grade or less,Married,White,Not Hispanic or Latino,2021.0,19023.0,239.0
...,...,...,...,...,...,...,...
237,"Doctorate (PhD, EdD) or Professional Degree (M...",Married,White,Not Hispanic or Latino,2021.0,59180.0,1116.0
240,"Doctorate (PhD, EdD) or Professional Degree (M...",Married,More than one race,Not Hispanic or Latino,2021.0,1414.0,29.0
242,"Doctorate (PhD, EdD) or Professional Degree (M...",Unmarried,Asian,Not Hispanic or Latino,2021.0,309.0,10.0
244,"Doctorate (PhD, EdD) or Professional Degree (M...",Unmarried,Black or African American,Not Hispanic or Latino,2021.0,838.0,12.0


Unnamed: 0,Mother's Education,Marital Status,Mother's Single Race 6,Year,Total Births,Births w/ Morbidity
0,8th grade or less,Married,Asian,2021.0,2890.0,51.0
1,8th grade or less,Married,Black or African American,2021.0,3030.0,64.0
2,8th grade or less,Married,White,2021.0,19023.0,239.0
3,8th grade or less,Unmarried,Black or African American,2021.0,2343.0,44.0
4,8th grade or less,Unmarried,White,2021.0,3966.0,46.0
...,...,...,...,...,...,...
85,High school graduate or GED completed,Unmarried,Hispanic Origin,2021.0,123379.0,1330.0
86,"Master's degree (MA, MS, MEng, MEd, MSW, MBA)",Married,Hispanic Origin,2021.0,20196.0,291.0
87,"Master's degree (MA, MS, MEng, MEd, MSW, MBA)",Unmarried,Hispanic Origin,2021.0,3826.0,50.0
88,"Some college credit, but not a degree",Married,Hispanic Origin,2021.0,56166.0,640.0


Calculate Ratio for each row (The numbers end up being so high though!)

In [360]:
# create column for MMR = (Deaths/Live Births * 100,000)
ratio_df["Maternal Morbidity Ratio per 100,000 Live Births"] = round(
    ratio_df["Births w/ Morbidity"] / ratio_df["Total Births"] * 100000)

display(ratio_df)

Unnamed: 0,Mother's Education,Marital Status,Mother's Single Race 6,Year,Total Births,Births w/ Morbidity,"Maternal Morbidity Ratio per 100,000 Live Births"
0,8th grade or less,Married,Asian,2021.0,2890.0,51.0,1765.0
1,8th grade or less,Married,Black or African American,2021.0,3030.0,64.0,2112.0
2,8th grade or less,Married,White,2021.0,19023.0,239.0,1256.0
3,8th grade or less,Unmarried,Black or African American,2021.0,2343.0,44.0,1878.0
4,8th grade or less,Unmarried,White,2021.0,3966.0,46.0,1160.0
...,...,...,...,...,...,...,...
85,High school graduate or GED completed,Unmarried,Hispanic Origin,2021.0,123379.0,1330.0,1078.0
86,"Master's degree (MA, MS, MEng, MEd, MSW, MBA)",Married,Hispanic Origin,2021.0,20196.0,291.0,1441.0
87,"Master's degree (MA, MS, MEng, MEd, MSW, MBA)",Unmarried,Hispanic Origin,2021.0,3826.0,50.0,1307.0
88,"Some college credit, but not a degree",Married,Hispanic Origin,2021.0,56166.0,640.0,1139.0


Do it by percentage instead?

In [361]:
ratio_df["Percentage of Live Births in Population"] =  ratio_df["Births w/ Morbidity"] / ratio_df["Total Births"] * 100000

display(ratio_df)

Unnamed: 0,Mother's Education,Marital Status,Mother's Single Race 6,Year,Total Births,Births w/ Morbidity,"Maternal Morbidity Ratio per 100,000 Live Births",Percentage of Live Births in Population
0,8th grade or less,Married,Asian,2021.0,2890.0,51.0,1765.0,1764.705882
1,8th grade or less,Married,Black or African American,2021.0,3030.0,64.0,2112.0,2112.211221
2,8th grade or less,Married,White,2021.0,19023.0,239.0,1256.0,1256.373863
3,8th grade or less,Unmarried,Black or African American,2021.0,2343.0,44.0,1878.0,1877.934272
4,8th grade or less,Unmarried,White,2021.0,3966.0,46.0,1160.0,1159.858800
...,...,...,...,...,...,...,...,...
85,High school graduate or GED completed,Unmarried,Hispanic Origin,2021.0,123379.0,1330.0,1078.0,1077.979235
86,"Master's degree (MA, MS, MEng, MEd, MSW, MBA)",Married,Hispanic Origin,2021.0,20196.0,291.0,1441.0,1440.879382
87,"Master's degree (MA, MS, MEng, MEd, MSW, MBA)",Unmarried,Hispanic Origin,2021.0,3826.0,50.0,1307.0,1306.847883
88,"Some college credit, but not a degree",Married,Hispanic Origin,2021.0,56166.0,640.0,1139.0,1139.479400


Create Map

In [365]:
fig = px.bar(ratio_df[ratio_df["Marital Status"] == "Married"], x="Mother's Education", y="Percentage of Live Births in Population", color="Mother's Single Race 6", title="", height=800,
             width=1000, category_orders={"Single Race 6": ["More than one race", "Asian", "Hispanic Origin", "White", "Black or African American", "American Indian or Alaska Native"].reverse()})
fig.show()
