# Visualization of Infant, Neonatal, Postneonatal, Fetal, and Perinatal Mortality from 1983 to 2018
This is an interactive visualization of the data located at https://catalog.data.gov/dataset/infant-neonatal-postneonatal-fetal-and-perinatal-mortality-rates-by-detailed-race-and-hisp-016ed .
It includes step-by-step instructions for building said visualization.

First, we import the necessary Python libraries, load the dataset, and preview its first few rows.

In [285]:
# Import our data processing library
import pandas as pd
import io
import requests

# Download the CSV export of the dataset from the above website and load it
# into a DataFrame
url = "https://data.cdc.gov/api/views/nfuu-hu6j/rows.csv?accessType=DOWNLOAD"
# A timeout keeps the cell from hanging indefinitely on a stalled connection
response = requests.get(url, timeout=30)
# Fail loudly on an HTTP error instead of parsing an error page as CSV
response.raise_for_status()
content = response.content
df = pd.read_csv(io.StringIO(content.decode('utf-8')))

# Import altair to visualize data
import altair as alt

# Preview the first rows of the loaded table to inspect its columns
df.head()

Unnamed: 0,INDICATOR,INDICATOR_NUM,UNIT,UNIT_NUM,STUB_NAME,STUB_NAME_NUM,STUB_LABEL,STUB_LABEL_NUM,YEAR,YEAR_NUM,ESTIMATE,FLAG
0,"Infant, neonatal, postneonatal, fetal, and per...",1,"Infant deaths per 1,000 live births",1,Total,0,All mothers,0.1,1983,1,10.9,
1,"Infant, neonatal, postneonatal, fetal, and per...",1,"Infant deaths per 1,000 live births",1,Total,0,All mothers,0.1,1985,2,10.4,
2,"Infant, neonatal, postneonatal, fetal, and per...",1,"Infant deaths per 1,000 live births",1,Total,0,All mothers,0.1,1990,3,8.9,
3,"Infant, neonatal, postneonatal, fetal, and per...",1,"Infant deaths per 1,000 live births",1,Total,0,All mothers,0.1,1995,4,7.6,
4,"Infant, neonatal, postneonatal, fetal, and per...",1,"Infant deaths per 1,000 live births",1,Total,0,All mothers,0.1,1996,5,7.3,


Looking at the data header, we will need to filter the rows and rename some columns in order to have a useful format for visualization.

In [None]:
# Exclude the row groups that should not be plotted. Every mask is built from
# the frame it is applied to, so no step relies on index alignment with `df`.
excluded_stub_names = ['Race (Single race)', 'Race and Hispanic origin (Single race)']
excluded_stub_labels = [
    'American Indian or Alaska Native',
    'Asian or Pacific Islander',
    'Black or African American',
    'White',
    'Not Hispanic or Latina: Asian',
    'Not Hispanic or Latina: Native Hawaiian or Other Pacific Islander',
]
excluded_units = [
    'Late fetal deaths per 1,000 live births plus late fetal deaths',
    'Neonatal deaths per 1,000 live births',
    'Perinatal deaths per 1,000 live births plus late fetal deaths',
    'Postneonatal deaths per 1,000 live births',
]

# Step 1: drop rows by STUB_NAME
newdf = df.loc[~df['STUB_NAME'].isin(excluded_stub_names)]
# Step 2: drop rows by STUB_LABEL
findf = newdf.loc[~newdf['STUB_LABEL'].isin(excluded_stub_labels)]
# Step 3: drop rows by UNIT, keeping only the units not listed above
findf = findf.loc[~findf['UNIT'].isin(excluded_units)]

# Clean up labeling for clarity
findf = findf.rename(columns={'YEAR':'Year','STUB_LABEL':'Demographics','ESTIMATE':'Estimate','UNIT':'Type'})

Now we can build our visualization, cleaning up the data along the way.

In [304]:
# Implementing a dropdown search
# Options are passed as a plain list, one entry per distinct demographic
dropdown = alt.binding_select(options=findf["Demographics"].unique().tolist(), name="Select a demographic:  ")

# Implementing selection: a point selection on Demographics that is set either
# by clicking a mark or through the dropdown bound above
selection = alt.selection_point(fields=['Demographics'], on='click', bind=dropdown)

# Generate trends over time for each race and ethnic group
# Year is stored as a string before being encoded as a temporal (:T) field
findf['Year'] = findf['Year'].astype(str)
allracePlot = alt.Chart(findf, height=500, width=600).mark_circle(size=100).encode(
    x = "Year:T",
    y = alt.Y("mean(Estimate)", axis=alt.Axis(title='Est. Average Fetal and Infant Deaths')),
    color = alt.Color("Demographics", legend=alt.Legend(orient='left', titleFontSize=20, labelFontSize=15, labelLimit=500), scale=alt.Scale(scheme='tableau10')),
    tooltip = ["Demographics", "mean(Estimate)"],
    # Marks outside the current selection are faded rather than hidden
    opacity = alt.condition(selection, alt.value(1), alt.value(.2))
).add_params(selection)

# Create detail plot of the data
# The size is declared once; these are the dimensions that were actually in
# effect before (the later .properties() call overrode the Chart() arguments)
generalracePlot = alt.Chart(findf, height=250, width=350).mark_bar().encode(
    # labelExpr shortens each Type label to the text before its first space
    y = alt.Y('Type', axis=alt.Axis(labelExpr="substring(datum.label, 0, indexof(datum.label, ' '))")),
    x = alt.X('mean(Estimate)', axis=alt.Axis(title='Avg. Deaths from 1983-2018'))
).transform_filter(selection)

# Concatenate so that the detail plot is linked to the main scatter plot
# (kept as the final expression so the notebook renders the chart)
(allracePlot | generalracePlot).properties(
    title=alt.TitleParams('Reported U.S. Infant and Fetal Deaths per 1,000 Live Births', anchor='middle', fontSize=35)
).configure_axis(
    labelFontSize=12,
    titleFontSize=20
)