The purpose of this notebook is to visualize how MMR vaccination coverage varied from state to state between 1999 and 2014. After the data was cleaned, D3.js was used to create an interactive map. The data used to create this visualization can be found on the [CDC's website](https://www.cdc.gov/vaccines/imz-managers/coverage/nis/child/index.html).

In [1]:
import pandas as pd
import urllib.request, json
from IPython.display import IFrame

In [2]:
def to_html(src: str, body = '', code = '', style = '', title = ''):
    """Assemble a standalone HTML page that loads D3 v5 and write it to ``src``.

    Parameters
    ----------
    src : str
        Path of the HTML file to write. An existing file is overwritten and
        missing parent directories are created.
    body : str
        Markup placed inside ``<body>``, ahead of the inline script.
    code : str
        JavaScript placed in an inline ``<script>`` tag at the end of the body.
    style : str
        CSS placed in a ``<style>`` tag in the head.
    title : str
        Text of the ``<title>`` element.

    Notes
    -----
    The arguments are interpolated verbatim (no escaping), so they must
    already be valid HTML / JavaScript / CSS.
    """
    from pathlib import Path

    # A fresh checkout may not contain the output directory yet.
    Path(src).parent.mkdir(parents=True, exist_ok=True)

    # Write UTF-8 explicitly (the platform default may be a legacy code page)
    # and declare the same charset in the page so browsers decode it correctly.
    with open(src, 'w', encoding='utf-8') as doc:
        doc.write(f"""
            <!doctype html>
            <html>
                <head>
                    <meta charset='utf-8'>
                    <script src='https://d3js.org/d3.v5.min.js'></script>
                    <style>{style}</style>
                    <title>{title}</title>
                </head>
                <body>
                    {body}
                    <script>{code}</script>
                </body>
            </html>""")
        
def to_array(df):
    """Serialize a DataFrame as a JSON array holding one object per row.

    The returned string is valid JSON, and therefore also a valid JavaScript
    array literal that can be pasted into a script. Column labels become the
    object keys; missing values (NaN/None) are written as ``null`` rather than
    the Python tokens ``nan``/``None``, which JavaScript cannot parse.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to serialize.

    Returns
    -------
    str
        JSON text of the form ``[{"col": value, ...}, ...]``.
    """
    return df.to_json(orient='records')

In [3]:
def fix_state(state):
    """Return a canonical label for the two rows that are spelled inconsistently.

    A label containing 'National' maps to 'U.S. National' and a label
    containing 'Columbia' maps to 'District of Columbia'; 'National' is
    checked first. Any other label is returned unchanged.
    """
    canonical_names = (
        ('National', 'U.S. National'),
        ('Columbia', 'District of Columbia'),
    )
    for marker, canonical in canonical_names:
        if marker in state:
            return canonical
    return state

def _parse_point_estimate(cell):
    """Return the number in front of '±' (or the whole cell if there is no '±') as a float."""
    return float(str(cell).split('±')[0])


def clean_df(year: str, path: str, skiprows: int):
    """Load one year's Excel sheet and store a two-column frame in ``all_years``.

    Parameters
    ----------
    year : str
        Key under which the cleaned frame is stored in the module-level
        ``all_years`` dictionary.
    path : str
        Location of the Excel workbook to read.
    skiprows : int
        Number of leading rows to skip, forwarded to ``pandas.read_excel``.

    Returns
    -------
    None
        The result is published through ``all_years[year]``: a frame with the
        columns 'State' (labels normalised by ``fix_state``) and '1+MMR'
        (float point estimate).

    Raises
    ------
    ValueError
        If the sheet does not yield exactly two matching columns, or a
        coverage cell does not start with a number.
    """
    sheet = pd.read_excel(path, skiprows = skiprows)
    # Only the first 52 rows are kept -- presumably the national row, the
    # 50 states and DC; TODO confirm against the workbooks.
    sheet = sheet.head(52)

    # Keep the label column and the 1+MMR column. str() guards against
    # non-string column labels.
    wanted = [col for col in sheet.columns
              if '1+MMR' in str(col) or 'Unnamed: 0' in str(col)]
    # Copy so the column assignments below never write into a view of `sheet`.
    df = sheet[wanted].copy()
    df.columns = ['State', '1+MMR']

    # Cells look like '<estimate>±<margin>'. Splitting on '±' (instead of
    # slicing up to str.find) keeps the full value when there is no '±';
    # slicing with find()'s -1 would silently drop the last character.
    df['1+MMR'] = df['1+MMR'].apply(_parse_point_estimate)
    df['State'] = df['State'].apply(fix_state)
    all_years[year] = df

In [4]:
# Registry that clean_df() fills in, keyed by survey year.
all_years = {}

# The workbooks do not share a header height, so the number of rows to skip
# is listed by hand for every year: (year, workbook path, rows to skip).
SHEET_SPECS = (
    ('1999', 'Data/1999.xls', 2),
    ('2000', 'Data/2000.xls', 2),
    ('2001', 'Data/2001.xls', 1),
    ('2002', 'Data/2002.xls', 4),
    ('2003', 'Data/2003.xls', 3),
    ('2004', 'Data/2004.xls', 3),
    ('2005', 'Data/2005.xls', 4),
    ('2006', 'Data/2006.xls', 4),
    ('2007', 'Data/2007.xls', 4),
    ('2008', 'Data/2008.xls', 3),
    ('2009', 'Data/2009.xls', 4),
    ('2010', 'Data/2010.xlsx', 3),
    ('2011', 'Data/2011.xlsx', 3),
    ('2012', 'Data/2012.xlsx', 3),
    ('2013', 'Data/2013.xlsx', 2),
    ('2014', 'Data/2014.xlsx', 2),
)

for sheet_year, sheet_path, header_rows in SHEET_SPECS:
    clean_df(sheet_year, sheet_path, header_rows)

In [5]:
# Every yearly frame calls its coverage column '1+MMR'. Merging them as-is
# makes pandas disambiguate with '_x'/'_y' suffixes, which collide with each
# other after a few merges (recent pandas versions raise MergeError for that).
# Naming each column after its year *before* merging avoids the clash and
# yields the final 'State', '1999', ..., '2014' layout directly.
yearly_frames = [
    all_years[str(year)].rename(columns = {'1+MMR': str(year)})
    for year in range(1999, 2015)
]

# Outer-join on 'State' so a label missing from one year is kept (as NaN)
# rather than dropped.
merged_df = yearly_frames[0]
for yearly_frame in yearly_frames[1:]:
    merged_df = merged_df.merge(yearly_frame, on = 'State', how = 'outer')

merged_df

Unnamed: 0,State,1999,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014
0,U.S. National,91.5,90.5,91.4,91.6,93.0,93.0,91.5,92.3,92.3,92.1,90.0,91.5,91.6,90.8,91.9,91.5
1,Alabama,90.1,87.8,94.0,91.6,91.2,91.8,94.1,94.0,95.0,93.6,95.4,95.4,94.0,93.1,89.7,92.0
2,Alaska,90.7,88.8,87.8,88.7,90.7,89.7,90.8,85.7,89.7,88.4,85.2,88.4,90.8,86.2,90.5,90.2
3,Arizona,87.4,87.7,88.7,88.9,91.5,92.9,88.8,87.7,89.0,92.2,90.8,87.7,86.7,88.3,91.4,84.1
4,Arkansas,86.9,88.4,90.3,92.8,90.6,94.6,89.0,85.8,92.5,92.2,81.8,90.5,93.7,92.3,88.3,89.1
5,California,92.8,90.8,91.3,90.4,92.8,93.1,91.6,92.8,94.6,92.7,89.8,91.4,91.0,91.5,90.7,90.5
6,Colorado,90.4,87.2,92.1,90.7,85.6,90.8,93.2,88.3,91.2,92.3,83.6,89.3,88.4,91.5,86.0,87.4
7,Connecticut,95.4,95.9,93.9,95.3,98.4,95.0,95.2,96.5,95.3,95.3,93.7,97.8,95.0,94.8,91.4,93.2
8,Delaware,94.2,90.2,93.6,95.2,93.0,94.5,95.2,96.4,94.8,93.1,90.2,94.0,90.6,94.4,94.8,90.8
9,District of Columbia,91.2,86.2,91.9,91.2,92.8,94.6,91.6,92.2,95.2,89.7,91.2,94.7,93.5,93.0,96.2,90.9


In [6]:
# Download the GeoJSON outlines for the United States and parse them into a dict.
# The timeout (seconds) makes a stalled connection fail instead of hanging the
# notebook forever during a Run All.
with urllib.request.urlopen("https://eric.clst.org/assets/wiki/uploads/Stuff/gz_2010_us_040_00_500k.json", timeout=30) as response:
    US_GeoJSON = json.loads(response.read().decode())

In [7]:
# Static page skeleton: a heading for the current year, the SVG canvas for the
# map, and a line that shows the value of the state under the mouse.
body = """
    <h1 id='year'>1999</h1>
    <svg id="viz" width=1050 height=800>
        <g id="map"></g>
    </svg>
    <h2 id='selection'></h2>
"""

# JavaScript for the page. This is an f-string, so literal JS braces are
# doubled ({{ }}) and single braces interpolate Python values. The GeoJSON is
# embedded with json.dumps so it is always a valid JavaScript literal
# (str(dict) would emit Python-only tokens such as None/True/False).
code = f"""
    const merged_array = {to_array(merged_df)};
    const geoJSON = {json.dumps(US_GeoJSON)};

    // Attach each state's coverage row to the matching GeoJSON feature
    geoJSON.features.forEach((feature) => {{
        const state = feature.properties.NAME;
        merged_array.forEach((row) => {{
            if (row.State === state) {{
                feature.data = row;
            }}
        }})
    }})

    const geoMercator = d3.geoMercator()
                          .scale(500)
                          .center([-98.35, 39.5]) // Center of the country
                          .translate([675, 550]);
    const colorScale = d3.scaleLinear()
                     .domain([80, 100])
                     .range(['white', 'darkblue']);
    const geoPath = d3.geoPath().projection(geoMercator);

    let year = 1999;

    function drawMap(year) {{
        d3.select('#map').selectAll('path').remove();

        d3.select('#map').selectAll('path')
            .data(geoJSON.features)
            .enter()
            .append('path')
            .attr('d', d => geoPath(d))
            .attr('transform', 'translate(30, 30)')
            .attr('stroke', 'white')
            .attr('fill', (d, i, nodes) => {{
                // Leave Puerto Rico blank
                if (d.data) {{
                    return colorScale(d.data[year]);
                }}
            }})
            .on('mouseover', (d, i, nodes) => {{
                // Features with no coverage row (Puerto Rico) have nothing to show
                if (!d.data) {{
                    return;
                }}
                document.querySelector('#selection').innerHTML = `${{d.data.State}}: ${{d.data[year]}}`;
            }});
    }}

    drawMap(1999);
    year += 1;

    // Advance one year every 2.5 seconds, wrapping from 2014 back to 1999
    setInterval(() => {{
        if (year > 2014) {{
            year = 1999;
        }}
        drawMap(year);

        document.querySelector('#year').innerHTML = year;
        document.querySelector('#selection').innerHTML = '';
        year += 1;
    }}, 2500)
"""

# Highlight the state under the mouse.
style = """
    path:hover {
        opacity: 0.5;
        fill: blue;
    }
"""
to_html('Output/US.html', body = body, code = code, title = 'MMR Map' ,style = style)
IFrame('Output/US.html', width=1100, height=1000)