In [65]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
import statsmodels.api as sm

In [53]:
# State-level SAT/ACT results, indexed by state name (the raw file labels that column '0').
test_scores = (
    pd.read_csv('../data/test_scores.csv')
    .rename(columns={'0': 'State'})
    .set_index('State')
)

# Census region lookup CSV provided by Chris Halpert (https://github.com/cphalpert/census-regions).
# The index label for the District of Columbia is shortened to 'DC', and the
# remaining column names are converted to snake_case.
census_regions = (
    pd.read_csv('../data/census_regions_and_divisions.csv')
    .drop(columns=['Division'])
    .set_index('State')
    .rename(index={'District of Columbia': 'DC'})
    .rename(columns=lambda name: name.lower().replace(' ', '_'))
)

# Attach the region fields to the scores. This is an inner join on the state index,
# so only rows present in both tables are kept.
test_scores = test_scores.merge(census_regions, left_index=True, right_index=True)

# Column groups reused throughout the notebook.
all_cols = test_scores.columns
is_part_col = all_cols.str.contains('part')

# SAT score columns (participation excluded), then just the ERW/math section scores.
sat_cols = all_cols[all_cols.str.contains('sat') & ~is_part_col]
sat_st_cols = sat_cols[sat_cols.str.contains('erw') | sat_cols.str.contains('math')]

# ACT score columns (participation excluded); names longer than 8 characters
# (i.e. longer than 'act_2017') are kept as the section-level columns.
act_cols = all_cols[all_cols.str.contains('act') & ~is_part_col]
act_st_cols = act_cols[act_cols.str.len() > 8]

# Participation-rate columns for both tests.
part_cols = all_cols[is_part_col]

## Test Averages

#### Extreme SAT Performers

Below we can see Minnesota, Wisconsin, Iowa, and Kansas achieved average SAT scores in the top 5 nationally for both 2017 and 2018. We observe all states with top 10 average SAT scores have participation rates less than 5 percent. Conversely, the 10 states suffering the lowest average SAT scores tend to have much higher participation rates, with 6 having rates over 90 percent.

In [58]:
# For every SAT score column, list the five states with the highest values,
# ordered from best to fifth-best.
top_sat = pd.DataFrame(
    {col: test_scores[col].sort_values(ascending=False).index[:5] for col in sat_cols}
)
# Label the rows by rank instead of 0..4.
top_sat.index = pd.Index(['1st', '2nd', '3rd', '4th', '5th'])

top_sat

Unnamed: 0,sat_2017,sat_2017_erw,sat_2017_math,sat_2018,sat_2018_erw,sat_2018_math
1st,Minnesota,Minnesota,Minnesota,Minnesota,Minnesota,Minnesota
2nd,Wisconsin,Wisconsin,Wisconsin,Wisconsin,Wisconsin,Wisconsin
3rd,Iowa,Iowa,Iowa,North Dakota,North Dakota,North Dakota
4th,Missouri,Missouri,Missouri,Iowa,Iowa,Iowa
5th,Kansas,North Dakota,Kansas,Kansas,Wyoming,Kansas


In [59]:
# Five highest 2018 SAT averages, shown next to each state's SAT participation rate.
highest_sat_2018 = test_scores['sat_2018'].sort_values(ascending=False).head(5).index
test_scores.loc[highest_sat_2018, ['sat_2018', 'sat_2018_part']]

Unnamed: 0_level_0,sat_2018,sat_2018_part
State,Unnamed: 1_level_1,Unnamed: 2_level_1
Minnesota,1298,4
Wisconsin,1294,3
North Dakota,1283,2
Iowa,1265,3
Kansas,1265,4


In [60]:
# Five lowest 2018 SAT averages, shown next to each state's SAT participation rate.
lowest_sat_2018 = test_scores['sat_2018'].sort_values().head(5).index
test_scores.loc[lowest_sat_2018, ['sat_2018', 'sat_2018_part']]

Unnamed: 0_level_0,sat_2018,sat_2018_part
State,Unnamed: 1_level_1,Unnamed: 2_level_1
DC,977,92
Delaware,998,100
West Virginia,999,28
Idaho,1001,100
Michigan,1011,100


In [91]:
# Hover text for each state: one "<label><value>" entry per 2018 SAT measure,
# separated by HTML line breaks, with a trailing percent sign on participation.
sat_hover_fields = [
    ('SAT Average: ', 'sat_2018'),
    ('SAT Math Avg: ', 'sat_2018_math'),
    ('SAT ERW Avg: ', 'sat_2018_erw'),
    ('SAT Participation: ', 'sat_2018_part'),
]
sat_hover = None
for label, col in sat_hover_fields:
    entry = label + test_scores[col].astype(str)
    sat_hover = entry if sat_hover is None else sat_hover + '<br>' + entry
# Stored on the frame under 'text' (overwrites any existing 'text' column).
test_scores['text'] = sat_hover + '%'

# Choropleth of 2018 average SAT score by state, shaded green.
sat_trace = go.Choropleth(
    locations=test_scores['state_code'],
    locationmode="USA-states",
    text=test_scores['text'],
    z=test_scores['sat_2018'],
    colorscale='Greens',
    colorbar_title="Average SAT Score",
)
sat_map = go.Figure(data=sat_trace)

# Restrict the map to the USA and use the Albers USA projection.
usa_geo = dict(
    scope='usa',
    projection=go.layout.geo.Projection(type='albers usa'),
)
sat_map.update_layout(
    title_text='High SAT Average Score States (2018)',
    geo=usa_geo,
)
sat_map.show()

#### Extreme ACT Performers

We see New Hampshire, Massachusetts, and Connecticut achieved top 5 average ACT scores in both years. We note the absence of overlap between the SAT and ACT top 5 lists. A similar pattern emerges as with the SAT scores above, where the top 10 ACT performers tend to have low participation rates whereas low performers tend to have high participation rates, with 8 of the bottom 10 states implementing universal testing.

In [80]:
# For every ACT score column, list the five states with the highest values,
# ordered from best to fifth-best.
top_act = pd.DataFrame(
    {col: test_scores[col].sort_values(ascending=False).index[:5] for col in act_cols}
)
# Label the rows by rank instead of 0..4.
top_act.index = pd.Index(['1st', '2nd', '3rd', '4th', '5th'])

top_act

Unnamed: 0,act_2017,act_2017_english,act_2017_math,act_2017_reading,act_2017_science,act_2018,act_2018_english,act_2018_math,act_2018_reading,act_2018_science
1st,Massachusetts,Connecticut,Massachusetts,New Hampshire,Massachusetts,New Hampshire,Connecticut,Massachusetts,Connecticut,New Hampshire
2nd,New Hampshire,New Hampshire,New Hampshire,Connecticut,Connecticut,Connecticut,New Hampshire,New Hampshire,Rhode Island,New York
3rd,Connecticut,Massachusetts,Maine,Massachusetts,New Hampshire,Massachusetts,Massachusetts,New York,Massachusetts,Massachusetts
4th,Michigan,Maine,Connecticut,Delaware,New York,Michigan,New York,Connecticut,New Hampshire,Connecticut
5th,Maine,Rhode Island,New York,New York,Michigan,Rhode Island,Michigan,Delaware,DC,DC


In [82]:
# Five highest 2018 ACT averages, shown next to each state's ACT participation rate.
highest_act_2018 = test_scores['act_2018'].sort_values(ascending=False).head(5).index
test_scores.loc[highest_act_2018, ['act_2018', 'act_2018_part']]

Unnamed: 0_level_0,act_2018,act_2018_part
State,Unnamed: 1_level_1,Unnamed: 2_level_1
New Hampshire,25,16
Connecticut,25,26
Massachusetts,25,25
Michigan,24,22
Rhode Island,24,15


In [83]:
# Five lowest 2018 ACT averages, shown next to each state's ACT participation rate.
lowest_act_2018 = test_scores['act_2018'].sort_values().head(5).index
test_scores.loc[lowest_act_2018, ['act_2018', 'act_2018_part']]

Unnamed: 0_level_0,act_2018,act_2018_part
State,Unnamed: 1_level_1,Unnamed: 2_level_1
Nevada,17,100
Mississippi,18,100
Hawaii,18,89
South Carolina,18,100
Louisiana,19,100


In [84]:
# Display the column labels currently present on test_scores.
test_scores.columns

Index(['sat_2017', 'sat_2017_erw', 'sat_2017_math', 'sat_2017_part',
       'sat_2018', 'sat_2018_erw', 'sat_2018_math', 'sat_2018_part',
       'act_2017', 'act_2017_english', 'act_2017_math', 'act_2017_reading',
       'act_2017_science', 'act_2017_part', 'act_2018', 'act_2018_english',
       'act_2018_math', 'act_2018_reading', 'act_2018_science',
       'act_2018_part', 'state_code', 'region', 'text'],
      dtype='object')

In [90]:
# Hover text for each state: the 2018 ACT composite, the four section averages and
# the participation rate, separated by HTML line breaks.
# NOTE: this overwrites any existing 'text' column on test_scores.
test_scores['text'] = (
    'ACT Average: ' + test_scores['act_2018'].astype(str) + '<br>'
    + 'ACT English Avg: ' + test_scores['act_2018_english'].astype(str) + '<br>'
    + 'ACT Reading Avg: ' + test_scores['act_2018_reading'].astype(str) + '<br>'
    + 'ACT Math Avg: ' + test_scores['act_2018_math'].astype(str) + '<br>'
    + 'ACT Science Avg: ' + test_scores['act_2018_science'].astype(str) + '<br>'
    + 'ACT Participation: ' + test_scores['act_2018_part'].astype(str) + '%'
)

# Choropleth of 2018 average ACT score by state, shaded blue.
# The figure is bound to `act_map` (it was previously bound to `sat_map`, which
# silently replaced the SAT figure with the ACT one).
act_map = go.Figure(data=go.Choropleth(
    locations=test_scores['state_code'],
    locationmode="USA-states",
    text=test_scores['text'],
    z=test_scores['act_2018'],
    colorscale='Blues',
    colorbar_title="Average ACT Score"
))

# Restrict the map to the USA and use the Albers USA projection.
act_map.update_layout(
    title_text='High ACT Average Score States (2018)',
    geo=dict(
        scope='usa',
        projection=go.layout.geo.Projection(type='albers usa'))
)
act_map.show()

## Participation Rates

A quick survey confirms students within a state tend to prioritize taking only one of the two assessments. No state boasts participation rates for both the ACT and SAT over 80% in either 2017 or 2018. Florida led the nation in cross-participation in 2017, with 73% taking the ACT and 83% taking the SAT, and again in 2018, with 66% taking the ACT and 97% taking the SAT. Only six states had participation rates over 40% for both assessments in 2017, while eight did in 2018.

Examining those states with near universal test participation for one assessment reveals a clear pattern of prioritizing a single assessment and ignoring the other. Of the nineteen states with over 95% participation in the ACT assessment, only two experienced over 50% participation in the SAT, with a median of 4%. Of the ten states with over 95% participation in the SAT assessment, only Florida had an ACT participation rate over 50%, but they have a much higher median ACT participation rate of 24%.

In [87]:
# States where more than 60% of students took each test in 2017,
# shown with both years' participation rates for comparison.
act_2017_over_60 = test_scores['act_2017_part'].gt(60)
sat_2017_over_60 = test_scores['sat_2017_part'].gt(60)
participation_view = ['sat_2017_part', 'act_2017_part', 'sat_2018_part', 'act_2018_part']
test_scores.loc[act_2017_over_60 & sat_2017_over_60, participation_view]

Unnamed: 0_level_0,sat_2017_part,act_2017_part,sat_2018_part,act_2018_part
State,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Florida,83,73,97,66


In [86]:
# States where more than 40% of students took each test in 2018,
# shown with both years' participation rates for comparison.
act_2018_over_40 = test_scores['act_2018_part'].gt(40)
sat_2018_over_40 = test_scores['sat_2018_part'].gt(40)
participation_view = ['sat_2017_part', 'act_2017_part', 'sat_2018_part', 'act_2018_part']
test_scores.loc[act_2018_over_40 & sat_2018_over_40, participation_view]

Unnamed: 0_level_0,sat_2017_part,act_2017_part,sat_2018_part,act_2018_part
State,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Florida,83,73,97,66
Georgia,61,55,70,53
Hawaii,55,90,56,89
Illinois,9,93,99,43
North Carolina,49,100,52,100
Oregon,43,40,48,42
South Carolina,50,100,55,100
Texas,62,45,66,41


In [88]:
# States with ACT participation above 95% in 2018, ordered by SAT participation
# (highest first) and transposed so each state is a column.
act_near_universal = test_scores[test_scores['act_2018_part'] > 95]
(
    act_near_universal[['act_2018_part', 'sat_2018_part']]
    .sort_values('sat_2018_part', ascending=False)
    .T
)

State,South Carolina,North Carolina,Nevada,Ohio,Montana,Oklahoma,Tennessee,Alabama,Arkansas,Missouri,Minnesota,Louisiana,Kentucky,Utah,Nebraska,Mississippi,Wisconsin,Wyoming,North Dakota
act_2018_part,100,100,100,100,100,100,100,100,100,100,99,100,100,100,100,100,100,100,98
sat_2018_part,55,52,23,18,10,8,6,6,5,4,4,4,4,4,3,3,3,3,2


In [89]:
# States with SAT participation above 95% in 2018, ordered by SAT participation
# (highest first) and transposed so each state is a column.
sat_near_universal = test_scores[test_scores['sat_2018_part'] > 95]
(
    sat_near_universal[['act_2018_part', 'sat_2018_part']]
    .sort_values('sat_2018_part', ascending=False)
    .T
)

State,Colorado,Connecticut,Delaware,Idaho,Michigan,Illinois,Maine,Florida,Rhode Island,New Hampshire
act_2018_part,30,26,17,36,22,43,7,66,15,16
sat_2018_part,100,100,100,100,100,99,99,97,97,96


#### Geographic Distribution of Participation Rates

Below we observe that ACT dominates the market in the Midwest while SAT leads more modestly along the coasts. Two noticeable color changes occur between 2017 and 2018, for Colorado and Illinois, which are discussed later. Comparing to the previous maps displaying average score by state, one can already see a tendency for high score averages to match low participation rates for each test.

In [92]:
# Signed 2017 participation gap per state: positive where the ACT rate is higher,
# negative where the SAT rate is higher.
part_gap_2017 = test_scores['act_2017_part'] - test_scores['sat_2017_part']

# Diverging scale: green for SAT-leaning states, blue for ACT-leaning states.
# The midpoint is pinned to 0 so that white means equal participation in both
# tests rather than the middle of whatever range the data happens to span.
part_map = px.choropleth(locations=test_scores['state_code'],
                         locationmode="USA-states",
                         color=part_gap_2017,
                         color_continuous_scale=['#2ca02c', 'white', '#1f77b4'],
                         color_continuous_midpoint=0,
                         scope="usa")
part_map.update_layout(
    title_text='ACT Participation Rate - SAT Participation Rate (2017)',
    # Replace the default colorbar label with a description of the plotted quantity.
    coloraxis_colorbar_title_text='ACT - SAT participation'
)
part_map.show()

In [93]:
# Signed 2018 participation gap per state: positive where the ACT rate is higher,
# negative where the SAT rate is higher.
part_gap_2018 = test_scores['act_2018_part'] - test_scores['sat_2018_part']

# Diverging scale: green for SAT-leaning states, blue for ACT-leaning states.
# The midpoint is pinned to 0 so that white means equal participation in both
# tests rather than the middle of whatever range the data happens to span.
part_map = px.choropleth(locations=test_scores['state_code'],
                         locationmode="USA-states",
                         color=part_gap_2018,
                         color_continuous_scale=['#2ca02c', 'white', '#1f77b4'],
                         color_continuous_midpoint=0,
                         scope="usa")
part_map.update_layout(
    title_text='ACT Participation Rate - SAT Participation Rate (2018)',
    # Replace the default colorbar label with a description of the plotted quantity.
    coloraxis_colorbar_title_text='ACT - SAT participation'
)
part_map.show()