# Exploration for Reported Assessment Results and Suppressed Assessment Information

## Imports and Such

In [1]:
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

In [2]:
# Assessment data with the fully suppressed scores already removed
assessments = pd.read_pickle('../data/school_based/assessments_clean.pkl')

# The suppressed outlier records, kept in their own file
suppressed = pd.read_pickle('../data/school_based/full_suppression.pkl')

## Listy McListface - A Place to look at the lists in my dataframe.

In [3]:
# Assessments Info: column names, non-null counts, and dtypes of the assessments frame
assessments.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Index: 335452 entries, 0 to 381480
Data columns (total 27 columns):
 #   Column              Non-Null Count   Dtype   
---  ------              --------------   -----   
 0   year                335452 non-null  int64   
 1   system_name         335452 non-null  object  
 2   school_name         335452 non-null  object  
 3   test                335452 non-null  object  
 4   subject             335452 non-null  object  
 5   subject_area        335452 non-null  object  
 6   student_group       335452 non-null  object  
 7   valid_tests         335452 non-null  float64 
 8   pct_met_exceeded    335452 non-null  float64 
 9   system              335452 non-null  object  
 10  school              335452 non-null  object  
 11  school_lvl          335452 non-null  object  
 12  tot_enrolled        335452 non-null  object  
 13  fte_teachers        334603 non-null  float64 
 14  stu_tchr_ratio      334603 non-null  float64 
 15  school_type   

In [4]:
# Student Groups: sorted distinct values of the student_group column
student_group_values = assessments['student_group'].to_numpy()
student_group_list = np.unique(student_group_values).tolist()
student_group_list

['All Students',
 'American Indian or Alaska Native',
 'Asian',
 'Black or African American',
 'Black/Hispanic/Native American',
 'Economically Disadvantaged',
 'English Learner Transitional 1-4',
 'English Learners',
 'English Learners with Transitional 1-4',
 'Female',
 'Gifted',
 'Hispanic',
 'Male',
 'Native Hawaiian or Other Pacific Islander',
 'Non-Black/Hispanic/Native American',
 'Non-Economically Disadvantaged',
 'Non-English Learners/Transitional 1-4',
 'Non-Students with Disabilities',
 'Students with Disabilities',
 'Super Subgroup',
 'White']

In [5]:
# School Types: sorted distinct values of the school_type column
school_type_values = assessments['school_type'].to_numpy()
school_type_list = np.unique(school_type_values).tolist()
school_type_list

['1-Regular school',
 '2-Special education school',
 '4-Alternative Education School']

In [6]:
# Subject Areas: sorted distinct values of the subject_area column
subject_area_values = assessments['subject_area'].to_numpy()
subject_area_list = np.unique(subject_area_values).tolist()
subject_area_list

['ELA', 'Math', 'Science', 'Social Studies']

## Unsuppressed: Broad Overview of Results

### Overall Reported Proficiencies by School-Level, Subject Area, and Year.

In [7]:
# Subject Area Pivot Table: mean pct_met_exceeded for every
# (school level, subject area) pair, with one column per year.
# The string alias 'mean' is used because pandas deprecates passing
# np.mean as an aggfunc (FutureWarning from pandas 2.1).
subject_area_pivot = pd.pivot_table(
    assessments,
    values='pct_met_exceeded',
    index=['school_lvl', 'subject_area'],
    columns='year',
    aggfunc='mean',
)

# Include 2020 as NaN values to show a gap when testing did not occur.
# Building the list from a set keeps this safe when 2021 is missing
# (the old years.index(2021) raised ValueError) and when 2020 is
# already present (the old insert duplicated it).
GAP_YEAR = 2020
years = sorted(set(subject_area_pivot.columns) | {GAP_YEAR})

# Reindex the columns to the full year list, then turn the
# (school_lvl, subject_area) multi-index into ordinary columns
subject_area_pivot = subject_area_pivot.reindex(columns=years).reset_index()

subject_area_pivot

year,school_lvl,subject_area,2018,2019,2020,2021,2022
0,Elementary,ELA,0.313354,0.317439,,0.288133,0.332412
1,Elementary,Math,0.351674,0.40286,,0.308109,0.338107
2,Elementary,Science,0.519245,,,0.353617,0.367404
3,Elementary,Social Studies,0.338689,0.345175,,0.328732,0.364729
4,High,ELA,0.269997,0.331378,,0.294335,0.348687
5,High,Math,0.231409,0.26061,,0.201373,0.229202
6,High,Science,0.400077,,,0.349336,0.355829
7,High,Social Studies,0.270426,0.286531,,0.305459,0.343403
8,Middle,ELA,0.272938,0.289341,,0.244374,0.294892
9,Middle,Math,0.362565,0.390276,,0.341143,0.357363


### 🏋️ Weighting Metrics Based on Valid Tests

#### 🏫 List of columns in assessments (school level) for use in district analysis.

In [8]:
# Columns carried over from the school-level assessments for the district pivots
district_columns = [
    'locale',
    'year',
    'system_name',
    'school_lvl',
    'subject_area',
    'student_group',
    'pct_met_exceeded_w',
    'school_type',
    'magnet',
    'charter',
    'title_1',
    'fte_teachers_w',
    'stu_tchr_ratio_w',
    'valid_tests',
]

districts = assessments[district_columns]

#### 📇 Indices for Weight Pivots

In [9]:
# Row keys shared by the weight pivots
indices = [
    'system_name',    # School district
    'school_lvl',     # Level of school (Elem, Middle, High)
    'school_type',    # Regular, special education, or alternative education school
    'magnet',
    'charter',
    'title_1',
    'locale',
    'subject_area',
    'student_group',
]

# Draft, intentionally left commented out: `weight` is only created in a later cell.
#
# Change in valid tests from the year before schools closed down to the year after.
# Negative values indicate a decrease from pre-pandemic; positive values an increase.
# weight['1y_post'] = weight[2021] - weight[2019]
#
# Change in valid tests 2 years post-pandemic.
# Negative values indicate a decrease in the first two years post-pandemic.
# weight['2y_post'] = weight[2022] - weight[2021]
#
# Difference between the current valid tests and pre-pandemic.
# Negative values indicate the current figure is below pre-pandemic levels.
# weight['pre_current'] = weight[2022] - weight[2019]
#
# Look at the pivot
# weight

#### 🏋️➕ Sum of Valid_Test-Weighted Scores for pct_met_exceeded, fte_teachers, and stu_tchr_ratio

In [10]:
def _sum_by_year(values):
    """Sum one `districts` column per `indices` group, with one column per year.

    Parameters
    ----------
    values : str
        Name of the `districts` column to total.

    Returns
    -------
    pandas.DataFrame
        Pivot indexed by `indices` with `year` as columns.
    """
    # 'sum' replaces np.sum, which pandas deprecates as an aggfunc (pandas 2.1+)
    return pd.pivot_table(
        districts,
        values=values,
        index=indices,
        columns='year',
        aggfunc='sum',
    )


# Sum of valid tests (The 🏋️)
weight = _sum_by_year('valid_tests')

# Sums of the weighted columns
sum_weighted_proficiency = _sum_by_year('pct_met_exceeded_w')  # students who met or exceeded expectations
sum_weighted_fte = _sum_by_year('fte_teachers_w')              # full-time equivalent teachers
sum_weighted_str = _sum_by_year('stu_tchr_ratio_w')            # student/teacher ratios
# NOTE(review): a group whose weighted values are all NaN sums to 0 rather than NaN — confirm that is intended.

# Concatenate the pivot tables horizontally. `keys` labels each pivot's own year
# columns, so the metric labels cannot drift out of step if one pivot ever has a
# different set of years (the old positional column assignment could).
weighted_sums_pivot = pd.concat(
    [sum_weighted_proficiency, sum_weighted_fte, sum_weighted_str],
    axis=1,
    keys=['pct_met_exceeded', 'fte_teachers', 'stu_tchr_ratio'],
)

# Still exposed under its old name: the (metric, year) column index
column_index = weighted_sums_pivot.columns

weighted_sums_pivot

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Unnamed: 6_level_0,Unnamed: 7_level_0,Unnamed: 8_level_0,pct_met_exceeded,pct_met_exceeded,pct_met_exceeded,pct_met_exceeded,fte_teachers,fte_teachers,fte_teachers,fte_teachers,stu_tchr_ratio,stu_tchr_ratio,stu_tchr_ratio,stu_tchr_ratio
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,year,2018,2019,2021,2022,2018,2019,2021,2022,2018,2019,2021,2022
system_name,school_lvl,school_type,magnet,charter,title_1,locale,subject_area,student_group,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2
Achievement School District,Elementary,1-Regular school,0,0,1,11-City: Large,ELA,All Students,36.032,33.021,8.004,24.007,6363.2,5990.2,1867.75,2740.80,7574.44,8693.85,2121.56,4984.68
Achievement School District,Elementary,1-Regular school,0,0,1,11-City: Large,ELA,Black or African American,34.028,31.004,4.028,21.949,6040.3,5812.6,1159.00,2632.45,7218.27,8439.33,1255.52,4796.98
Achievement School District,Elementary,1-Regular school,0,0,1,11-City: Large,ELA,Black/Hispanic/Native American,34.030,31.097,8.026,22.015,6183.0,5947.2,1852.50,2723.96,7365.75,8634.68,2105.04,4941.44
Achievement School District,Elementary,1-Regular school,0,0,1,11-City: Large,ELA,Economically Disadvantaged,32.996,27.936,11.021,18.024,5524.8,5080.6,2445.00,1838.10,6683.24,7355.88,3086.04,3273.15
Achievement School District,Elementary,1-Regular school,0,0,1,11-City: Large,ELA,Female,,18.988,5.026,14.010,,2678.8,925.25,1868.10,,3659.84,1063.72,3097.20
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Wilson County,Other,1-Regular school,0,0,0,31-Town: Fringe,Social Studies,Non-English Learners/Transitional 1-4,11.000,,195.026,33.000,192.5,,33330.18,846.50,350.00,,9136.14,655.50
Wilson County,Other,1-Regular school,0,0,0,31-Town: Fringe,Social Studies,Non-Students with Disabilities,10.002,,206.118,37.004,170.5,,34217.67,981.94,310.00,,9379.41,760.38
Wilson County,Other,1-Regular school,0,0,0,31-Town: Fringe,Social Studies,Students with Disabilities,,,4.991,,,,2268.03,,,,621.69,
Wilson County,Other,1-Regular school,0,0,0,31-Town: Fringe,Social Studies,Super Subgroup,,,77.952,15.012,,,17158.14,457.11,,,4703.22,353.97


#### 🏋️⚖️ Weighted Averages of pct_met_exceeded, fte_teachers, and stu_tchr_ratio (Weighted by valid_tests)

In [11]:
# Weighted average = weighted sum / total weight.
# DataFrame division aligns on both the row index and the year columns.
weighted_avg_proficiency = sum_weighted_proficiency.div(weight)
weighted_avg_fte = sum_weighted_fte.div(weight)
weighted_avg_str = sum_weighted_str.div(weight)

# Two-level column labels: metric name on top, year underneath
metric_labels = ['pct_met_exceeded', 'fte_teachers', 'stu_tchr_ratio']
column_index = pd.MultiIndex.from_product([metric_labels, weighted_avg_proficiency.columns])

# Place the three weighted-average pivots side by side and label them
weighted_avg_pivot = pd.concat(
    [weighted_avg_proficiency, weighted_avg_fte, weighted_avg_str],
    axis=1,
)
weighted_avg_pivot.columns = column_index

weighted_avg_pivot

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Unnamed: 6_level_0,Unnamed: 7_level_0,Unnamed: 8_level_0,pct_met_exceeded,pct_met_exceeded,pct_met_exceeded,pct_met_exceeded,fte_teachers,fte_teachers,fte_teachers,fte_teachers,stu_tchr_ratio,stu_tchr_ratio,stu_tchr_ratio,stu_tchr_ratio
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,year,2018,2019,2021,2022,2018,2019,2021,2022,2018,2019,2021,2022
system_name,school_lvl,school_type,magnet,charter,title_1,locale,subject_area,student_group,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2
Achievement School District,Elementary,1-Regular school,0,0,1,11-City: Large,ELA,All Students,0.090990,0.075048,0.057583,0.089914,16.068687,13.614091,13.437050,10.265169,19.127374,19.758750,15.263022,18.669213
Achievement School District,Elementary,1-Regular school,0,0,1,11-City: Large,ELA,Black or African American,0.090260,0.072609,0.053000,0.085405,16.022016,13.612646,15.250000,10.242996,19.146605,19.764239,16.520000,18.665292
Achievement School District,Elementary,1-Regular school,0,0,1,11-City: Large,ELA,Black/Hispanic/Native American,0.088390,0.071160,0.058159,0.083075,16.059740,13.609153,13.423913,10.279094,19.131818,19.758993,15.253913,18.646943
Achievement School District,Elementary,1-Regular school,0,0,1,11-City: Large,ELA,Economically Disadvantaged,0.094816,0.074895,0.058005,0.101831,15.875862,13.620912,12.868421,10.384746,19.204713,19.720858,16.242316,18.492373
Achievement School District,Elementary,1-Regular school,0,0,1,11-City: Large,ELA,Female,,0.104330,0.070789,0.087563,,14.718681,13.031690,11.675625,,20.109011,14.981972,19.357500
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Wilson County,Other,1-Regular school,0,0,0,31-Town: Fringe,Social Studies,Non-English Learners/Transitional 1-4,0.314286,,0.577000,0.660000,5.500000,,98.610000,16.930000,10.000000,,27.030000,13.110000
Wilson County,Other,1-Regular school,0,0,0,31-Town: Fringe,Social Studies,Non-Students with Disabilities,0.322645,,0.594000,0.638000,5.500000,,98.610000,16.930000,10.000000,,27.030000,13.110000
Wilson County,Other,1-Regular school,0,0,0,31-Town: Fringe,Social Studies,Students with Disabilities,,,0.217000,,,,98.610000,,,,27.030000,
Wilson County,Other,1-Regular school,0,0,0,31-Town: Fringe,Social Studies,Super Subgroup,,,0.448000,0.556000,,,98.610000,16.930000,,,27.030000,13.110000


In [32]:
# Long format for plotting: move the year level of the columns into rows, then
# reset the index so the grouping fields and year become ordinary columns
# alongside the three metric columns.
# (plotly is imported in the imports cell at the top of the notebook.)
graph_data = weighted_avg_pivot.stack(level='year').reset_index()

In [60]:
# Get unique school levels
school_levels = graph_data['school_lvl'].unique()

# graph_data holds many rows per (school level, subject area, year); average them
# so every heatmap cell is backed by exactly one value instead of whichever
# duplicate row happens to be drawn last.
heatmap_data = (
    graph_data
    .groupby(['school_lvl', 'subject_area', 'year'], as_index=False)['pct_met_exceeded']
    .mean()
)

# One facet per school level. Adding every trace to a single set of axes
# would stack the heatmaps (and their colour bars) on top of each other.
fig = make_subplots(
    rows=1,
    cols=len(school_levels),
    subplot_titles=[str(level) for level in school_levels],
    shared_yaxes=True,
)

for facet_col, school_level in enumerate(school_levels, start=1):
    data = heatmap_data[heatmap_data['school_lvl'] == school_level]
    fig.add_trace(
        go.Heatmap(
            x=data['subject_area'],
            y=data['year'],
            z=data['pct_met_exceeded'],
            coloraxis='coloraxis',  # shared scale so facets are comparable
            name=school_level,
        ),
        row=1,
        col=facet_col,
    )

# Customize the layout; the single shared colour axis carries the colour scale
# and the colour bar title. The figure is wider than a single heatmap would
# need so that every facet stays legible.
fig.update_layout(
    title='Changes in pct_met_exceeded by School Level, Subject Area, and Year',
    coloraxis=dict(colorscale='viridis', colorbar=dict(title='pct_met_exceeded')),
    height=600,
    width=1200,
)
fig.update_xaxes(title_text='Subject Area')
fig.update_yaxes(title_text='Year', row=1, col=1)

# Show the facetted heatmap
fig.show()


In [79]:
# Calculate average proficiency per school level, subject area, and year
# (plotly.express is imported in the imports cell at the top of the notebook.)
average_proficiency = (
    graph_data
    .groupby(['school_lvl', 'subject_area', 'year'])['pct_met_exceeded']
    .mean()
    .reset_index()
)

# Display order for the school-level facets
school_lvl_order = ['Elementary', 'Middle', 'High', 'Secondary', 'Other']

# Create bar plot: one row per subject area, one column per school level.
# The subject ordering uses subject_area_list from the lists section above;
# the previously referenced `subject_areas` is not defined anywhere in the notebook.
fig = px.bar(
    average_proficiency,
    x='year',
    y='pct_met_exceeded',
    color='subject_area',
    facet_row='subject_area',
    facet_col='school_lvl',
    category_orders={'subject_area': subject_area_list, 'school_lvl': school_lvl_order},
    labels={
        'pct_met_exceeded': 'Average Proficiency',
        'school_lvl': 'School Level',
        'subject_area': 'Subject Area',
    },
)

# Title and explicit size (1200x900) set in a single call; autosize is dropped
# because it has no effect once both width and height are given.
fig.update_layout(
    title='Average Weighted Proficiency by School Level, Content Area, and Year',
    width=1200,
    height=900,
)

# Show the figure
fig.show()
