Importing necessary packages

In [1]:
import pandas as pd
import os
import numpy as np

In [2]:
# Initialize dictionary to store dataframes
dataframes = {}

# Base directory
base_path = r"/Users/everettadkins/Documents/Senior Spring/DSCI Capstone/Data"

# Iterate through years (only for available files)
for year in range(2020, 2024):  # Only process existing files (2020-2023)
    file_path = os.path.join(base_path, f"merged_{year}.csv")

    if os.path.exists(file_path):  # Check if file exists
        # Load the DataFrame
        df = pd.read_csv(file_path)

        # Create the "Year" column
        school_year = f"{str(year-1)[-2:]}-{str(year)[-2:]}"  # Converts 2020 → '19-20'
        df["Year"] = school_year

        # Store it in dictionary
        key = f"merged_{year}"  # e.g., "merged_2020"
        dataframes[key] = df

# Print a sample of the new "Year" column to verify
for key, df in dataframes.items():
    print(f"Sample from {key}:")
    print(df[["Year"]].head(), "\n")  # Show only the "Year" column for quick verification


Sample from merged_2023:
    Year
0  22-23
1  22-23
2  22-23
3  22-23
4  22-23 



In [3]:
# Initialize dictionary to store dataframes
dataframe2 = {}

# Base directory
base_path = r"/Users/everettadkins/Documents/Senior Spring/DSCI Capstone/Data"

# List of filenames to load
file_names = [
    "distgrad_2023_clean.csv",
    "distperf1_2023_clean.csv",
    "distperf2_2023_clean.csv",
    "distprof_2023_clean.csv",
    "district_type2023_clean.csv",
    "dref_2023_clean.csv",
    "diststaar1_2023_clean.csv"
]

# Iterate through file names
for file_name in file_names:
    file_path = os.path.join(base_path, file_name)

    if os.path.exists(file_path):  # Check if file exists
        key = file_name.split(".")[0]  # Remove ".csv" for dictionary key
        dataframe2[key] = pd.read_csv(file_path)  # Load DataFrame

# Print keys to verify successful loading
print("Loaded DataFrames:", dataframe2.keys())


Loaded DataFrames: dict_keys(['distgrad_2023_clean', 'distperf1_2023_clean', 'distperf2_2023_clean', 'distprof_2023_clean', 'district_type2023_clean', 'dref_2023_clean', 'diststaar1_2023_clean'])


In [6]:
# Function to standardize column names: lowercase + strip spaces
def clean_column_names(df):
    df.columns = df.columns.str.lower().str.strip()
    return df

# Apply column cleaning to merged_2023
dataframes['merged_2023'] = clean_column_names(dataframes['merged_2023'])

# Apply column cleaning to all files in dataframe3
for key in dataframe2.keys():
    dataframe2[key] = clean_column_names(dataframe2[key])

# Get merged_2023 column set
yearly_cols = set(dataframes['merged_2023'].columns)

# Compare with each dataset and store results
matching_counts = []
for key, df in dataframe2.items():
    dataset_cols = set(df.columns)
    common_cols = yearly_cols.intersection(dataset_cols)
    
    matching_counts.append({
        "Dataset": key,
        "Total Columns in Dataset": len(dataset_cols),
        "Total Columns in yearly_data_2023": len(yearly_cols),
        "Matching Columns": len(common_cols)
    })

# Convert results to a DataFrame for better readability
matching_counts_df = pd.DataFrame(matching_counts)

matching_counts_df


Unnamed: 0,Dataset,Total Columns in Dataset,Total Columns in yearly_data_2023,Matching Columns
0,distgrad_2023_clean,950,6553,947
1,distperf1_2023_clean,2012,6553,2012
2,distperf2_2023_clean,1298,6553,1298
3,distprof_2023_clean,426,6553,423
4,district_type2023_clean,7,6553,7
5,dref_2023_clean,8,6553,8
6,diststaar1_2023_clean,1900,6553,1900


In [7]:
dataframes['merged_2023'].columns = dataframes['merged_2023'].columns.str.strip()


level_mapping = {
    'Approaches Grade Level': {
        'Mathematics': [col for col in dataframes['merged_2023'].columns if 'mathematics' in col and 'approaches grade level' in col],
        'Reading/ELA': [col for col in dataframes['merged_2023'].columns if 'reading/ela' in col and 'approaches grade level' in col],
        'Writing': [col for col in dataframes['merged_2023'].columns if 'writing' in col and 'approaches grade level' in col],
        'Science': [col for col in dataframes['merged_2023'].columns if 'science' in col and 'approaches grade level' in col],
        'Social Studies': [col for col in dataframes['merged_2023'].columns if 'social studies' in col and 'approaches grade level' in col],
    },
    'Meets Grade Level': {
        'Mathematics': [col for col in dataframes['merged_2023'].columns if 'mathematics' in col and 'meets grade level' in col],
        'Reading/ELA': [col for col in dataframes['merged_2023'].columns if 'reading/ela' in col and 'meets grade level' in col],
        'Writing': [col for col in dataframes['merged_2023'].columns if 'writing' in col and 'meets grade level' in col],
        'Science': [col for col in dataframes['merged_2023'].columns if 'science' in col and 'meets grade level' in col],
        'Social Studies': [col for col in dataframes['merged_2023'].columns if 'social studies' in col and 'meets grade level' in col],
    },
    'Masters Grade Level': {
        'Mathematics': [col for col in dataframes['merged_2023'].columns if 'mathematics' in col and 'masters grade level' in col],
        'Reading/ELA': [col for col in dataframes['merged_2023'].columns if 'reading/ela' in col and 'masters grade level' in col],
        'Writing': [col for col in dataframes['merged_2023'].columns if 'writing' in col and 'masters grade level' in col],
        'Science': [col for col in dataframes['merged_2023'].columns if 'science' in col and 'masters grade level' in col],
        'Social Studies': [col for col in dataframes['merged_2023'].columns if 'social studies' in col and 'masters grade level' in col],
    }
}

# Create a new DataFrame for aggregating performance levels by subject
# Include 'distname', 'district_id', 'county' for context
df_agg_levels_subject_2023 = dataframes['merged_2023'][['distname', 'district_id', 'county']].copy()

# Iterate over each performance level and each subject to calculate the average scores
for level, subjects in level_mapping.items():
    for subject, columns in subjects.items():
        # Create a new column in the aggregation DataFrame for each subject-performance level combination
        # Calculate the mean score for each subject-performance level across the specified columns
        df_agg_levels_subject_2023[f'{subject} ({level})'] = dataframes['merged_2023'][columns].mean(axis=1)

df_agg_levels_subject_2023.head()

Unnamed: 0,distname,district_id,county,Mathematics (Approaches Grade Level),Reading/ELA (Approaches Grade Level),Writing (Approaches Grade Level),Science (Approaches Grade Level),Social Studies (Approaches Grade Level),Mathematics (Meets Grade Level),Reading/ELA (Meets Grade Level),Writing (Meets Grade Level),Science (Meets Grade Level),Social Studies (Meets Grade Level),Mathematics (Masters Grade Level),Reading/ELA (Masters Grade Level),Writing (Masters Grade Level),Science (Masters Grade Level),Social Studies (Masters Grade Level)
0,CAYUGA ISD,1902,'001,46.783784,51.306306,,42.388889,38.277778,29.954955,35.288288,,21.138889,12.111111,10.396396,14.810811,,4.777778,3.166667
1,ELKHART ISD,1903,'001,52.3,54.652542,,51.071429,53.611111,26.85,34.635593,,28.880952,28.555556,8.883333,13.288136,,9.5,12.222222
2,FRANKSTON ISD,1904,'001,49.540323,53.303279,,42.25,46.2,31.233871,32.42623,,26.975,18.1,10.758065,12.868852,,14.225,8.05
3,NECHES ISD,1906,'001,45.344086,51.578947,,42.645161,41.733333,34.043011,28.810526,,27.516129,9.6,9.16129,7.705263,,3.612903,2.6
4,PALESTINE ISD,1907,'001,66.485714,64.871166,,58.534483,50.833333,36.668571,33.257669,,30.034483,19.533333,10.942857,9.871166,,5.948276,6.333333


In [None]:
# This cell aggregates dropout rates across grades 7-12
# This gives overall dropout rates and by demographic group
# List of dropout rate columns to aggregate
dropout_columns = [
    'District 2022 Annual Dropout for Grades 07-08: All Students Rate',
    'District 2022 Annual Dropout for Grades 07-08: Male Rate',
    'District 2022 Annual Dropout for Grades 07-08: Female Rate',
    'District 2022 Annual Dropout for Grades 07-08: African American Rate',
    'District 2022 Annual Dropout for Grades 07-08: American Indian Rate',
    'District 2022 Annual Dropout for Grades 07-08: Asian Rate',
    'District 2022 Annual Dropout for Grades 07-08: Hispanic Rate',
    'District 2022 Annual Dropout for Grades 07-08: Pacific Islander Rate',
    'District 2022 Annual Dropout for Grades 07-08: Two or More Races Rate',
    'District 2022 Annual Dropout for Grades 07-08: White Rate',
    'District 2022 Annual Dropout for Grades 07-08: Econ Disadv Rate',
    'District 2022 Annual Dropout for Grades 07-08: Special Ed Rate',
    'District 2022 Annual Dropout for Grades 07-08: At Risk Rate',
    'District 2022 Annual Dropout for Grades 07-08: EB/EL Rate',
    'District 2022 Annual Dropout for Grades 09-12: All Students Rate',
    'District 2022 Annual Dropout for Grades 09-12: Male Rate',
    'District 2022 Annual Dropout for Grades 09-12: Female Rate',
    'District 2022 Annual Dropout for Grades 09-12: African American Rate',
    'District 2022 Annual Dropout for Grades 09-12: American Indian Rate',
    'District 2022 Annual Dropout for Grades 09-12: Asian Rate',
    'District 2022 Annual Dropout for Grades 09-12: Hispanic Rate',
    'District 2022 Annual Dropout for Grades 09-12: Pacific Islander Rate',
    'District 2022 Annual Dropout for Grades 09-12: Two or More Races Rate',
    'District 2022 Annual Dropout for Grades 09-12: White Rate',
    'District 2022 Annual Dropout for Grades 09-12: Econ Disadv Rate',
    'District 2022 Annual Dropout for Grades 09-12: Special Ed Rate',
    'District 2022 Annual Dropout for Grades 09-12: At Risk Rate',
    'District 2022 Annual Dropout for Grades 09-12: EB/EL Rate'
]

# Standardize column names in the dataset to lowercase
dataframes['merged_2023'].columns = dataframes['merged_2023'].columns.str.lower()

# Standardize dropout_columns to lowercase too
dropout_columns = [col.lower() for col in dropout_columns]

# Create a DataFrame with the selected dropout columns, ensuring DistrictName is included
df_dropout_2023 = dataframes['merged_2023'][['distname', 'district_id', 'county', *dropout_columns]].copy()

# Aggregate dropout rates into new columns, and convert identity labels to lowercase (to match column name case)
for identity in ['All Students', 'Male', 'Female', 'African American', 'American Indian', 'Asian',
                 'Hispanic', 'Pacific Islander', 'Two or More Races', 'White', 'Econ Disadv', 
                 'Special Ed', 'At Risk', 'EB/EL']:
    
    # Convert the column names to lowercase to match dataset column names
    col1 = f'District 2022 Annual Dropout for Grades 07-08: {identity} Rate'.lower()
    col2 = f'District 2022 Annual Dropout for Grades 09-12: {identity} Rate'.lower()

    # Ensure columns exist before trying to compute mean
    cols_to_avg = [col for col in [col1, col2] if col in df_dropout_2023.columns]
    
    if cols_to_avg:  # Only compute if at least one column exists
        df_dropout_2023.loc[:, f'{identity} Dropout Rate'] = df_dropout_2023[cols_to_avg].mean(axis=1, skipna=True)

# Drop only existing columns to avoid KeyErrors
df_dropout_2023.drop(columns=dropout_columns, inplace=True, errors='ignore')


df_dropout_2023

Unnamed: 0,distname,district_id,county,All Students Dropout Rate,Male Dropout Rate,Female Dropout Rate,African American Dropout Rate,American Indian Dropout Rate,Asian Dropout Rate,Hispanic Dropout Rate,Pacific Islander Dropout Rate,Two or More Races Dropout Rate,White Dropout Rate,Econ Disadv Dropout Rate,Special Ed Dropout Rate,At Risk Dropout Rate,EB/EL Dropout Rate
0,CAYUGA ISD,1902,'001,0.00,0.00,0.00,,,,0.00,,0.0,0.00,0.00,0.00,0.00,
1,ELKHART ISD,1903,'001,0.00,0.00,0.00,0.0,,,0.00,,0.0,0.00,0.00,0.00,0.00,
2,FRANKSTON ISD,1904,'001,0.40,0.75,0.00,0.0,,,0.00,,0.0,0.55,0.45,3.05,0.50,0.0
3,NECHES ISD,1906,'001,0.00,0.00,0.00,0.0,,,0.00,,,0.00,0.00,0.00,0.00,
4,PALESTINE ISD,1907,'001,0.10,0.10,0.10,0.0,,0.0,0.10,,0.0,0.15,0.10,0.00,0.15,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1204,NEWCASTLE ISD,252902,'252,0.00,0.00,0.00,,,,0.00,,,0.00,0.00,0.00,0.00,
1205,OLNEY ISD,252903,'252,1.05,1.65,0.45,,,,0.55,,0.0,1.50,1.05,1.50,2.05,0.0
1206,ZAPATA COUNTY ISD,253901,'253,1.05,1.50,0.55,,,,1.05,,,0.00,1.20,1.55,1.65,2.0
1207,CRYSTAL CITY ISD,254901,'254,2.80,3.15,2.45,,,,2.85,,,,3.50,3.75,3.50,0.0


In [23]:
# Merge df_final_agg_subject_2023 with df_dropout_2023 on District Name, District ID and County

df_combined = df_agg_levels_subject_2023.merge(df_dropout_2023, on=['distname', 'district_id','county'], how='inner')    # include 3 level of subjects

# Display the final combined DataFrame
df_combined

Unnamed: 0,distname,district_id,county,Mathematics (Approaches Grade Level),Reading/ELA (Approaches Grade Level),Writing (Approaches Grade Level),Science (Approaches Grade Level),Social Studies (Approaches Grade Level),Mathematics (Meets Grade Level),Reading/ELA (Meets Grade Level),...,American Indian Dropout Rate,Asian Dropout Rate,Hispanic Dropout Rate,Pacific Islander Dropout Rate,Two or More Races Dropout Rate,White Dropout Rate,Econ Disadv Dropout Rate,Special Ed Dropout Rate,At Risk Dropout Rate,EB/EL Dropout Rate
0,CAYUGA ISD,1902,'001,46.783784,51.306306,,42.388889,38.277778,29.954955,35.288288,...,,,0.00,,0.0,0.00,0.00,0.00,0.00,
1,ELKHART ISD,1903,'001,52.300000,54.652542,,51.071429,53.611111,26.850000,34.635593,...,,,0.00,,0.0,0.00,0.00,0.00,0.00,
2,FRANKSTON ISD,1904,'001,49.540323,53.303279,,42.250000,46.200000,31.233871,32.426230,...,,,0.00,,0.0,0.55,0.45,3.05,0.50,0.0
3,NECHES ISD,1906,'001,45.344086,51.578947,,42.645161,41.733333,34.043011,28.810526,...,,,0.00,,,0.00,0.00,0.00,0.00,
4,PALESTINE ISD,1907,'001,66.485714,64.871166,,58.534483,50.833333,36.668571,33.257669,...,,0.0,0.10,,0.0,0.15,0.10,0.00,0.15,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1204,NEWCASTLE ISD,252902,'252,45.000000,48.722222,,49.343750,31.529412,26.505747,32.533333,...,,,0.00,,,0.00,0.00,0.00,0.00,
1205,OLNEY ISD,252903,'252,43.914729,49.048000,,52.022222,36.809524,28.767442,32.376000,...,,,0.55,,0.0,1.50,1.05,1.50,2.05,0.0
1206,ZAPATA COUNTY ISD,253901,'253,73.656250,75.965517,,67.069767,49.285714,42.796875,43.603448,...,,,1.05,,,0.00,1.20,1.55,1.65,2.0
1207,CRYSTAL CITY ISD,254901,'254,47.719626,48.897196,,44.611111,42.277778,19.205607,19.644860,...,,,2.85,,,,3.50,3.75,3.50,0.0


In [None]:
# Here, we aggregate our data back into a single DataFrame
# List of additional columns to aggregate
additional_columns = [
    'distname',
    'district_id',
    'county',

    # DREF:

    'DFLCHART',
    'DFLALTED',
    'ASVAB_STATUS',

    # DTYPE:

    'TEA Description',
    'NCES Description',
    'Charter School (Y/N)',

    # Demography
    'District 2023 Student Membership: All Students Count',
    'District 2023 Student Membership: Male Percent',
    'District 2023 Student Membership: Female Percent',
    'District 2023 Student Membership: African American Percent',
    'District 2023 Student Membership: American Indian Percent',
    'District 2023 Student Membership: Asian Percent',
    'District 2023 Student Membership: Hispanic Percent',
    'District 2023 Student Membership: Pacific Islander Percent',
    'District 2023 Student Membership: Two or More Races Percent',
    'District 2023 Student Membership: White Percent',
    'District 2023 Student Membership: Econ Disadv Percent',
    'District 2023 Student Membership: Special Ed Percent',
    'District 2023 Student Membership: Gifted & Talented Percent',
    'District 2023 Student Membership: EB/EL Percent',
    'District 2023 Student Membership: At Risk Percent',
    'District 2023 Student Membership: Immigrant Percent',
    'District 2023 Student Membership: Gifted & Talented Percent',
    'District 2023 Staff: Teacher Student Ratio',

    # CCMR Rates
    'District 2022 College, Career, & Military Ready Graduates: All Students Rate',
    'District 2022 College, Career, & Military Ready Graduates: Male Rate',
    'District 2022 College, Career, & Military Ready Graduates: Female Rate',
    'District 2022 College, Career, & Military Ready Graduates: African American Rate',
    'District 2022 College, Career, & Military Ready Graduates: Hispanic Rate',
    'District 2022 College, Career, & Military Ready Graduates: White Rate',
    'District 2022 College, Career, & Military Ready Graduates: American Indian Rate',
    'District 2022 College, Career, & Military Ready Graduates: Asian Rate',
    'District 2022 College, Career, & Military Ready Graduates: Pacific Islander Rate',
    'District 2022 College, Career, & Military Ready Graduates: Two or More Races Rate',
    'District 2022 College, Career, & Military Ready Graduates: Econ Disadv Rate',
    'District 2022 College, Career, & Military Ready Graduates: Special Ed Rate',
    'District 2022 College, Career, & Military Ready Graduates: EB/EL Rate',
    'District 2022 College, Career, & Military Ready Graduates: At Risk Rate',


    # Attendence Rates
    'District 2022 Attendance: All Students Rate',
    'District 2022 Attendance: Two or More Races Rate',
    'District 2022 Attendance: Asian Rate',
    'District 2022 Attendance: Pacific Islander Rate',
    'District 2022 Attendance: African American Rate',
    'District 2022 Attendance: Hispanic Rate',
    'District 2022 Attendance: White Rate',
    'District 2022 Attendance: American Indian Rate',
    'District 2022 Attendance: Econ Disadv Rate',
    'District 2022 Attendance: Special Ed Rate',
    'District 2022 Attendance: Female Rate',
    'District 2022 Attendance: Male Rate',
    'District 2022 Attendance: EB/EL Rate',
    'District 2022 Attendance: At Risk Rate',

    # Chronic Absenteeism Rates
    '2022 district Chronic Absenteeism All Students Group: Rate',
    '2022 district Chronic Absenteeism African American Group: Rate',
    '2022 district Chronic Absenteeism Hispanic Group: Rate',
    '2022 district Chronic Absenteeism White Group: Rate',
    '2022 district Chronic Absenteeism American Indian Group: Rate',
    '2022 district Chronic Absenteeism Asian Group: Rate',
    '2022 district Chronic Absenteeism Pacific Islander Group: Rate',
    '2022 district Chronic Absenteeism Two or More Races Group: Rate',
    '2022 district Chronic Absenteeism Econ Disadv Group: Rate',
    '2022 district Chronic Absenteeism Special Ed Group: Rate',
    '2022 district Chronic Absenteeism EL Group: Rate',
    '2022 district Chronic Absenteeism At Risk Group: Rate',

    # 4-Year Longitudinal
    'District 2022 4-Year Longitudinal: [FHSP-DLA Graduates] for All Students Rate',
    'District 2022 4-Year Longitudinal: [FHSP-DLA Graduates] for Female Rate',
    'District 2022 4-Year Longitudinal: [FHSP-DLA Graduates] for Male Rate',
    'District 2022 4-Year Longitudinal: [FHSP-DLA Graduates] for African American Rate',
    'District 2022 4-Year Longitudinal: [FHSP-DLA Graduates] for American Indian Rate',
    'District 2022 4-Year Longitudinal: [FHSP-DLA Graduates] for Asian Rate',
    'District 2022 4-Year Longitudinal: [FHSP-DLA Graduates] for Hispanic Rate',
    'District 2022 4-Year Longitudinal: [FHSP-DLA Graduates] for Pacific Islander Rate',
    'District 2022 4-Year Longitudinal: [FHSP-DLA Graduates] for White Rate',
    'District 2022 4-Year Longitudinal: [FHSP-DLA Graduates] for Two or More Races Rate',
    'District 2022 4-Year Longitudinal: [FHSP-DLA Graduates] for Econ Disadv Rate',
    'District 2022 4-Year Longitudinal: [FHSP-DLA Graduates] for Special Ed Rate',
    'District 2022 4-Year Longitudinal: [FHSP-DLA Graduates] for EB/EL Rate',
    'District 2022 4-Year Longitudinal: [FHSP-DLA Graduates] for At Risk Rate',
    'District 2022 4-Year Longitudinal: [RHSP/DAP or FHSP-E/DLA Graduates] for All Students Rate',
    'District 2022 4-Year Longitudinal: [RHSP/DAP or FHSP-E/DLA Graduates] for Male Rate',
    'District 2022 4-Year Longitudinal: [RHSP/DAP or FHSP-E/DLA Graduates] for Female Rate',
    'District 2022 4-Year Longitudinal: [RHSP/DAP or FHSP-E/DLA Graduates] for African American Rate',
    'District 2022 4-Year Longitudinal: [RHSP/DAP or FHSP-E/DLA Graduates] for American Indian Rate',
    'District 2022 4-Year Longitudinal: [RHSP/DAP or FHSP-E/DLA Graduates] for Asian Rate',
    'District 2022 4-Year Longitudinal: [RHSP/DAP or FHSP-E/DLA Graduates] for Hispanic Rate',
    'District 2022 4-Year Longitudinal: [RHSP/DAP or FHSP-E/DLA Graduates] for Pacific Islander Rate',
    'District 2022 4-Year Longitudinal: [RHSP/DAP or FHSP-E/DLA Graduates] for White Rate',
    'District 2022 4-Year Longitudinal: [RHSP/DAP or FHSP-E/DLA Graduates] for Two or More Races Rate',
    'District 2022 4-Year Longitudinal: [RHSP/DAP or FHSP-E/DLA Graduates] for Econ Disadv Rate',
    'District 2022 4-Year Longitudinal: [RHSP/DAP or FHSP-E/DLA Graduates] for Special Ed Rate',
    'District 2022 4-Year Longitudinal: [RHSP/DAP or FHSP-E/DLA Graduates] for EB/EL Rate',
    'District 2022 4-Year Longitudinal: [RHSP/DAP or FHSP-E/DLA Graduates] for At Risk Rate',

    # AP/IB
    'District 2022 AP/IB Course Completion Graduates: All Students Rate',
    'District 2022 AP/IB Course Completion Graduates: African American Rate',
    'District 2022 AP/IB Course Completion Graduates: Hispanic Rate',
    'District 2022 AP/IB Course Completion Graduates: White Rate',
    'District 2022 AP/IB Course Completion Graduates: American Indian Rate',
    'District 2022 AP/IB Course Completion Graduates: Asian Rate',
    'District 2022 AP/IB Course Completion Graduates: Pacific Islander Rate',
    'District 2022 AP/IB Course Completion Graduates: Two or More Races Rate',
    'District 2022 AP/IB Course Completion Graduates: Male Rate',
    'District 2022 AP/IB Course Completion Graduates: Female Rate',
    'District 2022 AP/IB Course Completion Graduates: Econ Disadv Rate',
    'District 2022 AP/IB Course Completion Graduates: Special Ed Rate',
    'District 2022 AP/IB Course Completion Graduates: EB/EL Rate',
    'District 2022 AP/IB Course Completion Graduates: At Risk Rate',
    'District 2022 AP/IB: All Students (All Subjects) % Taking',
    'District 2022 AP/IB: Male (All Subjects) % Taking',
    'District 2022 AP/IB: Female (All Subjects) % Taking',
    'District 2022 AP/IB: African American (All Subjects) % Taking',
    'District 2022 AP/IB: American Indian (All Subjects) % Taking',
    'District 2022 AP/IB: Asian (All Subjects) % Taking',
    'District 2022 AP/IB: Hispanic (All Subjects) % Taking',
    'District 2022 AP/IB: Two or More Races (All Subjects) % Taking',
    'District 2022 AP/IB: Pacific Islander (All Subjects) % Taking',
    'District 2022 AP/IB: White (All Subjects) % Taking',
    'District 2022 AP/IB: Special Ed (All Subjects) % Taking',
    'District 2022 AP/IB: Econ Disadv (All Subjects) % Taking',
    'District 2022 AP/IB: EB/EL (All Subjects) % Taking',
    'District 2022 AP/IB: At Risk (All Subjects) % Taking',
    'District 2022 AP/IB: All Students (All Subjects) % Students Above Criterion',
    'District 2022 AP/IB: Female (All Subjects) % Students Above Criterion',
    'District 2022 AP/IB: Male (All Subjects) % Students Above Criterion',
    'District 2022 AP/IB: African American (All Subjects) % Students Above Criterion',
    'District 2022 AP/IB: American Indian (All Subjects) % Students Above Criterion',
    'District 2022 AP/IB: Asian (All Subjects) % Students Above Criterion',
    'District 2022 AP/IB: Hispanic (All Subjects) % Students Above Criterion',
    'District 2022 AP/IB: Two or More Races (All Subjects) % Students Above Criterion',
    'District 2022 AP/IB: Pacific Islander (All Subjects) % Students Above Criterion',
    'District 2022 AP/IB: White (All Subjects) % Students Above Criterion',
    'District 2022 AP/IB: Special Ed (All Subjects) % Students Above Criterion',
    'District 2022 AP/IB: Econ Disadv (All Subjects) % Students Above Criterion',
    'District 2022 AP/IB: EB/EL (All Subjects) % Students Above Criterion',
    'District 2022 AP/IB: At Risk (All Subjects) % Students Above Criterion',

    # SAT/ACT
    'District 2022 SAT/ACT: All Students, % Above Criterion',
    'District 2022 SAT/ACT: Female Students, % Above Criterion',
    'District 2022 SAT/ACT: Male Students, % Above Criterion',
    'District 2022 SAT/ACT: African American Students, % Above Criterion',
    'District 2022 SAT/ACT: American Indian Students, % Above Criterion',
    'District 2022 SAT/ACT: Asian Students, % Above Criterion',
    'District 2022 SAT/ACT: Hispanic Students, % Above Criterion',
    'District 2022 SAT/ACT: Two or More Races Students, % Above Criterion',
    'District 2022 SAT/ACT: Pacific Islander Students, % Above Criterion',
    'District 2022 SAT/ACT: White Students, % Above Criterion',
    'District 2022 SAT/ACT: Special Ed Students, % Above Criterion',
    'District 2022 SAT/ACT: Econ Disadv Students, % Above Criterion',
    'District 2022 SAT/ACT: EL Students, % Above Criterion',
    'District 2022 SAT/ACT: At Risk Students, % Above Criterion',
    'District 2022 SAT/ACT: All Students, % Test-Taking',
    'District 2022 SAT/ACT: Female Students, % Test-Taking',
    'District 2022 SAT/ACT: Male Students, % Test-Taking',
    'District 2022 SAT/ACT: African American Students, % Test-Taking',
    'District 2022 SAT/ACT: American Indian Students, % Test-Taking',
    'District 2022 SAT/ACT: Asian Students, % Test-Taking',
    'District 2022 SAT/ACT: Hispanic Students, % Test-Taking',
    'District 2022 SAT/ACT: Two or More Races Students, % Test-Taking',
    'District 2022 SAT/ACT: Pacific Islander Students, % Test-Taking',
    'District 2022 SAT/ACT: White Students, % Test-Taking',
    'District 2022 SAT/ACT: Special Ed Students, % Test-Taking',
    'District 2022 SAT/ACT: Econ Disadv Students, % Test-Taking',
    'District 2022 SAT/ACT: EL Students, % Test-Taking',
    'District 2022 SAT/ACT: At Risk Students, % Test-Taking',
    'District 2022 SAT/ACT: All Students, % Graduates Above Criterion',
    'District 2022 SAT/ACT: Male Students, % Graduates Above Criterion',
    'District 2022 SAT/ACT: Female Students, % Graduates Above Criterion',
    'District 2022 SAT/ACT: African American Students, % Graduates Above Criterion',
    'District 2022 SAT/ACT: Hispanic Students, % Graduates Above Criterion',
    'District 2022 SAT/ACT: White Students, % Graduates Above Criterion',
    'District 2022 SAT/ACT: American Indian Students, % Graduates Above Criterion',
    'District 2022 SAT/ACT: Asian Students, % Graduates Above Criterion',
    'District 2022 SAT/ACT: Pacific Islander Students, % Graduates Above Criterion',
    'District 2022 SAT/ACT: Two or More Races Students, % Graduates Above Criterion',
    'District 2022 SAT/ACT: Econ Disadv Students, % Graduates Above Criterion',
    'District 2022 SAT/ACT: At Risk Students, % Graduates Above Criterion',
    'District 2022 SAT/ACT: EL Students, % Graduates Above Criterion',
    'District 2022 SAT/ACT: Special Ed Students, % Graduates Above Criterion',
]

# Convert additional_columns to lowercase to match the dataset
additional_columns = [col.lower() for col in additional_columns]


# Combine the dropout DataFrame with additional columns on District Name, District ID, and County
df_combined2 = df_combined.merge(dataframes['merged_2023'][additional_columns], on=['distname', 'district_id', 'county'], how='inner')

df_combined2

Unnamed: 0,distname,district_id,county,Mathematics (Approaches Grade Level),Reading/ELA (Approaches Grade Level),Writing (Approaches Grade Level),Science (Approaches Grade Level),Social Studies (Approaches Grade Level),Mathematics (Meets Grade Level),Reading/ELA (Meets Grade Level),...,"district 2022 sat/act: hispanic students, % graduates above criterion","district 2022 sat/act: white students, % graduates above criterion","district 2022 sat/act: american indian students, % graduates above criterion","district 2022 sat/act: asian students, % graduates above criterion","district 2022 sat/act: pacific islander students, % graduates above criterion","district 2022 sat/act: two or more races students, % graduates above criterion","district 2022 sat/act: econ disadv students, % graduates above criterion","district 2022 sat/act: at risk students, % graduates above criterion","district 2022 sat/act: el students, % graduates above criterion","district 2022 sat/act: special ed students, % graduates above criterion"
0,CAYUGA ISD,1902,'001,46.783784,51.306306,,42.388889,38.277778,29.954955,35.288288,...,,10.8,,,,,20.0,8.3,,0.0
1,ELKHART ISD,1903,'001,52.300000,54.652542,,51.071429,53.611111,26.850000,34.635593,...,16.7,16.9,,,,,15.8,0.0,,0.0
2,FRANKSTON ISD,1904,'001,49.540323,53.303279,,42.250000,46.200000,31.233871,32.426230,...,,35.9,,,,,11.1,11.1,,
3,NECHES ISD,1906,'001,45.344086,51.578947,,42.645161,41.733333,34.043011,28.810526,...,,31.8,,,,,16.7,0.0,,
4,PALESTINE ISD,1907,'001,66.485714,64.871166,,58.534483,50.833333,36.668571,33.257669,...,14.9,40.3,,,,12.5,10.5,3.6,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1204,NEWCASTLE ISD,252902,'252,45.000000,48.722222,,49.343750,31.529412,26.505747,32.533333,...,,0.0,,,,,11.1,9.1,,0.0
1205,OLNEY ISD,252903,'252,43.914729,49.048000,,52.022222,36.809524,28.767442,32.376000,...,12.5,33.3,,,,,13.8,0.0,,0.0
1206,ZAPATA COUNTY ISD,253901,'253,73.656250,75.965517,,67.069767,49.285714,42.796875,43.603448,...,4.2,,,,,,2.5,0.0,0.0,0.0
1207,CRYSTAL CITY ISD,254901,'254,47.719626,48.897196,,44.611111,42.277778,19.205607,19.644860,...,0.0,,,,,,0.0,0.0,,0.0


In [50]:
# Desired order of columns
desired_column_order = [
    # General
    'distname',
    'district_id',
    'county',

    # DREF:

    'DFLCHART',
    'DFLALTED',
    'ASVAB_STATUS',

    # DTYPE:

    'TEA Description',
    'NCES Description',
    'Charter School (Y/N)',

    # Masters Grade Level
    'Mathematics (Masters Grade Level)',
    'Reading/ELA (Masters Grade Level)',
    'Writing (Masters Grade Level)',
    'Science (Masters Grade Level)',
    'Social Studies (Masters Grade Level)',

    # Meets Grade Level
    'Mathematics (Meets Grade Level)',
    'Reading/ELA (Meets Grade Level)',
    'Writing (Meets Grade Level)',
    'Science (Meets Grade Level)',
    'Social Studies (Meets Grade Level)',

    # Approaches Grade Level
    'Mathematics (Approaches Grade Level)',
    'Reading/ELA (Approaches Grade Level)',
    'Writing (Approaches Grade Level)',
    'Science (Approaches Grade Level)',
    'Social Studies (Approaches Grade Level)',

    # Dropout Rates
    'All Students Dropout Rate',
    'Male Dropout Rate',
    'African American Dropout Rate',
    'American Indian Dropout Rate',
    'Asian Dropout Rate',
    'Hispanic Dropout Rate',
    'Pacific Islander Dropout Rate',
    'Two or More Races Dropout Rate',
    'White Dropout Rate',
    'Econ Disadv Dropout Rate',
    'Special Ed Dropout Rate',
    'At Risk Dropout Rate',
    'EB/EL Dropout Rate',

    # Demographic Metrics
    'District 2023 Student Membership: All Students Count',
    'District 2023 Student Membership: Male Percent',
    'District 2023 Student Membership: Female Percent',
    'District 2023 Student Membership: African American Percent',
    'District 2023 Student Membership: American Indian Percent',
    'District 2023 Student Membership: Asian Percent',
    'District 2023 Student Membership: Hispanic Percent',
    'District 2023 Student Membership: Pacific Islander Percent',
    'District 2023 Student Membership: Two or More Races Percent',
    'District 2023 Student Membership: White Percent',
    'District 2023 Student Membership: Econ Disadv Percent',
    'District 2023 Student Membership: Special Ed Percent',
    'District 2023 Student Membership: Gifted & Talented Percent',
    'District 2023 Student Membership: EB/EL Percent',
    'District 2023 Student Membership: At Risk Percent',
    'District 2023 Student Membership: Immigrant Percent',
    'District 2023 Student Membership: Gifted & Talented Percent',
    'District 2023 Staff: Teacher Student Ratio',

    # CCMR Rates
    'District 2022 College, Career, & Military Ready Graduates: All Students Rate',
    'District 2022 College, Career, & Military Ready Graduates: Male Rate',
    'District 2022 College, Career, & Military Ready Graduates: Female Rate',
    'District 2022 College, Career, & Military Ready Graduates: African American Rate',
    'District 2022 College, Career, & Military Ready Graduates: Hispanic Rate',
    'District 2022 College, Career, & Military Ready Graduates: White Rate',
    'District 2022 College, Career, & Military Ready Graduates: American Indian Rate',
    'District 2022 College, Career, & Military Ready Graduates: Asian Rate',
    'District 2022 College, Career, & Military Ready Graduates: Pacific Islander Rate',
    'District 2022 College, Career, & Military Ready Graduates: Two or More Races Rate',
    'District 2022 College, Career, & Military Ready Graduates: Econ Disadv Rate',
    'District 2022 College, Career, & Military Ready Graduates: Special Ed Rate',
    'District 2022 College, Career, & Military Ready Graduates: EB/EL Rate',
    'District 2022 College, Career, & Military Ready Graduates: At Risk Rate',


    # Attendence Rates
    'District 2022 Attendance: All Students Rate',
    'District 2022 Attendance: Two or More Races Rate',
    'District 2022 Attendance: Asian Rate',
    'District 2022 Attendance: Pacific Islander Rate',
    'District 2022 Attendance: African American Rate',
    'District 2022 Attendance: Hispanic Rate',
    'District 2022 Attendance: White Rate',
    'District 2022 Attendance: American Indian Rate',
    'District 2022 Attendance: Econ Disadv Rate',
    'District 2022 Attendance: Special Ed Rate',
    'District 2022 Attendance: Female Rate',
    'District 2022 Attendance: Male Rate',
    'District 2022 Attendance: EB/EL Rate',
    'District 2022 Attendance: At Risk Rate',

    # Chronic Absenteeism Rates
    '2022 district Chronic Absenteeism All Students Group: Rate',
    '2022 district Chronic Absenteeism African American Group: Rate',
    '2022 district Chronic Absenteeism Hispanic Group: Rate',
    '2022 district Chronic Absenteeism White Group: Rate',
    '2022 district Chronic Absenteeism American Indian Group: Rate',
    '2022 district Chronic Absenteeism Asian Group: Rate',
    '2022 district Chronic Absenteeism Pacific Islander Group: Rate',
    '2022 district Chronic Absenteeism Two or More Races Group: Rate',
    '2022 district Chronic Absenteeism Econ Disadv Group: Rate',
    '2022 district Chronic Absenteeism Special Ed Group: Rate',
    '2022 district Chronic Absenteeism EL Group: Rate',
    '2022 district Chronic Absenteeism At Risk Group: Rate',

    # 4-Year Longitudinal
    'District 2022 4-Year Longitudinal: [FHSP-DLA Graduates] for All Students Rate',
    'District 2022 4-Year Longitudinal: [FHSP-DLA Graduates] for Female Rate',
    'District 2022 4-Year Longitudinal: [FHSP-DLA Graduates] for Male Rate',
    'District 2022 4-Year Longitudinal: [FHSP-DLA Graduates] for African American Rate',
    'District 2022 4-Year Longitudinal: [FHSP-DLA Graduates] for American Indian Rate',
    'District 2022 4-Year Longitudinal: [FHSP-DLA Graduates] for Asian Rate',
    'District 2022 4-Year Longitudinal: [FHSP-DLA Graduates] for Hispanic Rate',
    'District 2022 4-Year Longitudinal: [FHSP-DLA Graduates] for Pacific Islander Rate',
    'District 2022 4-Year Longitudinal: [FHSP-DLA Graduates] for White Rate',
    'District 2022 4-Year Longitudinal: [FHSP-DLA Graduates] for Two or More Races Rate',
    'District 2022 4-Year Longitudinal: [FHSP-DLA Graduates] for Econ Disadv Rate',
    'District 2022 4-Year Longitudinal: [FHSP-DLA Graduates] for Special Ed Rate',
    'District 2022 4-Year Longitudinal: [FHSP-DLA Graduates] for EB/EL Rate',
    'District 2022 4-Year Longitudinal: [FHSP-DLA Graduates] for At Risk Rate',
    'District 2022 4-Year Longitudinal: [RHSP/DAP or FHSP-E/DLA Graduates] for All Students Rate',
    'District 2022 4-Year Longitudinal: [RHSP/DAP or FHSP-E/DLA Graduates] for Male Rate',
    'District 2022 4-Year Longitudinal: [RHSP/DAP or FHSP-E/DLA Graduates] for Female Rate',
    'District 2022 4-Year Longitudinal: [RHSP/DAP or FHSP-E/DLA Graduates] for African American Rate',
    'District 2022 4-Year Longitudinal: [RHSP/DAP or FHSP-E/DLA Graduates] for American Indian Rate',
    'District 2022 4-Year Longitudinal: [RHSP/DAP or FHSP-E/DLA Graduates] for Asian Rate',
    'District 2022 4-Year Longitudinal: [RHSP/DAP or FHSP-E/DLA Graduates] for Hispanic Rate',
    'District 2022 4-Year Longitudinal: [RHSP/DAP or FHSP-E/DLA Graduates] for Pacific Islander Rate',
    'District 2022 4-Year Longitudinal: [RHSP/DAP or FHSP-E/DLA Graduates] for White Rate',
    'District 2022 4-Year Longitudinal: [RHSP/DAP or FHSP-E/DLA Graduates] for Two or More Races Rate',
    'District 2022 4-Year Longitudinal: [RHSP/DAP or FHSP-E/DLA Graduates] for Econ Disadv Rate',
    'District 2022 4-Year Longitudinal: [RHSP/DAP or FHSP-E/DLA Graduates] for Special Ed Rate',
    'District 2022 4-Year Longitudinal: [RHSP/DAP or FHSP-E/DLA Graduates] for EB/EL Rate',
    'District 2022 4-Year Longitudinal: [RHSP/DAP or FHSP-E/DLA Graduates] for At Risk Rate',

    # AP/IB
    'District 2022 AP/IB Course Completion Graduates: All Students Rate',
    'District 2022 AP/IB Course Completion Graduates: African American Rate',
    'District 2022 AP/IB Course Completion Graduates: Hispanic Rate',
    'District 2022 AP/IB Course Completion Graduates: White Rate',
    'District 2022 AP/IB Course Completion Graduates: American Indian Rate',
    'District 2022 AP/IB Course Completion Graduates: Asian Rate',
    'District 2022 AP/IB Course Completion Graduates: Pacific Islander Rate',
    'District 2022 AP/IB Course Completion Graduates: Two or More Races Rate',
    'District 2022 AP/IB Course Completion Graduates: Male Rate',
    'District 2022 AP/IB Course Completion Graduates: Female Rate',
    'District 2022 AP/IB Course Completion Graduates: Econ Disadv Rate',
    'District 2022 AP/IB Course Completion Graduates: Special Ed Rate',
    'District 2022 AP/IB Course Completion Graduates: EB/EL Rate',
    'District 2022 AP/IB Course Completion Graduates: At Risk Rate',
    'District 2022 AP/IB: All Students (All Subjects) % Taking',
    'District 2022 AP/IB: Male (All Subjects) % Taking',
    'District 2022 AP/IB: Female (All Subjects) % Taking',
    'District 2022 AP/IB: African American (All Subjects) % Taking',
    'District 2022 AP/IB: American Indian (All Subjects) % Taking',
    'District 2022 AP/IB: Asian (All Subjects) % Taking',
    'District 2022 AP/IB: Hispanic (All Subjects) % Taking',
    'District 2022 AP/IB: Two or More Races (All Subjects) % Taking',
    'District 2022 AP/IB: Pacific Islander (All Subjects) % Taking',
    'District 2022 AP/IB: White (All Subjects) % Taking',
    'District 2022 AP/IB: Special Ed (All Subjects) % Taking',
    'District 2022 AP/IB: Econ Disadv (All Subjects) % Taking',
    'District 2022 AP/IB: EB/EL (All Subjects) % Taking',
    'District 2022 AP/IB: At Risk (All Subjects) % Taking',
    'District 2022 AP/IB: All Students (All Subjects) % Students Above Criterion',
    'District 2022 AP/IB: Female (All Subjects) % Students Above Criterion',
    'District 2022 AP/IB: Male (All Subjects) % Students Above Criterion',
    'District 2022 AP/IB: African American (All Subjects) % Students Above Criterion',
    'District 2022 AP/IB: American Indian (All Subjects) % Students Above Criterion',
    'District 2022 AP/IB: Asian (All Subjects) % Students Above Criterion',
    'District 2022 AP/IB: Hispanic (All Subjects) % Students Above Criterion',
    'District 2022 AP/IB: Two or More Races (All Subjects) % Students Above Criterion',
    'District 2022 AP/IB: Pacific Islander (All Subjects) % Students Above Criterion',
    'District 2022 AP/IB: White (All Subjects) % Students Above Criterion',
    'District 2022 AP/IB: Special Ed (All Subjects) % Students Above Criterion',
    'District 2022 AP/IB: Econ Disadv (All Subjects) % Students Above Criterion',
    'District 2022 AP/IB: EB/EL (All Subjects) % Students Above Criterion',
    'District 2022 AP/IB: At Risk (All Subjects) % Students Above Criterion',

    # SAT/ACT
    'District 2022 SAT/ACT: All Students, % Above Criterion',
    'District 2022 SAT/ACT: Female Students, % Above Criterion',
    'District 2022 SAT/ACT: Male Students, % Above Criterion',
    'District 2022 SAT/ACT: African American Students, % Above Criterion',
    'District 2022 SAT/ACT: American Indian Students, % Above Criterion',
    'District 2022 SAT/ACT: Asian Students, % Above Criterion',
    'District 2022 SAT/ACT: Hispanic Students, % Above Criterion',
    'District 2022 SAT/ACT: Two or More Races Students, % Above Criterion',
    'District 2022 SAT/ACT: Pacific Islander Students, % Above Criterion',
    'District 2022 SAT/ACT: White Students, % Above Criterion',
    'District 2022 SAT/ACT: Special Ed Students, % Above Criterion',
    'District 2022 SAT/ACT: Econ Disadv Students, % Above Criterion',
    'District 2022 SAT/ACT: EL Students, % Above Criterion',
    'District 2022 SAT/ACT: At Risk Students, % Above Criterion',
    'District 2022 SAT/ACT: All Students, % Test-Taking',
    'District 2022 SAT/ACT: Female Students, % Test-Taking',
    'District 2022 SAT/ACT: Male Students, % Test-Taking',
    'District 2022 SAT/ACT: African American Students, % Test-Taking',
    'District 2022 SAT/ACT: American Indian Students, % Test-Taking',
    'District 2022 SAT/ACT: Asian Students, % Test-Taking',
    'District 2022 SAT/ACT: Hispanic Students, % Test-Taking',
    'District 2022 SAT/ACT: Two or More Races Students, % Test-Taking',
    'District 2022 SAT/ACT: Pacific Islander Students, % Test-Taking',
    'District 2022 SAT/ACT: White Students, % Test-Taking',
    'District 2022 SAT/ACT: Special Ed Students, % Test-Taking',
    'District 2022 SAT/ACT: Econ Disadv Students, % Test-Taking',
    'District 2022 SAT/ACT: EL Students, % Test-Taking',
    'District 2022 SAT/ACT: At Risk Students, % Test-Taking',
    'District 2022 SAT/ACT: All Students, % Graduates Above Criterion',
    'District 2022 SAT/ACT: Male Students, % Graduates Above Criterion',
    'District 2022 SAT/ACT: Female Students, % Graduates Above Criterion',
    'District 2022 SAT/ACT: African American Students, % Graduates Above Criterion',
    'District 2022 SAT/ACT: Hispanic Students, % Graduates Above Criterion',
    'District 2022 SAT/ACT: White Students, % Graduates Above Criterion',
    'District 2022 SAT/ACT: American Indian Students, % Graduates Above Criterion',
    'District 2022 SAT/ACT: Asian Students, % Graduates Above Criterion',
    'District 2022 SAT/ACT: Pacific Islander Students, % Graduates Above Criterion',
    'District 2022 SAT/ACT: Two or More Races Students, % Graduates Above Criterion',
    'District 2022 SAT/ACT: Econ Disadv Students, % Graduates Above Criterion',
    'District 2022 SAT/ACT: At Risk Students, % Graduates Above Criterion',
    'District 2022 SAT/ACT: EL Students, % Graduates Above Criterion',
    'District 2022 SAT/ACT: Special Ed Students, % Graduates Above Criterion',
]

# Convert dataset column names to lowercase for matching
df_combined2.columns = df_combined2.columns.str.lower()

# Create a mapping from lowercase to original column names
column_name_mapping = {col.lower(): col for col in df_combined2.columns}

# Convert desired order to lowercase for matching
desired_column_order_lower = [col.lower() for col in desired_column_order]

# Replace lowercase column names with the original casing where possible
final_column_order = [column_name_mapping[col] for col in desired_column_order_lower if col in column_name_mapping]

# Reorder the DataFrame using the properly cased column names
df_combined3 = df_combined2[final_column_order]

# Display the reordered DataFrame
df_combined3


Unnamed: 0,distname,district_id,county,dflchart,dflalted,asvab_status,tea description,nces description,charter school (y/n),mathematics (masters grade level),...,"district 2022 sat/act: hispanic students, % graduates above criterion","district 2022 sat/act: white students, % graduates above criterion","district 2022 sat/act: american indian students, % graduates above criterion","district 2022 sat/act: asian students, % graduates above criterion","district 2022 sat/act: pacific islander students, % graduates above criterion","district 2022 sat/act: two or more races students, % graduates above criterion","district 2022 sat/act: econ disadv students, % graduates above criterion","district 2022 sat/act: at risk students, % graduates above criterion","district 2022 sat/act: el students, % graduates above criterion","district 2022 sat/act: special ed students, % graduates above criterion"
0,CAYUGA ISD,1902,'001,N,N,,Rural,Rural-Remote,N,10.396396,...,,10.8,,,,,20.0,8.3,,0.0
1,ELKHART ISD,1903,'001,N,N,,Non-metropolitan Stable,Rural-Distant,N,8.883333,...,16.7,16.9,,,,,15.8,0.0,,0.0
2,FRANKSTON ISD,1904,'001,N,N,ASVAB Alternative Test Offered,Rural,Rural-Distant,N,10.758065,...,,35.9,,,,,11.1,11.1,,
3,NECHES ISD,1906,'001,N,N,,Rural,Rural-Distant,N,9.161290,...,,31.8,,,,,16.7,0.0,,
4,PALESTINE ISD,1907,'001,N,N,,Independent Town,Town-Distant,N,10.942857,...,14.9,40.3,,,,12.5,10.5,3.6,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1204,NEWCASTLE ISD,252902,'252,N,N,,Rural,Rural-Distant,N,9.000000,...,,0.0,,,,,11.1,9.1,,0.0
1205,OLNEY ISD,252903,'252,N,N,,Rural,Town-Distant,N,12.333333,...,12.5,33.3,,,,,13.8,0.0,,0.0
1206,ZAPATA COUNTY ISD,253901,'253,N,N,,Other Central City Suburban,Town-Remote,N,15.804688,...,4.2,,,,,,2.5,0.0,0.0,0.0
1207,CRYSTAL CITY ISD,254901,'254,N,N,,Non-metropolitan Stable,Town-Remote,N,4.177570,...,0.0,,,,,,0.0,0.0,,0.0


In [51]:
from sklearn.impute import SimpleImputer

# Define the columns to exclude from imputation
exclude_columns = ['distname', 'district_id', 'county', 'DFLCHART', 'DFLALTED', 'ASVAB_STATUS', 
                   'TEA Description', 'NCES Description', 'Charter School (Y/N)']

# Convert to lowercase for consistency
exclude_columns = [col.lower() for col in exclude_columns]

# Separate the columns to impute and the excluded columns
impute_columns = df_combined3.drop(columns=exclude_columns, errors='ignore')
excluded_data = df_combined3[exclude_columns]

# Identify and drop columns that are entirely NaN
fully_missing_cols = impute_columns.columns[impute_columns.isna().all()]
print("Dropping fully missing columns:", fully_missing_cols.tolist())  # Debugging check

impute_columns = impute_columns.drop(columns=fully_missing_cols, errors='ignore')

# Create an imputer object with the median strategy
imputer = SimpleImputer(strategy='median')

# Fit and transform the imputation on the selected columns
imputed_data = pd.DataFrame(imputer.fit_transform(impute_columns), columns=impute_columns.columns)

# Combine the imputed data with the excluded columns
df_combined3_drop_imputed = pd.concat([excluded_data.reset_index(drop=True), imputed_data.reset_index(drop=True)], axis=1)

# Check for missing values and preview the imputed DataFrame
print(df_combined3_drop_imputed.isnull().sum())  # Should show 0 for all columns

final_2023_combined = df_combined3_drop_imputed

final_2023_combined

Dropping fully missing columns: ['writing (masters grade level)', 'writing (meets grade level)', 'writing (approaches grade level)']
distname                                                                          0
district_id                                                                       0
county                                                                            0
dflchart                                                                          0
dflalted                                                                          0
                                                                                 ..
district 2022 sat/act: two or more races students, % graduates above criterion    0
district 2022 sat/act: econ disadv students, % graduates above criterion          0
district 2022 sat/act: at risk students, % graduates above criterion              0
district 2022 sat/act: el students, % graduates above criterion                   0
district 2022 sat/act: spec

Unnamed: 0,distname,district_id,county,dflchart,dflalted,asvab_status,tea description,nces description,charter school (y/n),mathematics (masters grade level),...,"district 2022 sat/act: hispanic students, % graduates above criterion","district 2022 sat/act: white students, % graduates above criterion","district 2022 sat/act: american indian students, % graduates above criterion","district 2022 sat/act: asian students, % graduates above criterion","district 2022 sat/act: pacific islander students, % graduates above criterion","district 2022 sat/act: two or more races students, % graduates above criterion","district 2022 sat/act: econ disadv students, % graduates above criterion","district 2022 sat/act: at risk students, % graduates above criterion","district 2022 sat/act: el students, % graduates above criterion","district 2022 sat/act: special ed students, % graduates above criterion"
0,CAYUGA ISD,1902,'001,N,N,,Rural,Rural-Remote,N,10.396396,...,9.1,10.8,20.0,55.6,21.4,20.0,20.0,8.3,0.0,0.0
1,ELKHART ISD,1903,'001,N,N,,Non-metropolitan Stable,Rural-Distant,N,8.883333,...,16.7,16.9,20.0,55.6,21.4,20.0,15.8,0.0,0.0,0.0
2,FRANKSTON ISD,1904,'001,N,N,ASVAB Alternative Test Offered,Rural,Rural-Distant,N,10.758065,...,9.1,35.9,20.0,55.6,21.4,20.0,11.1,11.1,0.0,0.0
3,NECHES ISD,1906,'001,N,N,,Rural,Rural-Distant,N,9.161290,...,9.1,31.8,20.0,55.6,21.4,20.0,16.7,0.0,0.0,0.0
4,PALESTINE ISD,1907,'001,N,N,,Independent Town,Town-Distant,N,10.942857,...,14.9,40.3,20.0,55.6,21.4,12.5,10.5,3.6,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1204,NEWCASTLE ISD,252902,'252,N,N,,Rural,Rural-Distant,N,9.000000,...,9.1,0.0,20.0,55.6,21.4,20.0,11.1,9.1,0.0,0.0
1205,OLNEY ISD,252903,'252,N,N,,Rural,Town-Distant,N,12.333333,...,12.5,33.3,20.0,55.6,21.4,20.0,13.8,0.0,0.0,0.0
1206,ZAPATA COUNTY ISD,253901,'253,N,N,,Other Central City Suburban,Town-Remote,N,15.804688,...,4.2,21.1,20.0,55.6,21.4,20.0,2.5,0.0,0.0,0.0
1207,CRYSTAL CITY ISD,254901,'254,N,N,,Non-metropolitan Stable,Town-Remote,N,4.177570,...,0.0,21.1,20.0,55.6,21.4,20.0,0.0,0.0,0.0,0.0


In [52]:
# Making a demographic dataset
demographic_columns = [
    # General
    'distname',
    'district_id',
    'county',

    # DREF:
    'dflchart',
    'dflalted',
    'asvab_status',

    # DTYPE:
    'tea description',
    'nces description',
    'charter school (y/n)',

    # Demographic Metrics
    'district 2023 student membership: all students count',
    'district 2023 student membership: male percent',
    'district 2023 student membership: female percent',
    'district 2023 student membership: african american percent',
    'district 2023 student membership: american indian percent',
    'district 2023 student membership: asian percent',
    'district 2023 student membership: hispanic percent',
    'district 2023 student membership: pacific islander percent',
    'district 2023 student membership: two or more races percent',
    'district 2023 student membership: white percent',
    'district 2023 student membership: econ disadv percent',
    'district 2023 student membership: special ed percent',
    'district 2023 student membership: gifted & talented percent',
    'district 2023 student membership: eb/el percent',
    'district 2023 student membership: at risk percent',
    'district 2023 student membership: immigrant percent',
    'district 2023 student membership: gifted & talented percent',
    'district 2023 staff: teacher student ratio'
]

# Ensure column names are lowercase for consistency
demographic_columns = [col.lower() for col in demographic_columns]

# Extract the subset of columns from final_2023_combined
final_2023_demographics = final_2023_combined[demographic_columns]

# Verify the shape and preview the data
print(final_2023_demographics.shape)  # Should match expected number of rows and selected columns
final_2023_demographics

(1209, 33)


Unnamed: 0,distname,district_id,county,dflchart,dflalted,asvab_status,tea description,nces description,charter school (y/n),district 2023 student membership: all students count,...,district 2023 student membership: gifted & talented percent,district 2023 student membership: gifted & talented percent.1,district 2023 student membership: eb/el percent,district 2023 student membership: at risk percent,district 2023 student membership: immigrant percent,district 2023 student membership: gifted & talented percent.2,district 2023 student membership: gifted & talented percent.3,district 2023 student membership: gifted & talented percent.4,district 2023 student membership: gifted & talented percent.5,district 2023 staff: teacher student ratio
0,CAYUGA ISD,1902,'001,N,N,,Rural,Rural-Remote,N,593.0,...,5.1,5.1,1.3,40.0,0.0,5.1,5.1,5.1,5.1,12.3
1,ELKHART ISD,1903,'001,N,N,,Non-metropolitan Stable,Rural-Distant,N,1194.0,...,4.7,4.7,2.9,29.5,0.0,4.7,4.7,4.7,4.7,11.4
2,FRANKSTON ISD,1904,'001,N,N,ASVAB Alternative Test Offered,Rural,Rural-Distant,N,801.0,...,6.5,6.5,4.4,36.2,0.0,6.5,6.5,6.5,6.5,11.8
3,NECHES ISD,1906,'001,N,N,,Rural,Rural-Distant,N,309.0,...,7.1,7.1,2.3,33.7,0.0,7.1,7.1,7.1,7.1,11.0
4,PALESTINE ISD,1907,'001,N,N,,Independent Town,Town-Distant,N,3296.0,...,1.9,1.9,17.5,54.9,1.2,1.9,1.9,1.9,1.9,13.1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1204,NEWCASTLE ISD,252902,'252,N,N,,Rural,Rural-Distant,N,219.0,...,2.3,2.3,3.7,36.1,0.5,2.3,2.3,2.3,2.3,9.5
1205,OLNEY ISD,252903,'252,N,N,,Rural,Town-Distant,N,726.0,...,4.0,4.0,8.7,39.5,2.3,4.0,4.0,4.0,4.0,10.0
1206,ZAPATA COUNTY ISD,253901,'253,N,N,,Other Central City Suburban,Town-Remote,N,3373.0,...,7.1,7.1,31.9,56.5,0.4,7.1,7.1,7.1,7.1,14.1
1207,CRYSTAL CITY ISD,254901,'254,N,N,,Non-metropolitan Stable,Town-Remote,N,1794.0,...,5.7,5.7,2.2,53.0,0.1,5.7,5.7,5.7,5.7,16.0


In [59]:
# Making a performance dataset
performance_columns = [
    # General
    'distname',
    'district_id',
    'county',

    # DREF:
    'dflchart',
    'dflalted',
    'asvab_status',

    # DTYPE:
    'tea description',
    'nces description',
    'charter school (y/n)',

   # Masters Grade Level
    'Mathematics (Masters Grade Level)',
    'Reading/ELA (Masters Grade Level)',
    'Writing (Masters Grade Level)',
    'Science (Masters Grade Level)',
    'Social Studies (Masters Grade Level)',

    # Meets Grade Level
    'Mathematics (Meets Grade Level)',
    'Reading/ELA (Meets Grade Level)',
    'Writing (Meets Grade Level)',
    'Science (Meets Grade Level)',
    'Social Studies (Meets Grade Level)',

    # Approaches Grade Level
    'Mathematics (Approaches Grade Level)',
    'Reading/ELA (Approaches Grade Level)',
    'Writing (Approaches Grade Level)',
    'Science (Approaches Grade Level)',
    'Social Studies (Approaches Grade Level)',

    # Dropout Rates
    'All Students Dropout Rate',
    'Male Dropout Rate',
    'African American Dropout Rate',
    'American Indian Dropout Rate',
    'Asian Dropout Rate',
    'Hispanic Dropout Rate',
    'Pacific Islander Dropout Rate',
    'Two or More Races Dropout Rate',
    'White Dropout Rate',
    'Econ Disadv Dropout Rate',
    'Special Ed Dropout Rate',
    'At Risk Dropout Rate',
    'EB/EL Dropout Rate',

     # CCMR Rates
    'District 2022 College, Career, & Military Ready Graduates: All Students Rate',
    'District 2022 College, Career, & Military Ready Graduates: Male Rate',
    'District 2022 College, Career, & Military Ready Graduates: Female Rate',
    'District 2022 College, Career, & Military Ready Graduates: African American Rate',
    'District 2022 College, Career, & Military Ready Graduates: Hispanic Rate',
    'District 2022 College, Career, & Military Ready Graduates: White Rate',
    'District 2022 College, Career, & Military Ready Graduates: American Indian Rate',
    'District 2022 College, Career, & Military Ready Graduates: Asian Rate',
    'District 2022 College, Career, & Military Ready Graduates: Pacific Islander Rate',
    'District 2022 College, Career, & Military Ready Graduates: Two or More Races Rate',
    'District 2022 College, Career, & Military Ready Graduates: Econ Disadv Rate',
    'District 2022 College, Career, & Military Ready Graduates: Special Ed Rate',
    'District 2022 College, Career, & Military Ready Graduates: EB/EL Rate',
    'District 2022 College, Career, & Military Ready Graduates: At Risk Rate',


    # Attendence Rates
    'District 2022 Attendance: All Students Rate',
    'District 2022 Attendance: Two or More Races Rate',
    'District 2022 Attendance: Asian Rate',
    'District 2022 Attendance: Pacific Islander Rate',
    'District 2022 Attendance: African American Rate',
    'District 2022 Attendance: Hispanic Rate',
    'District 2022 Attendance: White Rate',
    'District 2022 Attendance: American Indian Rate',
    'District 2022 Attendance: Econ Disadv Rate',
    'District 2022 Attendance: Special Ed Rate',
    'District 2022 Attendance: Female Rate',
    'District 2022 Attendance: Male Rate',
    'District 2022 Attendance: EB/EL Rate',
    'District 2022 Attendance: At Risk Rate',

    # Chronic Absenteeism Rates
    '2022 district Chronic Absenteeism All Students Group: Rate',
    '2022 district Chronic Absenteeism African American Group: Rate',
    '2022 district Chronic Absenteeism Hispanic Group: Rate',
    '2022 district Chronic Absenteeism White Group: Rate',
    '2022 district Chronic Absenteeism American Indian Group: Rate',
    '2022 district Chronic Absenteeism Asian Group: Rate',
    '2022 district Chronic Absenteeism Pacific Islander Group: Rate',
    '2022 district Chronic Absenteeism Two or More Races Group: Rate',
    '2022 district Chronic Absenteeism Econ Disadv Group: Rate',
    '2022 district Chronic Absenteeism Special Ed Group: Rate',
    '2022 district Chronic Absenteeism EL Group: Rate',
    '2022 district Chronic Absenteeism At Risk Group: Rate',

    # 4-Year Longitudinal
    'District 2022 4-Year Longitudinal: [FHSP-DLA Graduates] for All Students Rate',
    'District 2022 4-Year Longitudinal: [FHSP-DLA Graduates] for Female Rate',
    'District 2022 4-Year Longitudinal: [FHSP-DLA Graduates] for Male Rate',
    'District 2022 4-Year Longitudinal: [FHSP-DLA Graduates] for African American Rate',
    'District 2022 4-Year Longitudinal: [FHSP-DLA Graduates] for American Indian Rate',
    'District 2022 4-Year Longitudinal: [FHSP-DLA Graduates] for Asian Rate',
    'District 2022 4-Year Longitudinal: [FHSP-DLA Graduates] for Hispanic Rate',
    'District 2022 4-Year Longitudinal: [FHSP-DLA Graduates] for Pacific Islander Rate',
    'District 2022 4-Year Longitudinal: [FHSP-DLA Graduates] for White Rate',
    'District 2022 4-Year Longitudinal: [FHSP-DLA Graduates] for Two or More Races Rate',
    'District 2022 4-Year Longitudinal: [FHSP-DLA Graduates] for Econ Disadv Rate',
    'District 2022 4-Year Longitudinal: [FHSP-DLA Graduates] for Special Ed Rate',
    'District 2022 4-Year Longitudinal: [FHSP-DLA Graduates] for EB/EL Rate',
    'District 2022 4-Year Longitudinal: [FHSP-DLA Graduates] for At Risk Rate',
    'District 2022 4-Year Longitudinal: [RHSP/DAP or FHSP-E/DLA Graduates] for All Students Rate',
    'District 2022 4-Year Longitudinal: [RHSP/DAP or FHSP-E/DLA Graduates] for Male Rate',
    'District 2022 4-Year Longitudinal: [RHSP/DAP or FHSP-E/DLA Graduates] for Female Rate',
    'District 2022 4-Year Longitudinal: [RHSP/DAP or FHSP-E/DLA Graduates] for African American Rate',
    'District 2022 4-Year Longitudinal: [RHSP/DAP or FHSP-E/DLA Graduates] for American Indian Rate',
    'District 2022 4-Year Longitudinal: [RHSP/DAP or FHSP-E/DLA Graduates] for Asian Rate',
    'District 2022 4-Year Longitudinal: [RHSP/DAP or FHSP-E/DLA Graduates] for Hispanic Rate',
    'District 2022 4-Year Longitudinal: [RHSP/DAP or FHSP-E/DLA Graduates] for Pacific Islander Rate',
    'District 2022 4-Year Longitudinal: [RHSP/DAP or FHSP-E/DLA Graduates] for White Rate',
    'District 2022 4-Year Longitudinal: [RHSP/DAP or FHSP-E/DLA Graduates] for Two or More Races Rate',
    'District 2022 4-Year Longitudinal: [RHSP/DAP or FHSP-E/DLA Graduates] for Econ Disadv Rate',
    'District 2022 4-Year Longitudinal: [RHSP/DAP or FHSP-E/DLA Graduates] for Special Ed Rate',
    'District 2022 4-Year Longitudinal: [RHSP/DAP or FHSP-E/DLA Graduates] for EB/EL Rate',
    'District 2022 4-Year Longitudinal: [RHSP/DAP or FHSP-E/DLA Graduates] for At Risk Rate',

    # AP/IB
    'District 2022 AP/IB Course Completion Graduates: All Students Rate',
    'District 2022 AP/IB Course Completion Graduates: African American Rate',
    'District 2022 AP/IB Course Completion Graduates: Hispanic Rate',
    'District 2022 AP/IB Course Completion Graduates: White Rate',
    'District 2022 AP/IB Course Completion Graduates: American Indian Rate',
    'District 2022 AP/IB Course Completion Graduates: Asian Rate',
    'District 2022 AP/IB Course Completion Graduates: Pacific Islander Rate',
    'District 2022 AP/IB Course Completion Graduates: Two or More Races Rate',
    'District 2022 AP/IB Course Completion Graduates: Male Rate',
    'District 2022 AP/IB Course Completion Graduates: Female Rate',
    'District 2022 AP/IB Course Completion Graduates: Econ Disadv Rate',
    'District 2022 AP/IB Course Completion Graduates: Special Ed Rate',
    'District 2022 AP/IB Course Completion Graduates: EB/EL Rate',
    'District 2022 AP/IB Course Completion Graduates: At Risk Rate',
    'District 2022 AP/IB: All Students (All Subjects) % Taking',
    'District 2022 AP/IB: Male (All Subjects) % Taking',
    'District 2022 AP/IB: Female (All Subjects) % Taking',
    'District 2022 AP/IB: African American (All Subjects) % Taking',
    'District 2022 AP/IB: American Indian (All Subjects) % Taking',
    'District 2022 AP/IB: Asian (All Subjects) % Taking',
    'District 2022 AP/IB: Hispanic (All Subjects) % Taking',
    'District 2022 AP/IB: Two or More Races (All Subjects) % Taking',
    'District 2022 AP/IB: Pacific Islander (All Subjects) % Taking',
    'District 2022 AP/IB: White (All Subjects) % Taking',
    'District 2022 AP/IB: Special Ed (All Subjects) % Taking',
    'District 2022 AP/IB: Econ Disadv (All Subjects) % Taking',
    'District 2022 AP/IB: EB/EL (All Subjects) % Taking',
    'District 2022 AP/IB: At Risk (All Subjects) % Taking',
    'District 2022 AP/IB: All Students (All Subjects) % Students Above Criterion',
    'District 2022 AP/IB: Female (All Subjects) % Students Above Criterion',
    'District 2022 AP/IB: Male (All Subjects) % Students Above Criterion',
    'District 2022 AP/IB: African American (All Subjects) % Students Above Criterion',
    'District 2022 AP/IB: American Indian (All Subjects) % Students Above Criterion',
    'District 2022 AP/IB: Asian (All Subjects) % Students Above Criterion',
    'District 2022 AP/IB: Hispanic (All Subjects) % Students Above Criterion',
    'District 2022 AP/IB: Two or More Races (All Subjects) % Students Above Criterion',
    'District 2022 AP/IB: Pacific Islander (All Subjects) % Students Above Criterion',
    'District 2022 AP/IB: White (All Subjects) % Students Above Criterion',
    'District 2022 AP/IB: Special Ed (All Subjects) % Students Above Criterion',
    'District 2022 AP/IB: Econ Disadv (All Subjects) % Students Above Criterion',
    'District 2022 AP/IB: EB/EL (All Subjects) % Students Above Criterion',
    'District 2022 AP/IB: At Risk (All Subjects) % Students Above Criterion',

    # SAT/ACT
    'District 2022 SAT/ACT: All Students, % Above Criterion',
    'District 2022 SAT/ACT: Female Students, % Above Criterion',
    'District 2022 SAT/ACT: Male Students, % Above Criterion',
    'District 2022 SAT/ACT: African American Students, % Above Criterion',
    'District 2022 SAT/ACT: American Indian Students, % Above Criterion',
    'District 2022 SAT/ACT: Asian Students, % Above Criterion',
    'District 2022 SAT/ACT: Hispanic Students, % Above Criterion',
    'District 2022 SAT/ACT: Two or More Races Students, % Above Criterion',
    'District 2022 SAT/ACT: Pacific Islander Students, % Above Criterion',
    'District 2022 SAT/ACT: White Students, % Above Criterion',
    'District 2022 SAT/ACT: Special Ed Students, % Above Criterion',
    'District 2022 SAT/ACT: Econ Disadv Students, % Above Criterion',
    'District 2022 SAT/ACT: EL Students, % Above Criterion',
    'District 2022 SAT/ACT: At Risk Students, % Above Criterion',
    'District 2022 SAT/ACT: All Students, % Test-Taking',
    'District 2022 SAT/ACT: Female Students, % Test-Taking',
    'District 2022 SAT/ACT: Male Students, % Test-Taking',
    'District 2022 SAT/ACT: African American Students, % Test-Taking',
    'District 2022 SAT/ACT: American Indian Students, % Test-Taking',
    'District 2022 SAT/ACT: Asian Students, % Test-Taking',
    'District 2022 SAT/ACT: Hispanic Students, % Test-Taking',
    'District 2022 SAT/ACT: Two or More Races Students, % Test-Taking',
    'District 2022 SAT/ACT: Pacific Islander Students, % Test-Taking',
    'District 2022 SAT/ACT: White Students, % Test-Taking',
    'District 2022 SAT/ACT: Special Ed Students, % Test-Taking',
    'District 2022 SAT/ACT: Econ Disadv Students, % Test-Taking',
    'District 2022 SAT/ACT: EL Students, % Test-Taking',
    'District 2022 SAT/ACT: At Risk Students, % Test-Taking',
    'District 2022 SAT/ACT: All Students, % Graduates Above Criterion',
    'District 2022 SAT/ACT: Male Students, % Graduates Above Criterion',
    'District 2022 SAT/ACT: Female Students, % Graduates Above Criterion',
    'District 2022 SAT/ACT: African American Students, % Graduates Above Criterion',
    'District 2022 SAT/ACT: Hispanic Students, % Graduates Above Criterion',
    'District 2022 SAT/ACT: White Students, % Graduates Above Criterion',
    'District 2022 SAT/ACT: American Indian Students, % Graduates Above Criterion',
    'District 2022 SAT/ACT: Asian Students, % Graduates Above Criterion',
    'District 2022 SAT/ACT: Pacific Islander Students, % Graduates Above Criterion',
    'District 2022 SAT/ACT: Two or More Races Students, % Graduates Above Criterion',
    'District 2022 SAT/ACT: Econ Disadv Students, % Graduates Above Criterion',
    'District 2022 SAT/ACT: At Risk Students, % Graduates Above Criterion',
    'District 2022 SAT/ACT: EL Students, % Graduates Above Criterion',
    'District 2022 SAT/ACT: Special Ed Students, % Graduates Above Criterion'
]

# Ensure column names are lowercase for consistency
performance_columns = [col.lower() for col in performance_columns]

# Identify and drop columns that are entirely NaN in final_2023_combined
perf_missing_cols = [col for col in performance_columns if col in df_combined3.columns and df_combined3[col].isna().all()]
print("Dropping fully missing columns:", perf_missing_cols)  # Debugging check

# Drop fully missing columns from the list
performance_columns = [col for col in performance_columns if col not in perf_missing_cols]

# Extract the subset of columns from final_2023_combined
final_2023_performance = final_2023_combined[performance_columns]

# Verify the shape and preview the data
print(final_2023_performance.shape)  # Should match expected number of rows and selected columns
final_2023_performance

Dropping fully missing columns: ['writing (masters grade level)', 'writing (meets grade level)', 'writing (approaches grade level)']
(1209, 186)


Unnamed: 0,distname,district_id,county,dflchart,dflalted,asvab_status,tea description,nces description,charter school (y/n),mathematics (masters grade level),...,"district 2022 sat/act: hispanic students, % graduates above criterion","district 2022 sat/act: white students, % graduates above criterion","district 2022 sat/act: american indian students, % graduates above criterion","district 2022 sat/act: asian students, % graduates above criterion","district 2022 sat/act: pacific islander students, % graduates above criterion","district 2022 sat/act: two or more races students, % graduates above criterion","district 2022 sat/act: econ disadv students, % graduates above criterion","district 2022 sat/act: at risk students, % graduates above criterion","district 2022 sat/act: el students, % graduates above criterion","district 2022 sat/act: special ed students, % graduates above criterion"
0,CAYUGA ISD,1902,'001,N,N,,Rural,Rural-Remote,N,10.396396,...,9.1,10.8,20.0,55.6,21.4,20.0,20.0,8.3,0.0,0.0
1,ELKHART ISD,1903,'001,N,N,,Non-metropolitan Stable,Rural-Distant,N,8.883333,...,16.7,16.9,20.0,55.6,21.4,20.0,15.8,0.0,0.0,0.0
2,FRANKSTON ISD,1904,'001,N,N,ASVAB Alternative Test Offered,Rural,Rural-Distant,N,10.758065,...,9.1,35.9,20.0,55.6,21.4,20.0,11.1,11.1,0.0,0.0
3,NECHES ISD,1906,'001,N,N,,Rural,Rural-Distant,N,9.161290,...,9.1,31.8,20.0,55.6,21.4,20.0,16.7,0.0,0.0,0.0
4,PALESTINE ISD,1907,'001,N,N,,Independent Town,Town-Distant,N,10.942857,...,14.9,40.3,20.0,55.6,21.4,12.5,10.5,3.6,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1204,NEWCASTLE ISD,252902,'252,N,N,,Rural,Rural-Distant,N,9.000000,...,9.1,0.0,20.0,55.6,21.4,20.0,11.1,9.1,0.0,0.0
1205,OLNEY ISD,252903,'252,N,N,,Rural,Town-Distant,N,12.333333,...,12.5,33.3,20.0,55.6,21.4,20.0,13.8,0.0,0.0,0.0
1206,ZAPATA COUNTY ISD,253901,'253,N,N,,Other Central City Suburban,Town-Remote,N,15.804688,...,4.2,21.1,20.0,55.6,21.4,20.0,2.5,0.0,0.0,0.0
1207,CRYSTAL CITY ISD,254901,'254,N,N,,Non-metropolitan Stable,Town-Remote,N,4.177570,...,0.0,21.1,20.0,55.6,21.4,20.0,0.0,0.0,0.0,0.0
