In [89]:
# Dependencies:
import pandas as pd


In [90]:
## open the school demographic data

df_public = pd.read_csv('data\MHSAA\school_info\school_info_PUBLIC.csv')
df_private = pd.read_csv('data\MHSAA\school_info\school_info_PRIVATE.csv')

# ## Add a column to each dataframe that indicates the type of school
# df_public['school_type'] = 'public'
# df_private['school_type'] = 'private'

In [91]:
# ## Transform the dataframes to have the same column names
# df_public.info()

# df_private.info()

In [92]:
# Transform the Private school data to more readable format

# Create a dictionary mapping locale codes to descriptions
locale_dict = {
    11: 'City: Large',
    12: 'City: Midsize',
    13: 'City: Small',
    21: 'Suburb: Large',
    22: 'Suburb: Midsize',
    23: 'Suburb: Small',
    31: 'Town: Fringe',
    32: 'Town: Distant',
    33: 'Town: Remote',
    41: 'Rural: Fringe',
    42: 'Rural: Distant',
    43: 'Rural: Remote'
}

relig_dict = {
    1: 'Catholic',
    2: 'Other Religious',
    3: 'Nonsectarian'

}

ort_dict = {
    1: 'Roman Catholic',
    2: 'African Methodist Episcopal',
    3: 'Amish',
    4: 'Assembly of God',
    5: 'Baptist',
    6: 'Brethren',
    7: 'Calvinist',
    8: 'Christian (no denomination specified)',
    9: 'Church of Christ',
    10: 'Church of God',
    11: 'Church of God in Christ',
    12: 'Church of the Nazarene',
    13: 'Disciples of Christ',
    14: 'Episcopal',
    15: 'Friends',
    16: 'Greek Orthodox',
    17: 'Islamic',
    18: 'Jewish',
    19: 'Latter Day Saints',
    20: 'Lutheran Church – Missouri Synod',
    21: 'Evangelical Lutheran Church in America',
    22: 'Wisconsin Evangelical Lutheran Synod',
    23: 'Other Lutheran',
    24: 'Mennonite',
    25: 'Methodist',
    26: 'Penecostal',
    27: 'Presbyterian',
    28: 'Seventh Day Adventists',
    29: 'Other',
    30: 'Nonsectarians'
}

## Change PSS_COMM_TYPE to a more readable format
# This is an higher level classification of Locale

# Create a dictionary mapping locale codes to descriptions
comm_dict = {
    1: 'City',
    2: 'Suburban',
    3: 'Town',
    4: 'Rural'
}

# Dictionary for COOED values
coed_dict = {
    1: 'Coed',
    2: 'Female_Only',
    3: 'Male_Only'
}


# Apply the mapping to the columns
df_private['PSS_COED'] = df_private['PSS_COED'].map(coed_dict)
df_private['PSS_LOCALE'] = df_private['PSS_LOCALE'].map(locale_dict)
df_private['PSS_COMM_TYPE'] = df_private['PSS_COMM_TYPE'].map(comm_dict)
df_private['PSS_ORIENT'] = df_private['PSS_ORIENT'].map(ort_dict)
df_private['PSS_RELIG'] = df_private['PSS_RELIG'].map(relig_dict)

## rework the PSS_PHONE column to match the public school format
## Currently in the format: 1234567890 want it in (123) 456-7890
df_private['PSS_PHONE'] = df_private['PSS_PHONE'].astype(str)
df_private['PSS_PHONE'] = df_private['PSS_PHONE'].str.replace(r'(\d{3})(\d{3})(\d{4})', r'(\1) \2-\3')



# Calculate the High School Enrollment (Grades 9-12)
cols_to_sum = ['PSS_ENROLL_9', 'PSS_ENROLL_10', 'PSS_ENROLL_11', 'PSS_ENROLL_12', 'PSS_ENROLL_UG']
df_private['HS_ENROLL'] = df_private[cols_to_sum].fillna(0).sum(axis=1)

  df_private['PSS_PHONE'] = df_private['PSS_PHONE'].str.replace(r'(\d{3})(\d{3})(\d{4})', r'(\1) \2-\3')


In [93]:
# Renaming columns in df_private to match df_public
df_private = df_private.rename(columns={
    'PSS_INST': 'School Name',
    'PSS_ADDRESS': 'Street Address',
    'PSS_CITY': 'City',
    'PSS_PHONE': 'Phone',
    'PSS_ZIP5': 'ZIP',
    'PSS_ZIP4': 'ZIP 4-digit',
    'PSS_COUNTY_NAME': 'County Name',
    'PSS_COUNTY_FIPS': 'County FIPS',
    'PSSCOUNTY_NO': 'County Number',
    'PSS_STABB': 'State',
    'PSS_ORIENT': 'Relig_subtype',
    'PSS_RELIG': 'Relig',
    'PSS_LOCALE': 'Locale',
    'PSS_COMM_TYPE': 'Community_Type',
    'PSS_INDIAN_PCT': 'demo_pct_American Indian',
    'PSS_ASIAN_PCT': 'demo_pct_Asian',
    'PSS_HISP_PCT': 'demo_pct_Hispanic',
    'PSS_BLACK_PCT': 'demo_pct_Black',
    'PSS_WHITE_PCT': 'demo_pct_White',
    'PSS_PACISL_PCT': 'demo_pct_Pacific Islander',
    'PSS_TWOMORE_PCT': 'demo_pct_Two_or_More',

    'PSS_COED': 'Coed',

    'PSS_STDTCH_RT': 'student_teacher_ratio',
    'PSS_SCHOOL_ID' : 'School ID(private)',
   
})

# Drop the columns that are not needed
drop_col = ['LoGrade','HiGrade','PSS_RACE_AI','PSS_RACE_AS','PSS_RACE_H',
            'PSS_RACE_B','PSS_RACE_W','PSS_RACE_P','PSS_RACE_2','PSS_TYPE',
            'PSS_LEVEL','PSS_ASSOC_2','PSS_ASSOC_3','PSS_FIPS','PSS_SCH_DAYS',
            'PSS_STU_DAY_HRS','PSS_LIBRARY','PSS_ENROLL_UG','PSS_ENROLL_PK',
            'PSS_ENROLL_K','PSS_ENROLL_1','PSS_ENROLL_2','PSS_ENROLL_3','PSS_ENROLL_4',
            'PSS_ENROLL_5','PSS_ENROLL_6','PSS_ENROLL_7','PSS_ENROLL_8','PSS_ENROLL_9',
            'PSS_ENROLL_10','PSS_ENROLL_11','PSS_ENROLL_12','PSS_ENROLL_T','PSS_ENROLL_TK12',
            'PSS_FTE_TEACH'

            ]
# print(df_private.columns)


# Creat new columns from the list and Fill the following columns with 'No'
fill_col = ['Charter','Magnet','Title I School','Title 1 School Wide']

for col in fill_col:
    df_private[col] = 'No'

# Drop the columns that are not needed
df_private = df_private.drop(columns=drop_col)


## Create columns and assign values for the following columns in private school data
df_private = df_private.assign(
    **{'Charter': 'No',
       'Magnet': 'No',
       'Title 1 School': 'No',
       'Title 1 School Wide': 'No'
       }

)

In [94]:
df_public.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1314 entries, 0 to 1313
Data columns (total 21 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   School Name            1314 non-null   object 
 1   NCES School ID         1314 non-null   int64  
 2   County Name            1314 non-null   object 
 3   Street Address         1314 non-null   object 
 4   City                   1314 non-null   object 
 5   State                  1314 non-null   object 
 6   ZIP                    1314 non-null   int64  
 7   ZIP 4-digit            1171 non-null   float64
 8   Phone                  1314 non-null   object 
 9   Locale Code            1314 non-null   int64  
 10  Locale                 1314 non-null   object 
 11  Charter                1314 non-null   object 
 12  Magnet                 1314 non-null   object 
 13  Title I School         1314 non-null   object 
 14  Title 1 School Wide    1314 non-null   object 
 15  Stud

In [95]:
## Make some changes to the public school data

# Rename columns to match private school data
# Rename Students to HS_ENROLL

df_public = df_public.rename(columns={'Students': 'HS_ENROLL'})

# Create and fill some columns that are from the private school data
df_public = df_public.assign(
    **{'Coed': 'Yes',
    'Relig': 'None',
    'Community_Type': df_public['Locale'].str.split(':').str[0],
    'Relig_subtype': 'None'}
)




In [96]:
# Lookup The demographic data for the public schools before merging

## PARSE PUBLIC SCHOOL DEMO DATA FROM NCES WEBSITE

import pandas as pd
import requests
from bs4 import BeautifulSoup
from tqdm import tqdm
import os
from IPython.display import clear_output

import time

def parse_school_data(school_ids):
    # Start with an empty DataFrame to store the results
    df = pd.DataFrame()

    # We use tqdm to create a progress bar
    for school_id in tqdm(school_ids):

        
        # Build the URL for this school
        url = f"https://nces.ed.gov/ccd/schoolsearch/school_detail.asp?Search=1&SchoolID={school_id}&ID={school_id}&SchoolType=1&SchoolType=2&SchoolType=3&SchoolType=4&SpecificSchlTypes=all&IncGrade=-1&LoGrade=-1&HiGrade=-1&ID2={school_id}"

        # print(f"Processing school: {school_id}")

        ## Parse with pandas
        # Read the tables on the page into a list of DataFrames
        df_test = pd.read_html(url)

        # Process and print statements for debugging
        # print("Parsed html, extracting information...")

        # Get the value in the first column of the first row and drop everything in from of the \t
        school_name = df_test[2].iloc[0,0].split('\t')[1]
        school_id = df_test[2].iloc[0,2].split('ID:')[1].strip()
        school_district = df_test[2].iloc[2,0].split('\t')[1].split('district information')[0]
        school_district_id = df_test[2].iloc[2,2].split('ID:')[1].strip()

        #Get Total Students
        total_students = df_test[6].iloc[0,2]
        # Get Teachers at the school
        teachers = df_test[6].iloc[1,2]
        # Get the student/teacher ratio
        student_teacher_ratio = df_test[6].iloc[2,2]

        # Get all the grade levels and counts
        grade_levels = df_test[10].iloc[0,1:7]
        grade_counts = df_test[10].iloc[1,1:7]
        grade_dict = dict(zip(grade_levels, grade_counts))

        # Get free and reduced lunch counts
        lunch_free_count = df_test[16].iloc[0,0].split(':')[1].strip()
        lunch_reduced_count = df_test[16].iloc[0,1].split(':')[1].strip()
        lunch_total_count = df_test[16].iloc[0,2].split(':')[1].strip()

        # Get the demographic categories and counts
        demo_cat = df_test[12].iloc[0,1:8]
        demo_counts = df_test[12].iloc[1,1:8]
        demo_dict = dict(zip(demo_cat, demo_counts))

        # Get the gender categories and counts
        gender_cat = df_test[14].iloc[0,1:3]
        gender_counts = df_test[14].iloc[1,1:3]
        gender_dict = dict(zip(gender_cat, gender_counts))

        # print("Information extracted, saving to dataframe...")

        # Save the parsed data to a row in a dataframe
        df_test_row = pd.DataFrame({'school_name': school_name,
        'school_id': school_id,
        'school_district': school_district,
        'school_district_id': school_district_id,
        'total_students': total_students,
        'teachers': teachers,
        'student_teacher_ratio': student_teacher_ratio,
        'lunch_free_count': lunch_free_count,
        'lunch_reduced_count': lunch_reduced_count,
        'lunch_total_count': lunch_total_count,
        **grade_dict,
        **demo_dict,
        **gender_dict
        }, index=[0])

        # Append the new row to our DataFrame
        df = pd.concat([df, df_test_row], ignore_index=True)

        # add a delay to prevent getting blocked
        time.sleep(1)

        # print(f"Finished processing school: {school_id}")
        
        # Clear the output
        # clear_output(wait=True)

    return df



In [97]:
# Get the list of NCES id numbers and parse the data

# Get a list of the school Ids for all the public schools
nces_ids_list = df_public['NCES School ID'].tolist()




# test = nces_ids_list[0:5]

In [98]:
df_public.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1314 entries, 0 to 1313
Data columns (total 25 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   School Name            1314 non-null   object 
 1   NCES School ID         1314 non-null   int64  
 2   County Name            1314 non-null   object 
 3   Street Address         1314 non-null   object 
 4   City                   1314 non-null   object 
 5   State                  1314 non-null   object 
 6   ZIP                    1314 non-null   int64  
 7   ZIP 4-digit            1171 non-null   float64
 8   Phone                  1314 non-null   object 
 9   Locale Code            1314 non-null   int64  
 10  Locale                 1314 non-null   object 
 11  Charter                1314 non-null   object 
 12  Magnet                 1314 non-null   object 
 13  Title I School         1314 non-null   object 
 14  Title 1 School Wide    1314 non-null   object 
 15  HS_E

In [99]:
## Feed the list of school ids into the function
df = df_public 


# df = parse_school_data(nces_ids_list)

# print(df.head())

In [100]:
# import pandas as pd

# # df.head()

# # df.info()

# ## Load the file with the public school demo data from scrape
# df_public_demo = pd.read_csv('test_nces_parse.csv')

# df_public = df_public_demo


In [101]:
## Output csv to check the data
# df.to_csv('TEMP_test_nces_parse.csv', index=False)
# df.dtypes

# df_public.info()

df_public = df

In [102]:
####### LOAD the data from the csv file to avoid scraping again

import pandas as pd   
df = pd.read_csv('NEW_test_nces_parse.csv')

In [103]:
# df.info()

## Rename school_name and school_id back to 'School Name' and 'NCES School ID'
df = df.rename(columns={'school_name': 'School Name', 'school_id': 'NCES School ID'})

In [104]:
## Claculate the percentages for the demographic data so it matches with the private school data

# Create a list of the columns to calculate percentages for
demo_list = ['American Indian/Alaska Native', 'Asian', 'Black', 
             'Hispanic', 'White', 'Native Hawaiian/Pacific Islander', 
             'Two or MoreRaces', 'Male', 'Female']

# calculate and store the percentages
for col in demo_list:
    df[col] = df[col].astype(float)
    df[col] = df[col] / df['total_students'].astype(float)


In [105]:
### clean the data to only the high school level (grades 9-12)

# ## Add the values in the 9 10 11 12 columns to get HS_ENROLL by column position
df['HS_ENROLL'] = df.iloc[:, 9:13].sum(axis=1)

# drop any rows that have a 0 in the HS_ENROLL column
df = df[df['HS_ENROLL'] != 0]




  df['HS_ENROLL'] = df.iloc[:, 9:13].sum(axis=1)


In [106]:
# ## Calc male and femalE percentages

# df['Male'] = df['Male'] / df['HS_ENROLL']
# df['Female'] = df['Female'] / df['HS_ENROLL']

In [107]:
## Drop the columns that are not needed for the analysis

drop_col = ['KG','1','2','3','4','5','6','7','8','PK','Ungraded']

df.drop(drop_col, axis=1, inplace=True)

## Rename the columns for 9 10 11 12 to 9th 10th 11th 12th
df.rename(columns={'9':'9th', '10':'10th', '11':'11th', '12':'12th'}, inplace=True)

In [108]:


## Rename back to the original df name
public_df = df

# public_df.head()
public_df.info()




<class 'pandas.core.frame.DataFrame'>
Int64Index: 899 entries, 0 to 1313
Data columns (total 24 columns):
 #   Column                            Non-Null Count  Dtype  
---  ------                            --------------  -----  
 0   School Name                       899 non-null    object 
 1   NCES School ID                    899 non-null    int64  
 2   school_district                   899 non-null    object 
 3   school_district_id                899 non-null    int64  
 4   total_students                    899 non-null    float64
 5   teachers                          899 non-null    float64
 6   student_teacher_ratio             899 non-null    object 
 7   lunch_free_count                  899 non-null    object 
 8   lunch_reduced_count               899 non-null    object 
 9   lunch_total_count                 899 non-null    object 
 10  9th                               886 non-null    float64
 11  10th                              883 non-null    float64
 12  11th   

In [109]:
## Reload original school data
df_public = pd.read_csv('data\MHSAA\school_info\school_info_PUBLIC.csv')
# df_private = pd.read_csv('data\MHSAA\school_info\school_info_PRIVATE.csv')

public_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 899 entries, 0 to 1313
Data columns (total 24 columns):
 #   Column                            Non-Null Count  Dtype  
---  ------                            --------------  -----  
 0   School Name                       899 non-null    object 
 1   NCES School ID                    899 non-null    int64  
 2   school_district                   899 non-null    object 
 3   school_district_id                899 non-null    int64  
 4   total_students                    899 non-null    float64
 5   teachers                          899 non-null    float64
 6   student_teacher_ratio             899 non-null    object 
 7   lunch_free_count                  899 non-null    object 
 8   lunch_reduced_count               899 non-null    object 
 9   lunch_total_count                 899 non-null    object 
 10  9th                               886 non-null    float64
 11  10th                              883 non-null    float64
 12  11th   

In [110]:
## add some column to puyblic data to match private data

# Create and fill some columns that are from the private school data
public_df = public_df.assign(
    **{'Coed': 'Yes',
    'Relig': 'None',
    'Locale': df_public['Locale'],
    'Community_Type': df_public['Locale'].str.split(':').str[0],
    'Charter': df_public['Charter'],
    'Magnet': df_public['Magnet'],
    'Relig_subtype': 'None',
    'Title 1 School': df_public['Title I School'],
    'Title 1 School Wide': df_public['Title 1 School Wide'],
    
    'type': 'public'}
)

public_df.info()


<class 'pandas.core.frame.DataFrame'>
Int64Index: 899 entries, 0 to 1313
Data columns (total 34 columns):
 #   Column                            Non-Null Count  Dtype  
---  ------                            --------------  -----  
 0   School Name                       899 non-null    object 
 1   NCES School ID                    899 non-null    int64  
 2   school_district                   899 non-null    object 
 3   school_district_id                899 non-null    int64  
 4   total_students                    899 non-null    float64
 5   teachers                          899 non-null    float64
 6   student_teacher_ratio             899 non-null    object 
 7   lunch_free_count                  899 non-null    object 
 8   lunch_reduced_count               899 non-null    object 
 9   lunch_total_count                 899 non-null    object 
 10  9th                               886 non-null    float64
 11  10th                              883 non-null    float64
 12  11th   

In [111]:
## The address information got lost from the public school data. Need to get it from the original file
public_orig = pd.read_csv('data\MHSAA\school_info\school_info_PUBLIC.csv')



# match the NCES School ID from the processed data to the original file and get the address information.
# don't keep data from schools that aren't in the processed data
public_df = public_df.merge(public_orig[['NCES School ID', 'Street Address', 'City', 'State', 'ZIP', 'Locale']], on='NCES School ID', how='left')




In [112]:
## Rename the unique id columns in both dfs to match

df_private.rename(columns={'School ID(private)': 'u_school_id'}, inplace=True)
# Add 'private' to type in the private df
df_private['type'] = 'private'

# Rename the HS_ENROLL column to match the public df
df_private.rename(columns={'HS_ENROLL': 'total_students'}, inplace=True)

public_df.rename(columns={'NCES School ID': 'u_school_id'}, inplace=True)

## add 'public' to type in the public df
public_df['type'] = 'public'

In [113]:
## Rename some columns to match

cols = 'American Indian/Alaska Native', 'Asian', 'Black', 'Hispanic', 'White', 'Native Hawaiian/Pacific Islander', 'Two or MoreRaces'

new_names = 'demo_pct_American Indian', 'demo_pct_Asian', 'demo_pct_Black', 'demo_pct_Hispanic', 'demo_pct_White', 'demo_pct_Pacific Islander', 'demo_pct_Two_or_More'

public_df.rename(columns=dict(zip(cols, new_names)), inplace=True)

In [114]:
# df_private.info()

# public_df.info()

In [115]:
# public_df.info()

In [116]:
## Concatenate the public and private dataframes

final_df = pd.concat([public_df, df_private], ignore_index=True)

In [117]:
## Move the Locale_y values to Locale if Locale is null
final_df['Locale'] = final_df['Locale'].fillna(final_df['Locale_y'])

## Drop the Locale_y column
final_df.drop('Locale_y', axis=1, inplace=True)

# Drop the Locale_x column
final_df.drop('Locale_x', axis=1, inplace=True)

# Other columns to drop

drop_col = ['PSS_ASSOC_1', 'Title I School', 'Phone']

final_df.drop(drop_col, axis=1, inplace=True)


In [118]:
## Review the final dataframe

final_df.info()
final_df.head()



<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1055 entries, 0 to 1054
Data columns (total 41 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   School Name                1055 non-null   object 
 1   u_school_id                1055 non-null   object 
 2   school_district            899 non-null    object 
 3   school_district_id         899 non-null    float64
 4   total_students             1055 non-null   float64
 5   teachers                   899 non-null    float64
 6   student_teacher_ratio      1055 non-null   object 
 7   lunch_free_count           899 non-null    object 
 8   lunch_reduced_count        899 non-null    object 
 9   lunch_total_count          899 non-null    object 
 10  9th                        886 non-null    float64
 11  10th                       883 non-null    float64
 12  11th                       872 non-null    float64
 13  12th                       753 non-null    float

Unnamed: 0,School Name,u_school_id,school_district,school_district_id,total_students,teachers,student_teacher_ratio,lunch_free_count,lunch_reduced_count,lunch_total_count,...,Title 1 School Wide,type,Street Address,City,State,ZIP,PSS_COUNTY_NO,County FIPS,Locale,County Name
0,54th Street Academy,262016008622,Kelloggsville Public Schools,2620160.0,66.0,2.4,27.5,54,2,56,...,Yes,public,173 54TH ST SW,GRAND RAPIDS,MI,49548,,,Suburb: Large,
1,Academic and Career Education Academy,260033004509,Academic And Career Education Academy,2600330.0,100.0,5.52,18.12,92,4,96,...,Yes,public,884 EAST ISABELLA RD,MIDLAND,MI,48640,,,Rural: Fringe,
2,Academy for Business and Technology High School,260016601035,Academy For Business And Technology,2600166.0,281.0,16.8,16.73,267,2,269,...,Yes,public,19625 WOOD ST,MELVINDALE,MI,48122,,,Suburb: Large,
3,ACCE,263663008789,Ypsilanti Community Schools,2636630.0,187.0,7.5,24.93,158,4,162,...,Yes,public,1076 Ecorse Road,Ypsilanti,MI,48198,,,Suburb: Large,
4,Accelerated Learning Academy,261452002043,Flint School District Of The City Of,2614520.0,158.0,10.2,15.49,151,1,152,...,Yes,public,1602 S AVERILL AVE,FLINT,MI,48503,,,City: Small,


In [119]:


## replace Suburb with Suburban
final_df['Community_Type'] = final_df['Community_Type'].replace('Suburb', 'Suburban')

In [124]:
## community type value counts
final_df['Community_Type'].value_counts()

# # Relig_subtype value counts
final_df['Relig_subtype'].value_counts()

# ## Show locale value counts
# final_df['Locale'].value_counts()

# ## Show type value counts
# final_df['type'].value_counts()

# ## Show Coed value counts
# final_df['Coed'].value_counts()

# ## Show Relig value counts
# final_df['Relig'].value_counts()

# ## Show relig_subtype value counts
# final_df['Relig_subtype'].value_counts()

# # Charter value counts
# final_df['Charter'].value_counts()

# # Magnet value counts
# final_df['Magnet'].value_counts()

None                                     899
Christian (no denomination specified)     49
Roman Catholic                            31
Baptist                                   26
Nonsectarians                             21
Jewish                                     5
Calvinist                                  4
Seventh Day Adventists                     4
Islamic                                    3
Lutheran Church – Missouri Synod           3
Mennonite                                  3
Methodist                                  1
Other Lutheran                             1
Other                                      1
Wisconsin Evangelical Lutheran Synod       1
Assembly of God                            1
Brethren                                   1
Penecostal                                 1
Name: Relig_subtype, dtype: int64

In [121]:
## Save the data to a csv file
final_df.to_csv('../BB_CLEAN/data/school_master_table_NEW.csv', index=False)

In [122]:
final_df.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1055 entries, 0 to 1054
Data columns (total 41 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   School Name                1055 non-null   object 
 1   u_school_id                1055 non-null   object 
 2   school_district            899 non-null    object 
 3   school_district_id         899 non-null    float64
 4   total_students             1055 non-null   float64
 5   teachers                   899 non-null    float64
 6   student_teacher_ratio      1055 non-null   object 
 7   lunch_free_count           899 non-null    object 
 8   lunch_reduced_count        899 non-null    object 
 9   lunch_total_count          899 non-null    object 
 10  9th                        886 non-null    float64
 11  10th                       883 non-null    float64
 12  11th                       872 non-null    float64
 13  12th                       753 non-null    float

In [123]:
### get a list of the school names from the public school data and the private school data

# public_school_names = df_public['School Name'].tolist()
# private_school_names = df_private['School Name'].tolist()