# Set-Up

In [1]:
import os
import re

import pandas as pd
import sklearn
import statsmodels.api as sm
from sklearn.linear_model import LinearRegression

#pd.set_option('display.max_rows', None)
#pd.set_option('display.max_columns', None)
#pd.set_option('display.max_colwidth', None)

In [2]:
# Root folder that holds the raw Michigan input files.
# Set the MICHIGAN_DATA_HOME environment variable to run the notebook on another
# machine; when it is unset the original local Box folder is used, so existing
# behaviour is unchanged.
data_home = os.environ.get(
    "MICHIGAN_DATA_HOME",
    "C:/Users/johnw/Box/Michigan Parent Dashboard/Michigan Data",
)

In [3]:
# Load the raw input tables from data_home: six CSV files followed by two Excel workbooks.
# MISERVICEOFFERING is the base table (it is copied into MIPREFULLSCHOOLDATA below);
# the other frames are merged onto it in later cells.
MISERVICEOFFERING = pd.read_csv(f"{data_home}/CleanedSchoolServicesOffering_06_26_23.csv")
SCHOOLINFO = pd.read_csv(f"{data_home}/7ec38fe6-65ce-40c8-8a63-da51b7b41f54.csv")
ENROLLMENT = pd.read_csv(f"{data_home}/ENROLLMENT_2223.csv")
ECODIS = pd.read_csv(f"{data_home}/ECODIS_2223.csv")
NCES = pd.read_csv(f"{data_home}/ncesdata_E74F29BE.csv")
FINANCE = pd.read_csv(f"{data_home}/22-Port-74v Per-Student Exp Download File.csv")
P2P = pd.read_excel(f"{data_home}/P2P_10_14_2023.xlsx")
SCHOOLINDEX = pd.read_excel(f"{data_home}/202122_School_Index_Results.xlsx")

# Matching School Service and Offering Data with Building Code ID

Unfortunately, the school services and offerings data do not contain the building code ID. As a result, I have to rely on techniques that best approximate a match to those building code IDs, and I also have to be prepared for schools that cannot be matched at all.

The first approach for identifying matches will be an exact match: within a school district code, are there any school names that exactly match up to my school names collected? Those schools will subsequently be assigned that building code.

The second approach for identifying matches will be a fuzzy match: within a school district code, are there any school names that approximately match up to my school names collected? Those fuzzy matched schools will be checked and subsequently assigned that building code.

The third approach for identifying matches will be manual: anything that has already been matched will be removed from the search list. Afterwards, I will look within district for the remaining set of school names and see if there is a close enough match. If there is, I will manually adjust the building code. If not, then I will have to remove the school from my sample.

In [4]:
# Work on an independent (deep) copy so the raw services/offerings table stays untouched.
MIPREFULLSCHOOLDATA = MISERVICEOFFERING.copy(deep=True)

In [5]:
# Make the services/offerings table mergeable on DistrictCode and BuildingName:
# discard the leftover 'Unnamed: 0' column and rename the district / school-name
# columns to the key names used in the merge below.
MIPREFULLSCHOOLDATA = (
    MIPREFULLSCHOOLDATA
    .drop(columns='Unnamed: 0')
    .rename(columns={'District ID': 'DistrictCode', 'School Name': 'BuildingName'})
)

In [6]:
# SCHOOLINFO uses BuildingCode 0 for rows that describe a district rather than a
# building. For non-traditional schools such as charters the district name and the
# school name can coincide, so those rows would collide on the name-based merge.
# Keep only rows with a non-zero BuildingCode, then discard rows that are exact
# duplicates across every column. Original index labels are preserved.
SCHOOLINFO = (
    SCHOOLINFO
    .loc[SCHOOLINFO['BuildingCode'].ne(0)]
    .drop_duplicates()
)

In [7]:
# Show the rows whose (DistrictCode, BuildingName) pair repeats an earlier row;
# these would make the name-based merge ambiguous.
duplicate_name_mask = SCHOOLINFO.duplicated(subset=['DistrictCode', 'BuildingName'])
SCHOOLINFO.loc[duplicate_name_mask]

Unnamed: 0,SchoolYear,ISDCode,ISDName,DistrictCode,DistrictName,BuildingCode,BuildingName,COUNTY_CODE,COUNTY_NAME,EntityType,...,SCHOOL_EMPHASIS,SETTING,EMAIL_ADDRESS,PhoneNumber,ADDRESS_LINE_1,CITY,STATE,ZIP_CODE,SCHOOL_TYPE,Status
1033,22 - 23 School Year,28,Northwest Education Services,28904,The Greenspire School,4021,The Greenspire School,28,Grand Traverse,PSA School,...,General Education,Multiple Settings,robert.walker@greenspireschool.org,2314215905,2200 Dendrinos Dr,Traverse City,MI,49684,General Education,Open-Active
1963,22 - 23 School Year,41,Kent ISD,41931,PrepNet Virtual Academy,4052,PrepNet Virtual Academy,41,Kent,PSA Unique Education Provider,...,General Education,Multiple Settings,123.mvanklompenb@nhaschools.com,6169291325,3834 52nd St Se,Kentwood,MI,49512,General Education,Open-Active
3345,22 - 23 School Year,63,Oakland Schools,63929,Waterford Montessori Academy,3430,Waterford Montessori Academy,63,Oakland,PSA School,...,General Education,Multiple Settings,cgreen@waterfordmontessori.org,2486742400,4350 Elizabeth Lake Rd,Waterford,MI,48328,General Education,Open-Active
4688,22 - 23 School Year,82,Wayne RESA,82702,University Preparatory Academy (PSAD),9908,University Preparatory Academy (PSAD) - Elemen...,82,Wayne,PSA School,...,General Education,Multiple Settings,kimberly.llorens@uprepschools.com,3133090552,435 Amsterdam St,Detroit,MI,48202,General Education,Open-Active
4845,22 - 23 School Year,82,Wayne RESA,82941,Star International Academy,8636,Star International Academy,82,Wayne,PSA School,...,General Education,Multiple Settings,abazzi@hesedu.com,3135650507,24425 Hass St,Dearborn Heights,MI,48127,General Education,Open-Active


In [8]:
# Resolve the remaining (DistrictCode, BuildingName) collisions in SCHOOLINFO by hand.
# Row labels below are SCHOOLINFO index labels from the duplicate listing above.

# Row 1033 is kept, but renamed so it no longer collides.
SCHOOLINFO.loc[1033, 'BuildingName'] = 'The Greenspire High School'

# Removed: the 2nd PrepNet Virtual Academy, the 2nd Waterford Montessori Academy,
# the 2nd University Preparatory Academy (PSAD) - Elementary, and the 1st Star
# International Academy.
rows_to_remove = [1963, 3345, 4688, 4844]
SCHOOLINFO = SCHOOLINFO.drop(index=rows_to_remove)

In [9]:
# Exact-match step: attach SCHOOLINFO to each services/offerings row whose
# DistrictCode and BuildingName both match. indicator=True adds a '_merge' column
# recording whether each row found a partner.
MIFULLSCHOOLDATA = pd.merge(
    MIPREFULLSCHOOLDATA,
    SCHOOLINFO,
    how='left',
    on=['DistrictCode', 'BuildingName'],
    indicator=True,
)

In [10]:
#Here I can say that we basically merged 3795/3818, or 99.4% of Michigan's data from the Parent Dashboard to the SCHOOLINFO building records.
MIFULLSCHOOLDATA['_merge'].value_counts()

both          3795
left_only       23
right_only       0
Name: _merge, dtype: int64

In [11]:
# Rows that did not pick up a BuildingCode from the exact-name merge.
unmerged_rows = MIFULLSCHOOLDATA.loc[MIFULLSCHOOLDATA['BuildingCode'].isna()]
unmerged_rows.loc[:, 'BuildingName']

459                    Michigan International Prep-Howell
460                   Michigan International Prep-Saginaw
821                   Hillsdale County ISD Local Programs
1071     Maple Elementary at the Morey Educational Center
1158                            KRESA Head Start new LIfe
2105                              Seiter Education Center
2140                                 Success Park - Flint
2141                              Success Park - GR South
2142                                 Success Park - Hazel
2143                         Success Park - Lansing South
2144                          Success Park - Lansing West
2145                                Success Park - Monroe
2146                              Success Park - Muskegon
2147                     Success Park - Pontiac/Waterford
2148                                Success Park - Taylor
2149                             Success Park -Port Huron
2639                          Flex High School of Pontiac
2728          

In [12]:
# Manual resolution of the schools left unmatched by the exact-name merge.
# A number means "assign this BuildingCode"; "Drop" means the row is removed.
#   Michigan International Prep School - Howell and Saginaw - Drop [Online and NOT DISTINCT]
#   Hillsdale County ISD Local Programs - Drop [ISD not SCHOOL]
#   Maple Elementary at the Morey Educational Center - Drop [NOT IN MICHIGAN SCHOOL DATA]
#   KRESA Head Start new LIfe - 3318
#   Seiter Education Center - 6333
#   Success Park - Drop [Online - All Derivatives of Online School]
#   Flex High School of Pontiac - Drop [Not in School info data at all]
#   Holland Virtual Tech High School - 9962
#   5/6 Building at Georgetown - Drop
#   Akron-Fairgrove Schools Academic Learning Center - Drop
#   Crockett Midtown High School of Science and Medicine - Drop
#   East English Village Preparatory Academy at Finney - 1189
#   FRCS Operations Facility - Drop

# Step 1: write the hand-identified building codes (row label -> BuildingCode).
# Row labels are MIFULLSCHOOLDATA index labels taken from the unmerged listing above.
manual_building_codes = {
    1158: 3318,
    2105: 6333,
    2728: 9962,
    3321: 1189,
}
for row_label, building_code in manual_building_codes.items():
    MIFULLSCHOOLDATA.loc[row_label, 'BuildingCode'] = building_code

# Step 2: drop the rows that remain without a building code.
rows_without_code = [
    459, 460, 821, 1071,
    2140, 2141, 2142, 2143, 2144, 2145, 2146, 2147, 2148, 2149,
    2639, 2780, 3092, 3317, 3552,
]
MIFULLSCHOOLDATA = MIFULLSCHOOLDATA.drop(index=rows_without_code)

In [13]:
# The merge indicator has served its purpose; remove it before the next merges.
MIFULLSCHOOLDATA = MIFULLSCHOOLDATA.drop(columns='_merge')

# Merging Michigan Data

In [14]:
# BuildingCode held missing values until the unmatched rows were dropped above;
# cast it to int before it is used as a merge key in the cells below.
MIFULLSCHOOLDATA = MIFULLSCHOOLDATA.assign(
    BuildingCode=MIFULLSCHOOLDATA['BuildingCode'].astype(int)
)

In [15]:
# Attach the enrollment table: left join on the (DistrictCode, BuildingCode) pair.
MIFULLSCHOOLDATA = pd.merge(
    MIFULLSCHOOLDATA,
    ENROLLMENT,
    how='left',
    on=['DistrictCode', 'BuildingCode'],
)

In [16]:
# Attach the economically-disadvantaged table: left join on the
# (DistrictCode, BuildingCode) pair.
MIFULLSCHOOLDATA = pd.merge(
    MIFULLSCHOOLDATA,
    ECODIS,
    how='left',
    on=['DistrictCode', 'BuildingCode'],
)

In [17]:
# Attach the NCES fields (Magnet, Student Teacher Ratio, Type).
# The NCES state IDs are hyphen-delimited strings; the segment after the last
# hyphen is converted to a number and stored as the district / building merge key.
for code_column, nces_id_column in (
    ('DistrictCode', 'State District ID'),
    ('BuildingCode', 'State School ID'),
):
    last_segment = NCES[nces_id_column].str.split('-').str[-1]
    NCES[code_column] = pd.to_numeric(last_segment)

nces_columns = ['Magnet', 'Student Teacher Ratio', 'Type', 'DistrictCode', 'BuildingCode']
MIFULLSCHOOLDATA = MIFULLSCHOOLDATA.merge(
    NCES[nces_columns],
    how='left',
    on=['DistrictCode', 'BuildingCode'],
)

In [18]:
# Attach per-building spending ('Total Building PPE'): left join on the
# (DistrictCode, BuildingCode) pair.
finance_columns = ['Total Building PPE', 'DistrictCode', 'BuildingCode']
MIFULLSCHOOLDATA = pd.merge(
    MIFULLSCHOOLDATA,
    FINANCE[finance_columns],
    how='left',
    on=['DistrictCode', 'BuildingCode'],
)

In [19]:
# Attach the pay-to-play fields (EC_FEE, EC_FEE_WAIVER). This join is keyed on
# DistrictCode only, so every building in a district receives the same values.
p2p_columns = ['DistrictCode', 'EC_FEE', 'EC_FEE_WAIVER']
MIFULLSCHOOLDATA = pd.merge(
    MIFULLSCHOOLDATA,
    P2P[p2p_columns],
    how='left',
    on=['DistrictCode'],
)

In [20]:
# Attach the School Index results: left join on the (DistrictCode, BuildingCode) pair.
school_index_columns = [
    'DistrictCode',
    'BuildingCode',
    'CompositeIndex',
    'GrowthIndex',
    'ProficiencyIndex',
    'GraduationIndex',
    'ELProgressIndex',
    'SchoolQualityIndex',
    'SubjectParticipationIndex',
    'ELParticipationIndex',
    'SupportCategoryName',
    'SupportCategoryReason',
    'SupportCategorySubgroupList',
]
MIFULLSCHOOLDATA = pd.merge(
    MIFULLSCHOOLDATA,
    SCHOOLINDEX[school_index_columns],
    how='left',
    on=['DistrictCode', 'BuildingCode'],
)

# Creating Variables

## % Total Variables

In [21]:
# Force the student-count columns to a numeric dtype. Any entry that cannot be
# parsed as a number becomes NaN (errors='coerce') instead of raising.
count_columns = [
    'tot_ai', 'tot_as', 'tot_aa', 'tot_hw', 'tot_wh', 'tot_hs', 'tot_mr',
    'Total Economically Disadvantaged',
    'tot_all',
    'Total Count',
]
for count_column in count_columns:
    MIFULLSCHOOLDATA[count_column] = pd.to_numeric(
        MIFULLSCHOOLDATA[count_column], errors='coerce'
    )

In [22]:
# Percentage of total enrollment (tot_all) for each racial/ethnic group.
race_share_sources = {
    'per_americanindian': 'tot_ai',    # American Indian
    'per_asian': 'tot_as',             # Asian
    'per_africanamerican': 'tot_aa',   # African American
    'per_nativehawaiian': 'tot_hw',    # Native Hawaiian
    'per_white': 'tot_wh',             # White
    'per_hispanic': 'tot_hs',          # Hispanic
    'per_multiracial': 'tot_mr',       # Multiracial
}
for share_column, count_column in race_share_sources.items():
    MIFULLSCHOOLDATA[share_column] = (
        MIFULLSCHOOLDATA[count_column] / MIFULLSCHOOLDATA['tot_all'] * 100
    )

# Combined categories. Each one is the listed shares added left to right, so a
# missing component makes the combined value missing as well.
combined_shares = {
    # Every group except White
    'per_nonwhite': [
        'per_americanindian', 'per_asian', 'per_africanamerican',
        'per_nativehawaiian', 'per_hispanic', 'per_multiracial',
    ],
    # Every group except White and Asian
    'per_nonwhiteasian': [
        'per_americanindian', 'per_africanamerican',
        'per_nativehawaiian', 'per_hispanic', 'per_multiracial',
    ],
    # White or Asian
    'per_whiteasian': ['per_white', 'per_asian'],
    # African American or Hispanic
    'per_blackorhisp': ['per_africanamerican', 'per_hispanic'],
    # Other: American Indian, Native Hawaiian, Multiracial
    'per_other': ['per_americanindian', 'per_nativehawaiian', 'per_multiracial'],
}
for combined_column, parts in combined_shares.items():
    running_total = MIFULLSCHOOLDATA[parts[0]]
    for part in parts[1:]:
        running_total = running_total + MIFULLSCHOOLDATA[part]
    MIFULLSCHOOLDATA[combined_column] = running_total

# Percentage economically disadvantaged. The denominator is 'Total Count' rather
# than tot_all (see the note below this cell).
MIFULLSCHOOLDATA['per_ecodis'] = (
    MIFULLSCHOOLDATA['Total Economically Disadvantaged']
    / MIFULLSCHOOLDATA['Total Count']
    * 100
)

It appears that Michigan collects the Economically Disadvantaged data in the Fall while the enrollment data are collected in the Spring. As a consequence, there is a slight discrepancy between the tot_all variable and the total number of students that are economically disadvantaged. This impacts future analyses because we obtained values that are 'impossible' - for example, a school with 138% economically disadvantaged students. Since a school can have, at most, 100% economically disadvantaged students, this unfortunately skews our analyses. I believe the best way to address this is to use the total student count from the economically disadvantaged spreadsheet rather than the total Spring enrollment. This approach does introduce discrepancies: the student counts now differ between these percentage variables. There is no guarantee that the students in the Fall count are the same as those in the Spring enrollment; however, I suspect that the percentage of students that are economically disadvantaged does not differ drastically between the Fall count and the Spring enrollment.

The existence of values over 100% feels unjustifiable. An alternative would be to artificially cap the values at 100, but I think that also feels like a false representation of the data. In actuality, there are very few schools that are impacted by this, but I think for consistency purposes, it is best to use the Fall total counts - even if it requires me to re-do a bunch of analyses.

## School Level Indicators

In [23]:
# Create one 0/1 indicator column per 'School Level' category.
# dtype=int is set explicitly: pandas >= 2.0 returns boolean dummies by default,
# and boolean columns mixed with float predictors turn the regression design
# matrices built later in this notebook into object arrays that statsmodels
# rejects. Integer dummies give the same 0/1 values on every pandas version.
dummies = pd.get_dummies(MIFULLSCHOOLDATA['School Level'], dtype=int)

# Concatenate the dummy variables with the original dataset
MIFULLSCHOOLDATA = pd.concat([MIFULLSCHOOLDATA, dummies], axis=1)

In [24]:
# Constant indicator equal to 1 for every row, so "all school levels" can be
# selected the same way as a single school-level dummy.
MIFULLSCHOOLDATA = MIFULLSCHOOLDATA.assign(All=1)

## Urbanicity

In [25]:
# Create one 0/1 indicator column per 'LOCALE_NAME' category (urbanicity).
# dtype=int is set explicitly: pandas >= 2.0 returns boolean dummies by default,
# and boolean columns mixed with float predictors turn the regression design
# matrices built later in this notebook into object arrays that statsmodels
# rejects. Integer dummies give the same 0/1 values on every pandas version.
dummies = pd.get_dummies(MIFULLSCHOOLDATA['LOCALE_NAME'], dtype=int)

# Concatenate the dummy variables with the original dataset
MIFULLSCHOOLDATA = pd.concat([MIFULLSCHOOLDATA, dummies], axis=1)

In [26]:
# Collapse the detailed locale indicators into four broad urbanicity columns.
# Each broad column is the row-wise sum of its three sub-category dummies.

# City = Large + Midsize + Small
citycolumns = MIFULLSCHOOLDATA.loc[:, ['City: Large', 'City: Midsize', 'City: Small']]
citycolumnssum = citycolumns.sum(axis='columns')
MIFULLSCHOOLDATA['City'] = citycolumnssum

# Rural = Distant + Fringe + Remote
ruralcolumns = MIFULLSCHOOLDATA.loc[:, ['Rural: Distant', 'Rural: Fringe', 'Rural: Remote']]
ruralcolumnssum = ruralcolumns.sum(axis='columns')
MIFULLSCHOOLDATA['Rural'] = ruralcolumnssum

# Suburb = Large + Midsize + Small
suburbcolumns = MIFULLSCHOOLDATA.loc[:, ['Suburb: Large', 'Suburb: Midsize', 'Suburb: Small']]
suburbcolumnssum = suburbcolumns.sum(axis='columns')
MIFULLSCHOOLDATA['Suburb'] = suburbcolumnssum

# Town = Distant + Fringe + Remote
towncolumns = MIFULLSCHOOLDATA.loc[:, ['Town: Distant', 'Town: Fringe', 'Town: Remote']]
towncolumnssum = towncolumns.sum(axis='columns')
MIFULLSCHOOLDATA['Town'] = towncolumnssum

## Charter and Magnet

In [27]:
# Charter indicator: 1 when EntityType is one of the two 'PSA' entity types,
# 0 for every other row (including rows with a missing EntityType).
charter_entity_types = ["PSA School", "PSA Unique Education Provider"]
MIFULLSCHOOLDATA['Charter'] = (
    MIFULLSCHOOLDATA['EntityType'].isin(charter_entity_types).astype('int64')
)

In [28]:
# Recode the 'Magnet' column from 'Yes'/'No' text to 1/0; any other value
# becomes NaN. Charter is not remapped here because it is already built as 0/1
# from EntityType.
binary_mapping = dict(No=0, Yes=1)
MIFULLSCHOOLDATA['Magnet'] = MIFULLSCHOOLDATA['Magnet'].map(binary_mapping)

## Student Teacher Ratio

In [29]:
# Numeric version of the 'Student Teacher Ratio' column; entries that cannot be
# parsed as numbers become NaN.
MIFULLSCHOOLDATA['STRatio'] = (
    MIFULLSCHOOLDATA['Student Teacher Ratio'].pipe(pd.to_numeric, errors='coerce')
)

# District Dummy Variables

In [30]:
# Create one 0/1 indicator column per DistrictCode, named 'districtdummy_<code>'.
# dtype=int is set explicitly so the indicators are integers on every pandas
# version (pandas >= 2.0 would otherwise return booleans), matching the 0/1
# dummies used elsewhere in this notebook.
district_dummies = pd.get_dummies(MIFULLSCHOOLDATA['DistrictCode'], prefix='districtdummy', dtype=int)

# Concatenate the dummy variables with the original dataset
MIFULLSCHOOLDATA = pd.concat([MIFULLSCHOOLDATA, district_dummies], axis=1)

# Attaining sense of Missingness

## Across Entire Sample

### Complete Missing

In [31]:
# Schools flagged as completely missing (complete_missing equals 1).
MIFULLSCHOOLDATA.loc[MIFULLSCHOOLDATA['complete_missing'].eq(1)]

Unnamed: 0,DistrictCode,District Name,Core Link,School Level,BuildingName_x,School Grade,School Link,Offering Link,ACS_CTE_AGFN,ACS_CTE_ARCHCONS,...,districtdummy_82995,districtdummy_82996,districtdummy_82997,districtdummy_83000,districtdummy_83010,districtdummy_83060,districtdummy_83070,districtdummy_83900,districtdummy_84010,districtdummy_84050
9,3000,Allegan Area Educational Service Agency - Dis...,https://legacy.mischooldata.org/ParentDashboar...,Other,Early College Allegan County,-,https://legacy.mischooldata.org/ParentDashboar...,https://legacy.mischooldata.org/ParentDashboar...,0,0,...,0,0,0,0,0,0,0,0,0,0
23,3020,Otsego Public Schools,https://legacy.mischooldata.org/ParentDashboar...,Other,Transportation/Maintenance Facility,-,https://legacy.mischooldata.org/ParentDashboar...,https://legacy.mischooldata.org/ParentDashboar...,0,0,...,0,0,0,0,0,0,0,0,0,0
73,4010,Alpena Public Schools,https://legacy.mischooldata.org/ParentDashboar...,Other,APS Partnership,-,https://legacy.mischooldata.org/ParentDashboar...,https://legacy.mischooldata.org/ParentDashboar...,0,0,...,0,0,0,0,0,0,0,0,0,0
74,4010,Alpena Public Schools,https://legacy.mischooldata.org/ParentDashboar...,Other,Transportation Center,-,https://legacy.mischooldata.org/ParentDashboar...,https://legacy.mischooldata.org/ParentDashboar...,0,0,...,0,0,0,0,0,0,0,0,0,0
94,7010,Arvon Township School District,https://legacy.mischooldata.org/ParentDashboar...,Elementary/Middle School,Arvon Township School,K-8,https://legacy.mischooldata.org/ParentDashboar...,https://legacy.mischooldata.org/ParentDashboar...,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3756,82975,Riverside Academy,https://legacy.mischooldata.org/ParentDashboar...,Elementary School,Riverside Academy - Pre-K,K,https://legacy.mischooldata.org/ParentDashboar...,https://legacy.mischooldata.org/ParentDashboar...,0,0,...,0,0,0,0,0,0,0,0,0,0
3781,83000,Wexford-Missaukee ISD - District created from...,https://legacy.mischooldata.org/ParentDashboar...,Other,Wexford-Missaukee Early College,-,https://legacy.mischooldata.org/ParentDashboar...,https://legacy.mischooldata.org/ParentDashboar...,0,0,...,0,0,0,1,0,0,0,0,0,0
3783,83010,Cadillac Area Public Schools,https://legacy.mischooldata.org/ParentDashboar...,Elementary School,Forest View Elementary School,K-5,https://legacy.mischooldata.org/ParentDashboar...,https://legacy.mischooldata.org/ParentDashboar...,0,0,...,0,0,0,0,1,0,0,0,0,0
3789,83010,Cadillac Area Public Schools,https://legacy.mischooldata.org/ParentDashboar...,Other,Viking Learning Center,-,https://legacy.mischooldata.org/ParentDashboar...,https://legacy.mischooldata.org/ParentDashboar...,0,0,...,0,0,0,0,1,0,0,0,0,0


#### Systematic Missingness Analysis

In [32]:
# Regress the complete_missing flag on school characteristics (race measured by
# per_white) to check whether complete missingness is systematic.
outcome_column = 'complete_missing'
sample_columns = [
    outcome_column,
    'per_white', 'per_ecodis', 'tot_all',
    'Elementary School', 'Elementary through High School', 'Elementary/Middle School',
    'High School', 'Middle School', 'Middle/High School', 'Other',
    'City', 'Rural', 'Suburb', 'Town', 'Not Specified',
    'Charter', 'Magnet', 'STRatio', 'Total Building PPE', 'EC_FEE', 'CompositeIndex',
]
# These two columns help define the complete-case sample but are left out of the
# predictors (presumably as the reference categories for level and urbanicity).
omitted_from_predictors = ('High School', 'Suburb')

# Complete cases only: any row with a missing value in the sample columns is dropped.
data = MIFULLSCHOOLDATA[sample_columns].dropna()

# Outcome (y) and predictors (X, in the sample-column order) plus an intercept column.
y = data[outcome_column]
predictor_columns = [
    column for column in sample_columns[1:]
    if column not in omitted_from_predictors
]
X = sm.add_constant(data[predictor_columns])

# Fit by ordinary least squares and print the summary table.
model = sm.OLS(y, X)
results = model.fit()
print(results.summary())

                            OLS Regression Results                            
Dep. Variable:       complete_missing   R-squared:                       0.010
Model:                            OLS   Adj. R-squared:                  0.004
Method:                 Least Squares   F-statistic:                     1.652
Date:                Mon, 27 Nov 2023   Prob (F-statistic):             0.0407
Time:                        10:30:36   Log-Likelihood:                 899.42
No. Observations:                3004   AIC:                            -1761.
Df Residuals:                    2985   BIC:                            -1647.
Df Model:                          18                                         
Covariance Type:            nonrobust                                         
                                     coef    std err          t      P>|t|      [0.025      0.975]
--------------------------------------------------------------------------------------------------
const       

In [33]:
# Regress the complete_missing flag on school characteristics (race measured by
# per_whiteasian) to check whether complete missingness is systematic.
outcome_column = 'complete_missing'
sample_columns = [
    outcome_column,
    'per_whiteasian', 'per_ecodis', 'tot_all',
    'Elementary School', 'Elementary through High School', 'Elementary/Middle School',
    'High School', 'Middle School', 'Middle/High School', 'Other',
    'City', 'Rural', 'Suburb', 'Town', 'Not Specified',
    'Charter', 'Magnet', 'STRatio', 'Total Building PPE', 'EC_FEE', 'CompositeIndex',
]
# These two columns help define the complete-case sample but are left out of the
# predictors (presumably as the reference categories for level and urbanicity).
omitted_from_predictors = ('High School', 'Suburb')

# Complete cases only: any row with a missing value in the sample columns is dropped.
data = MIFULLSCHOOLDATA[sample_columns].dropna()

# Outcome (y) and predictors (X, in the sample-column order) plus an intercept column.
y = data[outcome_column]
predictor_columns = [
    column for column in sample_columns[1:]
    if column not in omitted_from_predictors
]
X = sm.add_constant(data[predictor_columns])

# Fit by ordinary least squares and print the summary table.
model = sm.OLS(y, X)
results = model.fit()
print(results.summary())

                            OLS Regression Results                            
Dep. Variable:       complete_missing   R-squared:                       0.010
Model:                            OLS   Adj. R-squared:                  0.004
Method:                 Least Squares   F-statistic:                     1.649
Date:                Mon, 27 Nov 2023   Prob (F-statistic):             0.0413
Time:                        10:30:43   Log-Likelihood:                 899.40
No. Observations:                3004   AIC:                            -1761.
Df Residuals:                    2985   BIC:                            -1647.
Df Model:                          18                                         
Covariance Type:            nonrobust                                         
                                     coef    std err          t      P>|t|      [0.025      0.975]
--------------------------------------------------------------------------------------------------
const       

### Voluntary Missing

In [34]:
# Schools flagged as voluntarily missing (voluntary_missing equals 1).
MIFULLSCHOOLDATA.loc[MIFULLSCHOOLDATA['voluntary_missing'].eq(1)]

Unnamed: 0,DistrictCode,District Name,Core Link,School Level,BuildingName_x,School Grade,School Link,Offering Link,ACS_CTE_AGFN,ACS_CTE_ARCHCONS,...,districtdummy_82995,districtdummy_82996,districtdummy_82997,districtdummy_83000,districtdummy_83010,districtdummy_83060,districtdummy_83070,districtdummy_83900,districtdummy_84010,districtdummy_84050
0,1010,Alcona Community Schools,https://legacy.mischooldata.org/ParentDashboar...,Elementary School,Alcona Elementary School,K-5,https://legacy.mischooldata.org/ParentDashboar...,https://legacy.mischooldata.org/ParentDashboar...,0,0,...,0,0,0,0,0,0,0,0,0,0
2,2010,AuTrain-Onota Public Schools,https://legacy.mischooldata.org/ParentDashboar...,Elementary/Middle School,AuTrain-Onota Public School,K-8,https://legacy.mischooldata.org/ParentDashboar...,https://legacy.mischooldata.org/ParentDashboar...,0,0,...,0,0,0,0,0,0,0,0,0,0
3,2020,Burt Township School District,https://legacy.mischooldata.org/ParentDashboar...,Elementary through High School,Burt Township School,K-12,https://legacy.mischooldata.org/ParentDashboar...,https://legacy.mischooldata.org/ParentDashboar...,0,0,...,0,0,0,0,0,0,0,0,0,0
4,2070,Munising Public Schools,https://legacy.mischooldata.org/ParentDashboar...,Elementary/Middle School,William G. Mather Elementary School,K-6,https://legacy.mischooldata.org/ParentDashboar...,https://legacy.mischooldata.org/ParentDashboar...,0,0,...,0,0,0,0,0,0,0,0,0,0
5,2070,Munising Public Schools,https://legacy.mischooldata.org/ParentDashboar...,Middle/High School,Munising High and Middle School,6-12,https://legacy.mischooldata.org/ParentDashboar...,https://legacy.mischooldata.org/ParentDashboar...,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3783,83010,Cadillac Area Public Schools,https://legacy.mischooldata.org/ParentDashboar...,Elementary School,Forest View Elementary School,K-5,https://legacy.mischooldata.org/ParentDashboar...,https://legacy.mischooldata.org/ParentDashboar...,0,0,...,0,0,0,0,1,0,0,0,0,0
3789,83010,Cadillac Area Public Schools,https://legacy.mischooldata.org/ParentDashboar...,Other,Viking Learning Center,-,https://legacy.mischooldata.org/ParentDashboar...,https://legacy.mischooldata.org/ParentDashboar...,0,0,...,0,0,0,0,1,0,0,0,0,0
3796,84010,Michigan Department of Human Services,https://legacy.mischooldata.org/ParentDashboar...,Middle/High School,Bay Pines Center,6-12,https://legacy.mischooldata.org/ParentDashboar...,https://legacy.mischooldata.org/ParentDashboar...,0,0,...,0,0,0,0,0,0,0,0,1,0
3797,84010,Michigan Department of Human Services,https://legacy.mischooldata.org/ParentDashboar...,Middle/High School,Shawono Center,6-12,https://legacy.mischooldata.org/ParentDashboar...,https://legacy.mischooldata.org/ParentDashboar...,0,0,...,0,0,0,0,0,0,0,0,1,0


#### Systematic Missingness Analysis

In [35]:
# Regress the voluntary_missing flag on school characteristics (race measured by
# per_white) to check whether voluntary missingness is systematic.
outcome_column = 'voluntary_missing'
sample_columns = [
    outcome_column,
    'per_white', 'per_ecodis', 'tot_all',
    'Elementary School', 'Elementary through High School', 'Elementary/Middle School',
    'High School', 'Middle School', 'Middle/High School', 'Other',
    'City', 'Rural', 'Suburb', 'Town', 'Not Specified',
    'Charter', 'Magnet', 'STRatio', 'Total Building PPE', 'EC_FEE', 'CompositeIndex',
]
# These two columns help define the complete-case sample but are left out of the
# predictors (presumably as the reference categories for level and urbanicity).
omitted_from_predictors = ('High School', 'Suburb')

# Complete cases only: any row with a missing value in the sample columns is dropped.
data = MIFULLSCHOOLDATA[sample_columns].dropna()

# Outcome (y) and predictors (X, in the sample-column order) plus an intercept column.
y = data[outcome_column]
predictor_columns = [
    column for column in sample_columns[1:]
    if column not in omitted_from_predictors
]
X = sm.add_constant(data[predictor_columns])

# Fit by ordinary least squares and print the summary table.
model = sm.OLS(y, X)
results = model.fit()
print(results.summary())

                            OLS Regression Results                            
Dep. Variable:      voluntary_missing   R-squared:                       0.110
Model:                            OLS   Adj. R-squared:                  0.104
Method:                 Least Squares   F-statistic:                     20.44
Date:                Mon, 27 Nov 2023   Prob (F-statistic):           6.05e-63
Time:                        10:30:49   Log-Likelihood:                -844.86
No. Observations:                3004   AIC:                             1728.
Df Residuals:                    2985   BIC:                             1842.
Df Model:                          18                                         
Covariance Type:            nonrobust                                         
                                     coef    std err          t      P>|t|      [0.025      0.975]
--------------------------------------------------------------------------------------------------
const       

In [36]:
# Regress the voluntary_missing flag on school characteristics (race measured by
# per_whiteasian) to check whether voluntary missingness is systematic.
outcome_column = 'voluntary_missing'
sample_columns = [
    outcome_column,
    'per_whiteasian', 'per_ecodis', 'tot_all',
    'Elementary School', 'Elementary through High School', 'Elementary/Middle School',
    'High School', 'Middle School', 'Middle/High School', 'Other',
    'City', 'Rural', 'Suburb', 'Town', 'Not Specified',
    'Charter', 'Magnet', 'STRatio', 'Total Building PPE', 'EC_FEE', 'CompositeIndex',
]
# These two columns help define the complete-case sample but are left out of the
# predictors (presumably as the reference categories for level and urbanicity).
omitted_from_predictors = ('High School', 'Suburb')

# Complete cases only: any row with a missing value in the sample columns is dropped.
data = MIFULLSCHOOLDATA[sample_columns].dropna()

# Outcome (y) and predictors (X, in the sample-column order) plus an intercept column.
y = data[outcome_column]
predictor_columns = [
    column for column in sample_columns[1:]
    if column not in omitted_from_predictors
]
X = sm.add_constant(data[predictor_columns])

# Fit by ordinary least squares and print the summary table.
model = sm.OLS(y, X)
results = model.fit()
print(results.summary())

                            OLS Regression Results                            
Dep. Variable:      voluntary_missing   R-squared:                       0.108
Model:                            OLS   Adj. R-squared:                  0.103
Method:                 Least Squares   F-statistic:                     20.18
Date:                Mon, 27 Nov 2023   Prob (F-statistic):           4.55e-62
Time:                        10:30:49   Log-Likelihood:                -846.98
No. Observations:                3004   AIC:                             1732.
Df Residuals:                    2985   BIC:                             1846.
Df Model:                          18                                         
Covariance Type:            nonrobust                                         
                                     coef    std err          t      P>|t|      [0.025      0.975]
--------------------------------------------------------------------------------------------------
const       

## Across General Education Sample

In [37]:
# Restrict the sample to rows whose SCHOOL_TYPE is 'General Education'.
MIGENEDUSCHOOLDATA = MIFULLSCHOOLDATA.loc[
    MIFULLSCHOOLDATA['SCHOOL_TYPE'].eq('General Education')
]

In [38]:
# Keep only general-education schools with complete data on the main
# covariates / mediators: a row is removed if ANY of these columns is missing.
variables_to_check = [
    'voluntary_missing',
    'per_white', 'per_ecodis', 'tot_all',
    'Elementary School', 'Elementary through High School', 'Elementary/Middle School',
    'High School', 'Middle School', 'Middle/High School', 'Other',
    'City', 'Rural', 'Suburb', 'Town', 'Not Specified',
    'Charter', 'Magnet', 'STRatio', 'Total Building PPE', 'EC_FEE', 'CompositeIndex',
]

MIGENEDUSCHOOLDATA = MIGENEDUSCHOOLDATA.dropna(subset=variables_to_check, how='any')

In [39]:
# After the missing-value drop above, 2759 of the 2950 OPEN-ACTIVE General
# Education schools remain in the dataset.
MIGENEDUSCHOOLDATA

Unnamed: 0,DistrictCode,District Name,Core Link,School Level,BuildingName_x,School Grade,School Link,Offering Link,ACS_CTE_AGFN,ACS_CTE_ARCHCONS,...,districtdummy_82995,districtdummy_82996,districtdummy_82997,districtdummy_83000,districtdummy_83010,districtdummy_83060,districtdummy_83070,districtdummy_83900,districtdummy_84010,districtdummy_84050
0,1010,Alcona Community Schools,https://legacy.mischooldata.org/ParentDashboar...,Elementary School,Alcona Elementary School,K-5,https://legacy.mischooldata.org/ParentDashboar...,https://legacy.mischooldata.org/ParentDashboar...,0,0,...,0,0,0,0,0,0,0,0,0,0
1,1010,Alcona Community Schools,https://legacy.mischooldata.org/ParentDashboar...,Middle/High School,Alcona Community High School,6-12,https://legacy.mischooldata.org/ParentDashboar...,https://legacy.mischooldata.org/ParentDashboar...,1,0,...,0,0,0,0,0,0,0,0,0,0
2,2010,AuTrain-Onota Public Schools,https://legacy.mischooldata.org/ParentDashboar...,Elementary/Middle School,AuTrain-Onota Public School,K-8,https://legacy.mischooldata.org/ParentDashboar...,https://legacy.mischooldata.org/ParentDashboar...,0,0,...,0,0,0,0,0,0,0,0,0,0
3,2020,Burt Township School District,https://legacy.mischooldata.org/ParentDashboar...,Elementary through High School,Burt Township School,K-12,https://legacy.mischooldata.org/ParentDashboar...,https://legacy.mischooldata.org/ParentDashboar...,0,0,...,0,0,0,0,0,0,0,0,0,0
4,2070,Munising Public Schools,https://legacy.mischooldata.org/ParentDashboar...,Elementary/Middle School,William G. Mather Elementary School,K-6,https://legacy.mischooldata.org/ParentDashboar...,https://legacy.mischooldata.org/ParentDashboar...,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3791,83060,Manton Consolidated Schools,https://legacy.mischooldata.org/ParentDashboar...,Elementary/Middle School,Manton Consolidated Middle School,5-8,https://legacy.mischooldata.org/ParentDashboar...,https://legacy.mischooldata.org/ParentDashboar...,0,0,...,0,0,0,0,0,1,0,0,0,0
3792,83060,Manton Consolidated Schools,https://legacy.mischooldata.org/ParentDashboar...,High School,Manton Consolidated High School,9-12,https://legacy.mischooldata.org/ParentDashboar...,https://legacy.mischooldata.org/ParentDashboar...,0,0,...,0,0,0,0,0,1,0,0,0,0
3793,83070,Mesick Consolidated Schools,https://legacy.mischooldata.org/ParentDashboar...,Elementary School,Floyd M. Jewett Elem. School,K-5,https://legacy.mischooldata.org/ParentDashboar...,https://legacy.mischooldata.org/ParentDashboar...,0,0,...,0,0,0,0,0,0,1,0,0,0
3794,83070,Mesick Consolidated Schools,https://legacy.mischooldata.org/ParentDashboar...,Middle/High School,Mesick Consolidated Jr/Sr High School,6-12,https://legacy.mischooldata.org/ParentDashboar...,https://legacy.mischooldata.org/ParentDashboar...,0,0,...,0,0,0,0,0,0,1,0,0,0


### Complete Missing

In [40]:
# General-education schools whose complete_missing flag equals 1.
MIGENEDUSCHOOLDATA.loc[MIGENEDUSCHOOLDATA['complete_missing'].eq(1)]

Unnamed: 0,DistrictCode,District Name,Core Link,School Level,BuildingName_x,School Grade,School Link,Offering Link,ACS_CTE_AGFN,ACS_CTE_ARCHCONS,...,districtdummy_82995,districtdummy_82996,districtdummy_82997,districtdummy_83000,districtdummy_83010,districtdummy_83060,districtdummy_83070,districtdummy_83900,districtdummy_84010,districtdummy_84050
94,7010,Arvon Township School District,https://legacy.mischooldata.org/ParentDashboar...,Elementary/Middle School,Arvon Township School,K-8,https://legacy.mischooldata.org/ParentDashboar...,https://legacy.mischooldata.org/ParentDashboar...,0,0,...,0,0,0,0,0,0,0,0,0,0
103,8030,Hastings Area School District,https://legacy.mischooldata.org/ParentDashboar...,Elementary School,Northeastern Elementary School,K-5,https://legacy.mischooldata.org/ParentDashboar...,https://legacy.mischooldata.org/ParentDashboar...,0,0,...,0,0,0,0,0,0,0,0,0,0
122,9010,Bay City School District,https://legacy.mischooldata.org/ParentDashboar...,Elementary School,Hampton Elementary School,K-5,https://legacy.mischooldata.org/ParentDashboar...,https://legacy.mischooldata.org/ParentDashboar...,0,0,...,0,0,0,0,0,0,0,0,0,0
382,15902,Charlevoix Academy,https://legacy.mischooldata.org/ParentDashboar...,Elementary through High School,Charlevoix Academy,K-12,https://legacy.mischooldata.org/ParentDashboar...,https://legacy.mischooldata.org/ParentDashboar...,0,0,...,0,0,0,0,0,0,0,0,0,0
449,19125,Pewamo-Westphalia Community Schools,https://legacy.mischooldata.org/ParentDashboar...,Elementary School,Pewamo-Westphalia Elementary School,K-5,https://legacy.mischooldata.org/ParentDashboar...,https://legacy.mischooldata.org/ParentDashboar...,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3726,82938,Summit Academy North,https://legacy.mischooldata.org/ParentDashboar...,High School,Summit Academy North High School,9-12,https://legacy.mischooldata.org/ParentDashboar...,https://legacy.mischooldata.org/ParentDashboar...,0,0,...,0,0,0,0,0,0,0,0,0,0
3727,82940,Voyageur Academy,https://legacy.mischooldata.org/ParentDashboar...,Elementary/Middle School,Voyageur Academy,K-8,https://legacy.mischooldata.org/ParentDashboar...,https://legacy.mischooldata.org/ParentDashboar...,0,0,...,0,0,0,0,0,0,0,0,0,0
3744,82957,Hope of Detroit Academy,https://legacy.mischooldata.org/ParentDashboar...,Elementary through High School,Hope of Detroit Academy - Middle/High,5-12,https://legacy.mischooldata.org/ParentDashboar...,https://legacy.mischooldata.org/ParentDashboar...,0,0,...,0,0,0,0,0,0,0,0,0,0
3747,82963,George Washington Carver Academy,https://legacy.mischooldata.org/ParentDashboar...,Elementary School,George Washington Carver Elementary School,K-4,https://legacy.mischooldata.org/ParentDashboar...,https://legacy.mischooldata.org/ParentDashboar...,0,0,...,0,0,0,0,0,0,0,0,0,0


In [41]:
# OLS regression of the complete_missing flag on per_white plus the school
# level, locale, school-type and resource columns (general-education sample).
# NOTE(review): the stored summary reports Df Model = 18 although 19 predictors
# are supplied, which suggests one column is constant or collinear in this
# sample -- TODO confirm which one.
# NOTE(review): the fit uses the default nonrobust covariance; if the outcome is
# a 0/1 flag, heteroskedasticity-robust standard errors may be more appropriate.

# Pull the outcome and every candidate column, keeping complete cases only.
data = MIGENEDUSCHOOLDATA.loc[:, [
    'complete_missing',
    'per_white', 'per_ecodis', 'tot_all',
    'Elementary School', 'Elementary through High School', 'Elementary/Middle School',
    'High School', 'Middle School', 'Middle/High School', 'Other',
    'City', 'Rural', 'Suburb', 'Town', 'Not Specified',
    'Charter', 'Magnet', 'STRatio', 'Total Building PPE', 'EC_FEE', 'CompositeIndex',
]].dropna()

# Outcome vector.
y = data['complete_missing']

# Design matrix: everything except the outcome, with an intercept added.
# 'High School' and 'Suburb' are left out of X -- presumably they serve as the
# reference categories for the level and locale indicators.
X = sm.add_constant(
    data.drop(columns=['complete_missing', 'High School', 'Suburb'])
)

# Estimate and report.
model = sm.OLS(y, X)
results = model.fit()
print(results.summary())

                            OLS Regression Results                            
Dep. Variable:       complete_missing   R-squared:                       0.012
Model:                            OLS   Adj. R-squared:                  0.006
Method:                 Least Squares   F-statistic:                     1.923
Date:                Mon, 27 Nov 2023   Prob (F-statistic):             0.0109
Time:                        10:31:03   Log-Likelihood:                 899.96
No. Observations:                2759   AIC:                            -1762.
Df Residuals:                    2740   BIC:                            -1649.
Df Model:                          18                                         
Covariance Type:            nonrobust                                         
                                     coef    std err          t      P>|t|      [0.025      0.975]
--------------------------------------------------------------------------------------------------
const       

In [42]:
# OLS regression of the complete_missing flag on per_whiteasian plus the school
# level, locale, school-type and resource columns (general-education sample).
# NOTE(review): the stored summary reports Df Model = 18 although 19 predictors
# are supplied, which suggests one column is constant or collinear in this
# sample -- TODO confirm which one.
# NOTE(review): the fit uses the default nonrobust covariance; if the outcome is
# a 0/1 flag, heteroskedasticity-robust standard errors may be more appropriate.

# Pull the outcome and every candidate column, keeping complete cases only.
data = MIGENEDUSCHOOLDATA.loc[:, [
    'complete_missing',
    'per_whiteasian', 'per_ecodis', 'tot_all',
    'Elementary School', 'Elementary through High School', 'Elementary/Middle School',
    'High School', 'Middle School', 'Middle/High School', 'Other',
    'City', 'Rural', 'Suburb', 'Town', 'Not Specified',
    'Charter', 'Magnet', 'STRatio', 'Total Building PPE', 'EC_FEE', 'CompositeIndex',
]].dropna()

# Outcome vector.
y = data['complete_missing']

# Design matrix: everything except the outcome, with an intercept added.
# 'High School' and 'Suburb' are left out of X -- presumably they serve as the
# reference categories for the level and locale indicators.
X = sm.add_constant(
    data.drop(columns=['complete_missing', 'High School', 'Suburb'])
)

# Estimate and report.
model = sm.OLS(y, X)
results = model.fit()
print(results.summary())

                            OLS Regression Results                            
Dep. Variable:       complete_missing   R-squared:                       0.012
Model:                            OLS   Adj. R-squared:                  0.006
Method:                 Least Squares   F-statistic:                     1.904
Date:                Mon, 27 Nov 2023   Prob (F-statistic):             0.0120
Time:                        10:31:05   Log-Likelihood:                 899.79
No. Observations:                2759   AIC:                            -1762.
Df Residuals:                    2740   BIC:                            -1649.
Df Model:                          18                                         
Covariance Type:            nonrobust                                         
                                     coef    std err          t      P>|t|      [0.025      0.975]
--------------------------------------------------------------------------------------------------
const       

### Voluntary Missing

In [43]:
# General-education schools whose voluntary_missing flag equals 1.
MIGENEDUSCHOOLDATA.loc[MIGENEDUSCHOOLDATA['voluntary_missing'].eq(1)]

Unnamed: 0,DistrictCode,District Name,Core Link,School Level,BuildingName_x,School Grade,School Link,Offering Link,ACS_CTE_AGFN,ACS_CTE_ARCHCONS,...,districtdummy_82995,districtdummy_82996,districtdummy_82997,districtdummy_83000,districtdummy_83010,districtdummy_83060,districtdummy_83070,districtdummy_83900,districtdummy_84010,districtdummy_84050
0,1010,Alcona Community Schools,https://legacy.mischooldata.org/ParentDashboar...,Elementary School,Alcona Elementary School,K-5,https://legacy.mischooldata.org/ParentDashboar...,https://legacy.mischooldata.org/ParentDashboar...,0,0,...,0,0,0,0,0,0,0,0,0,0
2,2010,AuTrain-Onota Public Schools,https://legacy.mischooldata.org/ParentDashboar...,Elementary/Middle School,AuTrain-Onota Public School,K-8,https://legacy.mischooldata.org/ParentDashboar...,https://legacy.mischooldata.org/ParentDashboar...,0,0,...,0,0,0,0,0,0,0,0,0,0
3,2020,Burt Township School District,https://legacy.mischooldata.org/ParentDashboar...,Elementary through High School,Burt Township School,K-12,https://legacy.mischooldata.org/ParentDashboar...,https://legacy.mischooldata.org/ParentDashboar...,0,0,...,0,0,0,0,0,0,0,0,0,0
4,2070,Munising Public Schools,https://legacy.mischooldata.org/ParentDashboar...,Elementary/Middle School,William G. Mather Elementary School,K-6,https://legacy.mischooldata.org/ParentDashboar...,https://legacy.mischooldata.org/ParentDashboar...,0,0,...,0,0,0,0,0,0,0,0,0,0
5,2070,Munising Public Schools,https://legacy.mischooldata.org/ParentDashboar...,Middle/High School,Munising High and Middle School,6-12,https://legacy.mischooldata.org/ParentDashboar...,https://legacy.mischooldata.org/ParentDashboar...,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3744,82957,Hope of Detroit Academy,https://legacy.mischooldata.org/ParentDashboar...,Elementary through High School,Hope of Detroit Academy - Middle/High,5-12,https://legacy.mischooldata.org/ParentDashboar...,https://legacy.mischooldata.org/ParentDashboar...,0,0,...,0,0,0,0,0,0,0,0,0,0
3745,82958,Joy Preparatory Academy,https://legacy.mischooldata.org/ParentDashboar...,Elementary/Middle School,Joy Preparatory Academy,K-8,https://legacy.mischooldata.org/ParentDashboar...,https://legacy.mischooldata.org/ParentDashboar...,0,0,...,0,0,0,0,0,0,0,0,0,0
3747,82963,George Washington Carver Academy,https://legacy.mischooldata.org/ParentDashboar...,Elementary School,George Washington Carver Elementary School,K-4,https://legacy.mischooldata.org/ParentDashboar...,https://legacy.mischooldata.org/ParentDashboar...,0,0,...,0,0,0,0,0,0,0,0,0,0
3748,82963,George Washington Carver Academy,https://legacy.mischooldata.org/ParentDashboar...,Elementary/Middle School,George Washington Carver Middle School,5-8,https://legacy.mischooldata.org/ParentDashboar...,https://legacy.mischooldata.org/ParentDashboar...,0,0,...,0,0,0,0,0,0,0,0,0,0


In [44]:
# OLS regression of the voluntary_missing flag on per_white plus the school
# level, locale, school-type and resource columns (general-education sample).
# NOTE(review): the stored summary reports Df Model = 18 although 19 predictors
# are supplied, which suggests one column is constant or collinear in this
# sample -- TODO confirm which one.
# NOTE(review): the fit uses the default nonrobust covariance; if the outcome is
# a 0/1 flag, heteroskedasticity-robust standard errors may be more appropriate.

# Pull the outcome and every candidate column, keeping complete cases only.
data = MIGENEDUSCHOOLDATA.loc[:, [
    'voluntary_missing',
    'per_white', 'per_ecodis', 'tot_all',
    'Elementary School', 'Elementary through High School', 'Elementary/Middle School',
    'High School', 'Middle School', 'Middle/High School', 'Other',
    'City', 'Rural', 'Suburb', 'Town', 'Not Specified',
    'Charter', 'Magnet', 'STRatio', 'Total Building PPE', 'EC_FEE', 'CompositeIndex',
]].dropna()

# Outcome vector.
y = data['voluntary_missing']

# Design matrix: everything except the outcome, with an intercept added.
# 'High School' and 'Suburb' are left out of X -- presumably they serve as the
# reference categories for the level and locale indicators.
X = sm.add_constant(
    data.drop(columns=['voluntary_missing', 'High School', 'Suburb'])
)

# Estimate and report.
model = sm.OLS(y, X)
results = model.fit()
print(results.summary())

                            OLS Regression Results                            
Dep. Variable:      voluntary_missing   R-squared:                       0.109
Model:                            OLS   Adj. R-squared:                  0.103
Method:                 Least Squares   F-statistic:                     18.68
Date:                Mon, 27 Nov 2023   Prob (F-statistic):           8.88e-57
Time:                        10:31:09   Log-Likelihood:                -674.98
No. Observations:                2759   AIC:                             1388.
Df Residuals:                    2740   BIC:                             1500.
Df Model:                          18                                         
Covariance Type:            nonrobust                                         
                                     coef    std err          t      P>|t|      [0.025      0.975]
--------------------------------------------------------------------------------------------------
const       

In [45]:
# OLS regression of the voluntary_missing flag on per_whiteasian plus the school
# level, locale, school-type and resource columns (general-education sample).
# NOTE(review): the stored summary reports Df Model = 18 although 19 predictors
# are supplied, which suggests one column is constant or collinear in this
# sample -- TODO confirm which one.
# NOTE(review): the fit uses the default nonrobust covariance; if the outcome is
# a 0/1 flag, heteroskedasticity-robust standard errors may be more appropriate.

# Pull the outcome and every candidate column, keeping complete cases only.
data = MIGENEDUSCHOOLDATA.loc[:, [
    'voluntary_missing',
    'per_whiteasian', 'per_ecodis', 'tot_all',
    'Elementary School', 'Elementary through High School', 'Elementary/Middle School',
    'High School', 'Middle School', 'Middle/High School', 'Other',
    'City', 'Rural', 'Suburb', 'Town', 'Not Specified',
    'Charter', 'Magnet', 'STRatio', 'Total Building PPE', 'EC_FEE', 'CompositeIndex',
]].dropna()

# Outcome vector.
y = data['voluntary_missing']

# Design matrix: everything except the outcome, with an intercept added.
# 'High School' and 'Suburb' are left out of X -- presumably they serve as the
# reference categories for the level and locale indicators.
X = sm.add_constant(
    data.drop(columns=['voluntary_missing', 'High School', 'Suburb'])
)

# Estimate and report.
model = sm.OLS(y, X)
results = model.fit()
print(results.summary())

                            OLS Regression Results                            
Dep. Variable:      voluntary_missing   R-squared:                       0.108
Model:                            OLS   Adj. R-squared:                  0.102
Method:                 Least Squares   F-statistic:                     18.43
Date:                Mon, 27 Nov 2023   Prob (F-statistic):           6.01e-56
Time:                        10:31:11   Log-Likelihood:                -677.00
No. Observations:                2759   AIC:                             1392.
Df Residuals:                    2740   BIC:                             1505.
Df Model:                          18                                         
Covariance Type:            nonrobust                                         
                                     coef    std err          t      P>|t|      [0.025      0.975]
--------------------------------------------------------------------------------------------------
const       

# Saving Data

In [46]:
# Persist both analysis samples as CSV files in the project data directory.
# The paths are built from `data_home` (defined in the set-up section) rather
# than repeating the absolute user directory, matching how the input files are
# read; the resulting file locations are unchanged.
# index=True writes the DataFrame index as the first CSV column.
MIFULLSCHOOLDATA.to_csv(f"{data_home}/MIFULLSCHOOLDATA_11_27_23.csv", index=True)
MIGENEDUSCHOOLDATA.to_csv(f"{data_home}/MIGENEDUSCHOOLDATA_11_27_23.csv", index=True)