In [1]:
pip install pandas openpyxl scikit-learn

Note: you may need to restart the kernel to use updated packages.


In [2]:
import pandas as pd
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
import numpy as np


In [3]:
# Step 1: Load the datasets
# Assuming CSV conversion has been done
performance = pd.read_excel('/Users/ifeoma/Downloads/SCHOOL BOARD PERFORMANCE(18_19).xlsx')
funding = pd.read_excel('/Users/ifeoma/Downloads/FUNDING(18_19).xlsx')

In [4]:
data = pd.merge(performance, funding, on='Board Name')

In [5]:
data['Total Funding'] = data[['Total Funding(2018-2019)', 'Total Funding(2019-2020)', 
                              'Total Funding(2020-2021)', 'Total Funding(2021-2022)']].sum(axis=1)

In [6]:
# Select columns needed for PCA
data_for_pca = data[['Graduation Rate 2022','Academic Performance(OSSLT PASS RATE(19/20)', 'Total Funding']]

In [7]:
scaler = StandardScaler()
data_standardized = scaler.fit_transform(data_for_pca)

In [8]:
imputer = SimpleImputer(missing_values=np.nan, strategy='mean')
data_imputed = imputer.fit_transform(data_for_pca)


In [9]:
# Standardize the imputed data
scaler = StandardScaler()
data_standardized = scaler.fit_transform(data_imputed)

In [10]:
# Apply PCA
pca = PCA(n_components=1)  # Using 1 component for simplicity
principalComponents = pca.fit_transform(data_standardized)

In [11]:

# Add PCA results back to the original data frame (ensure it matches the rows after dropna or imputation)
data['Composite Score'] = principalComponents

In [12]:
# Normalize composite score to a 0-10 scale for ranking
data['Normalized Score'] = (data['Composite Score'] - data['Composite Score'].min()) / (data['Composite Score'].max() - data['Composite Score'].min()) * 10

In [13]:
# Sort by 'Normalized Score' to rank school boards
data_sorted = data.sort_values(by='Normalized Score', ascending=False)
data_sorted[['Board Name', 'Normalized Score']]

# Round the 'Normalized Score' to one decimal place
data_sorted['Normalized Score'] = data_sorted['Normalized Score'].round(1)

# Create a new column to format the results as "Board Name - Score"
data_sorted['Formatted Ranking'] = data_sorted['Board Name'] + ' - ' + data_sorted['Normalized Score'].astype(str)

# Select the 'Formatted Ranking' column to display the results
rankings = data_sorted['Formatted Ranking'].tolist()

# Display the rankings
for ranking in rankings:
    print(ranking)



Superior-Greenstone DSB - 10.0
Keewatin-Patricia DSB - 9.9
DSB Ontario North East - 8.9
Grand Erie DSB - 7.8
Algoma DSB - 7.5
Kenora CDSB - 7.1
Rainbow DSB - 6.8
Lambton Kent DSB - 6.4
Rainy River DSB - 6.4
Near North DSB - 6.4
Bluewater DSB - 6.0
Lakehead DSB - 5.9
Hastings & Prince Edward DSB - 5.8
CS public du Nord-Est de l'Ontario - 5.6
Northeastern CDSB - 5.6
Thames Valley DSB - 5.6
Nipissing-Parry Sound CDSB - 5.5
Trillium Lakelands DSB - 5.5
James Bay Lowlands SSB - 5.0
Hamilton-Wentworth DSB - 4.8
Greater Essex County DSB - 4.8
Thunder Bay CDSB - 4.8
Renfrew County DSB - 4.7
Limestone DSB - 4.5
Avon Maitland DSB - 4.5
Upper Canada DSB - 4.3
Bruce-Grey CDSB - 4.2
Kawartha Pine Ridge DSB - 4.1
Renfrew County CDSB - 4.0
Simcoe County DSB - 3.8
DSB Niagara - 3.7
Waterloo Region DSB - 3.7
Huron Perth CDSB - 3.7
Simcoe Muskoka CDSB - 3.6
St Clair CDSB - 3.5
Sudbury CDSB - 3.5
Upper Grand DSB - 3.5
Hamilton-Wentworth CDSB - 3.4
CDSB of Eastern Ontario - 3.4
Peterborough Victoria North

In [14]:
import os
# Define the file path for the Excel file on your Desktop
home_path = os.path.expanduser('~')  # Gets the home directory
file_path = os.path.join(home_path, 'Desktop', 'school_board_rankings.xlsx')

# Select the columns you want to write to the Excel file
data_to_save = data_sorted[['Board Name', 'Normalized Score']]

# Write the selected DataFrame to an Excel file
data_to_save.to_excel(file_path, index=False, sheet_name='School Board Rankings')

print(f"Rankings have been saved to {file_path}")

Rankings have been saved to /Users/ifeoma/Desktop/school_board_rankings.xlsx
