In [1]:
import warnings
import pandas as pd
from pathlib import Path

In [4]:
# Path to the school metric scores CSV (relative to the notebook's working directory)
school_metrics_data_to_load = Path("SPREE_SY2223_School_Metric_Scores (1).csv")

# Read the school metric scores file into a pandas DataFrame
school_metrics_data = pd.read_csv(school_metrics_data_to_load)

In [5]:
# Preview the first rows to confirm the columns loaded as expected
school_metrics_data.head()

Unnamed: 0,accountability_year,school_id,school_name,report_type,metric,numerator,denominator,metric_score,score_previous,score_change,tier
0,2022-2023,1010,John Bartram High School,High School,Admission Type,,,Catchment,Neighborhood,Not Applicable,
1,2022-2023,1010,John Bartram High School,High School,% of Students Attending Less Than 80% of Instr...,292.0,617.0,47.3,,,
2,2022-2023,1010,John Bartram High School,High School,% of Students Attending 80-85% of Instructiona...,51.0,617.0,8.3,,,
3,2022-2023,1010,John Bartram High School,High School,% of Students Attending 85-90% of Instructiona...,66.0,617.0,10.7,,,
4,2022-2023,1010,John Bartram High School,High School,% of Students Attending at Least 90% of Instru...,208.0,617.0,33.7,33.8,-0.1,Maintaining


In [6]:
# Summarize dtypes and non-null counts per column.
# In the output below, numerator, denominator and tier are mostly missing, and
# metric_score is loaded as object because it mixes text (e.g. "Catchment") with numbers.
school_metrics_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 83980 entries, 0 to 83979
Data columns (total 11 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   accountability_year  83980 non-null  object 
 1   school_id            83980 non-null  int64  
 2   school_name          83980 non-null  object 
 3   report_type          83980 non-null  object 
 4   metric               83980 non-null  object 
 5   numerator            20464 non-null  float64
 6   denominator          17254 non-null  float64
 7   metric_score         83980 non-null  object 
 8   score_previous       71701 non-null  object 
 9   score_change         71706 non-null  object 
 10  tier                 11507 non-null  object 
dtypes: float64(2), int64(1), object(8)
memory usage: 7.0+ MB


In [16]:
# Work from the school_id column: collect its distinct values, then count them
school_id_column = school_metrics_data['school_id']

# Distinct school ids present in the data
unique_ids = school_id_column.unique()

# How many distinct school ids there are
schools_count = school_id_column.nunique()
print("Number of unique schools:", schools_count)

Number of unique schools: 294


In [15]:
# Work from the school_name column: collect its distinct values, then count them
school_name_column = school_metrics_data['school_name']

# Distinct school names present in the data
unique_names = school_name_column.unique()

# How many distinct school names there are (compare with the school_id count)
school_name_count = school_name_column.nunique()
print("Number of unique schools:", school_name_count)

Number of unique schools: 294


In [17]:
# Work from the metric column: collect its distinct values, then count them
metric_column = school_metrics_data['metric']

# Distinct metric names present in the data
unique_metrics = metric_column.unique()

# How many distinct metrics there are
metrics_count = metric_column.nunique()
print("Number of unique metrics:", metrics_count)

Number of unique metrics: 260


In [10]:
# Work from the report_type column: count its distinct values, then list them
report_type_column = school_metrics_data['report_type']

# How many distinct report types there are
report_types_count = report_type_column.nunique()
print("Number of report types:", report_types_count)

# The distinct report types themselves
unique_reports = report_type_column.unique()
print("Report Types:", unique_reports)

Number of report types: 4
Report Types: ['High School' 'Middle School' 'K-8 School' 'Elementary School']


In [35]:
# Identify rows whose (school_id, metric) pair occurs more than once.
# keep=False flags every occurrence of a repeated pair, not only the later repeats.
# NOTE(review): presumably these are schools reported under more than one
# report_type -- confirm before deduplicating.
duplicate_mask = school_metrics_data.duplicated(subset=['school_id', 'metric'], keep=False)
duplicates = school_metrics_data[duplicate_mask]

# Duplicate rows for a single example school (not used further in this cell)
filtered_data = duplicates[duplicates['school_name'] == 'Julia R. Masterman School']

# How many distinct schools have at least one repeated (school_id, metric) pair
duplicate_school_count = duplicates['school_name'].nunique()
print("Number of schools with duplicate (school_id, metric) rows:", duplicate_school_count)

# Names of those schools
duplicate_schools = duplicates['school_name'].unique()
print("Schools with duplicate (school_id, metric) rows:", duplicate_schools)

Number of unique schools: 29
Number of unique schools: ['Julia R. Masterman School' 'Girard Academic Music Program'
 'The Science Leadership Academy at Beeber'
 'Community Academy of Philadelphia Charter School'
 'Freire Charter School' 'Math, Civics and Sciences Charter School'
 'Philadelphia Academy Charter School'
 'Mastery Charter School at Hardy Williams'
 'Mathematics, Science, and Technology Community Charter School (MaST)'
 'Mariana Bracetti Academy Charter School'
 'Esperanza Academy Charter School' 'New Foundations Charter School'
 'Philadelphia Performing Arts: A String Theory Charter School'
 'Mastery Charter School at Lenfest'
 'First Philadelphia Preparatory Charter School'
 'Maritime Academy Charter School (MACHS)' 'Belmont Charter School'
 'Mastery Charter School at Shoemaker'
 'Mastery Charter School at Thomas Campus'
 'Boys Latin of Philadelphia Charter School'
 'Mastery Charter School at Pickett'
 'Sankofa Freedom Academy Charter School' 'Tacony Academy Charter Schoo

In [36]:
# Create an attendance DataFrame: one row per school_id, one column per selected metric.
#
# NOTE(review): the values come from the `numerator` column, so the columns titled
# "% of Students ..." hold numerator values rather than the percentages in
# `metric_score` -- confirm this is intended.
# NOTE(review): in the output shown below, the "Number of Dropouts" column is all NaN,
# presumably because `numerator` is not populated for that metric -- confirm.

# Metrics to pivot
selected_metrics = ['% of Students Attending at Least 90% of Instructional Days', 
                    '% of Students Attending Less Than 80% of Instructional Days', 
                    '% of Students Attending 80-85% of Instructional Days', 
                    '% of Students Attending 85-90% of Instructional Days', 
                    '% of Students Attending 90-95% of Instructional Days', 
                    '% of Students Attending at Least 95% of Instructional Days', 
                    'Number of Dropouts - Last School of Touch (Grades 7-12)'
                   ]

# Keep only the rows for the selected metrics
selected_rows = school_metrics_data[school_metrics_data['metric'].isin(selected_metrics)]

# Collapse repeated (school_id, metric) pairs into one row so the pivot below has
# unique index/column combinations.
# NOTE(review): repeated pairs are averaged; verify that a mean of numerators is
# the right way to combine them.
aggregated_data = selected_rows.groupby(['school_id', 'metric'], as_index=False).agg({'numerator': 'mean'})

# Pivot to wide form. aggregated_data already contains only the selected metrics,
# so it does not need to be filtered a second time.
attendence_df = aggregated_data.pivot(index='school_id', columns='metric', values='numerator').reset_index()

# Drop the leftover "metric" label on the column axis
attendence_df.columns.name = None

attendence_df

Unnamed: 0,school_id,% of Students Attending 80-85% of Instructional Days,% of Students Attending 85-90% of Instructional Days,% of Students Attending 90-95% of Instructional Days,% of Students Attending Less Than 80% of Instructional Days,% of Students Attending at Least 90% of Instructional Days,% of Students Attending at Least 95% of Instructional Days,Number of Dropouts - Last School of Touch (Grades 7-12)
0,1010,51.0,66.0,76.0,292.0,208.0,132.0,
1,1020,73.0,104.0,94.0,247.0,184.0,90.0,
2,1030,55.0,119.0,152.0,153.0,251.0,99.0,
3,1050,28.0,55.0,96.0,27.0,231.0,135.0,
4,1100,66.0,77.0,82.0,157.0,174.0,92.0,
...,...,...,...,...,...,...,...,...
289,8430,22.0,60.0,222.0,13.0,806.0,584.0,
290,8440,49.0,111.0,329.0,44.0,926.0,597.0,
291,8560,12.0,20.0,31.0,12.0,160.0,129.0,
292,8770,45.0,91.0,236.0,57.0,573.0,337.0,


In [37]:
# Display the rows flagged as duplicated on (school_id, metric)
duplicates

Unnamed: 0,accountability_year,school_id,school_name,report_type,metric,numerator,denominator,metric_score,score_previous,score_change,tier
8320,2022-2023,2140,Julia R. Masterman School,High School,Admission Type,,,Criteria-Based,Criteria-Based,Not Applicable,
8321,2022-2023,2140,Julia R. Masterman School,High School,% of Students Attending Less Than 80% of Instr...,7.0,460.0,1.5,,,
8322,2022-2023,2140,Julia R. Masterman School,High School,% of Students Attending 80-85% of Instructiona...,5.0,460.0,1.1,,,
8323,2022-2023,2140,Julia R. Masterman School,High School,% of Students Attending 85-90% of Instructiona...,9.0,460.0,2,,,
8324,2022-2023,2140,Julia R. Masterman School,High School,% of Students Attending at Least 90% of Instru...,439.0,460.0,95.4,96.5,-1.1,Not Improving
...,...,...,...,...,...,...,...,...,...,...,...
83975,2022-2023,8780,Philadelphia Virtual Academy,K-8 School,PSSA Science: % of Students Proficient or Adva...,,,Insufficient Sample,Not Applicable,Not Applicable,
83976,2022-2023,8780,Philadelphia Virtual Academy,K-8 School,% of Teachers Rated as Distinguished,,,Insufficient Sample,Insufficient Sample,Not Applicable,
83977,2022-2023,8780,Philadelphia Virtual Academy,K-8 School,% of Teachers Rated as Proficient,,,Insufficient Sample,Insufficient Sample,Not Applicable,
83978,2022-2023,8780,Philadelphia Virtual Academy,K-8 School,% of Teachers Attending at Least 90% of Instru...,13.0,17.0,76.5,85.7,-9.2,Not Improving
