In [1]:
import numpy as np
import pandas as pd 
import matplotlib.pyplot as plt

In [2]:
# Load the dataset
data=pd.read_csv('General_Education_School.csv')

data.head(6)

Unnamed: 0,DBN,School Name,School Type,Parent Response Rate,Teacher Response Rate,Student Response Rate,Total Safety and Respect Score,Total Communication Score,Total Engagement Score,Total Academic Expectations Score
0,dbn,locationname,,rr_p,rr_t,rr_s,saf_tot_12,com_tot_12,eng_tot_12,aca_tot_12
1,01M015,P.S. 015 Roberto Clemente,Elementary School,61%,92%,,8.4,8.2,8.3,8.2
2,01M019,P.S. 019 Asher Levy,Elementary School,92%,94%,,8.4,8,8,8.2
3,01M020,P.S. 020 Anna Silver,Elementary School,88%,73%,,8.5,7.8,8.2,8.2
4,01M034,P.S. 034 Franklin D. Roosevelt,Elementary/Middle School,39%,58%,96%,7.3,6.9,7.3,7.7
5,01M063,P.S. 063 William Mckinley,Elementary School,58%,100%,,8.5,7.4,7.6,7.8


In [3]:
## Delete the first row

data.drop(0,inplace=True)


In [4]:

#reset the index if needed
data = data.reset_index(drop=True)
data.head()

Unnamed: 0,DBN,School Name,School Type,Parent Response Rate,Teacher Response Rate,Student Response Rate,Total Safety and Respect Score,Total Communication Score,Total Engagement Score,Total Academic Expectations Score
0,01M015,P.S. 015 Roberto Clemente,Elementary School,61%,92%,,8.4,8.2,8.3,8.2
1,01M019,P.S. 019 Asher Levy,Elementary School,92%,94%,,8.4,8.0,8.0,8.2
2,01M020,P.S. 020 Anna Silver,Elementary School,88%,73%,,8.5,7.8,8.2,8.2
3,01M034,P.S. 034 Franklin D. Roosevelt,Elementary/Middle School,39%,58%,96%,7.3,6.9,7.3,7.7
4,01M063,P.S. 063 William Mckinley,Elementary School,58%,100%,,8.5,7.4,7.6,7.8


In [5]:
#check if data start from index 0
data.index[0]

0

In [6]:
#check if dataset has duplicate rows
data.duplicated().sum()

3

In [7]:
#delete duplacate rows
data = data.drop_duplicates()


In [8]:
# 1. Number of participating schools
num_schools = data.shape[0]
print(f"Number of participating schools: {num_schools}")


Number of participating schools: 1667


In [9]:
# 2. Types of schools in the dataset

school_types=data['School Type'].unique()
print("Types of schools:", school_types)

Types of schools: ['Elementary School' 'Elementary/Middle School' 'Middle/High School'
 'Middle School' 'High School' 'Transfer High School'
 'Elementary/Middle/High School' 'Early Childhood School' 'YABC']


In [10]:
data.info()

<class 'pandas.core.frame.DataFrame'>
Index: 1667 entries, 0 to 1666
Data columns (total 10 columns):
 #   Column                             Non-Null Count  Dtype 
---  ------                             --------------  ----- 
 0   DBN                                1667 non-null   object
 1   School Name                        1667 non-null   object
 2   School Type                        1667 non-null   object
 3   Parent Response Rate               1667 non-null   object
 4   Teacher Response Rate              1667 non-null   object
 5   Student Response Rate              994 non-null    object
 6   Total Safety and Respect Score     1667 non-null   object
 7   Total Communication Score          1667 non-null   object
 8   Total Engagement Score             1667 non-null   object
 9   Total Academic Expectations Score  1667 non-null   object
dtypes: object(10)
memory usage: 143.3+ KB


In [11]:

# --- Data Cleaning ---
# Convert percentage strings to float
for col in ['Parent Response Rate', 'Teacher Response Rate', 'Student Response Rate']:
    data[col] = data[col].str.rstrip('%').astype('float') / 100


data.head()

Unnamed: 0,DBN,School Name,School Type,Parent Response Rate,Teacher Response Rate,Student Response Rate,Total Safety and Respect Score,Total Communication Score,Total Engagement Score,Total Academic Expectations Score
0,01M015,P.S. 015 Roberto Clemente,Elementary School,0.61,0.92,,8.4,8.2,8.3,8.2
1,01M019,P.S. 019 Asher Levy,Elementary School,0.92,0.94,,8.4,8.0,8.0,8.2
2,01M020,P.S. 020 Anna Silver,Elementary School,0.88,0.73,,8.5,7.8,8.2,8.2
3,01M034,P.S. 034 Franklin D. Roosevelt,Elementary/Middle School,0.39,0.58,0.96,7.3,6.9,7.3,7.7
4,01M063,P.S. 063 William Mckinley,Elementary School,0.58,1.0,,8.5,7.4,7.6,7.8


In [12]:
data.info()

<class 'pandas.core.frame.DataFrame'>
Index: 1667 entries, 0 to 1666
Data columns (total 10 columns):
 #   Column                             Non-Null Count  Dtype  
---  ------                             --------------  -----  
 0   DBN                                1667 non-null   object 
 1   School Name                        1667 non-null   object 
 2   School Type                        1667 non-null   object 
 3   Parent Response Rate               1667 non-null   float64
 4   Teacher Response Rate              1667 non-null   float64
 5   Student Response Rate              994 non-null    float64
 6   Total Safety and Respect Score     1667 non-null   object 
 7   Total Communication Score          1667 non-null   object 
 8   Total Engagement Score             1667 non-null   object 
 9   Total Academic Expectations Score  1667 non-null   object 
dtypes: float64(3), object(7)
memory usage: 143.3+ KB


In [13]:
data['Total Safety and Respect Score']=data['Total Safety and Respect Score'].astype(float)
data['Total Communication Score']=data['Total Communication Score'].astype(float)
data['Total Engagement Score']=data['Total Engagement Score'].astype(float)
data['Total Academic Expectations Score']=data['Total Academic Expectations Score'].astype(float)

data.info()

<class 'pandas.core.frame.DataFrame'>
Index: 1667 entries, 0 to 1666
Data columns (total 10 columns):
 #   Column                             Non-Null Count  Dtype  
---  ------                             --------------  -----  
 0   DBN                                1667 non-null   object 
 1   School Name                        1667 non-null   object 
 2   School Type                        1667 non-null   object 
 3   Parent Response Rate               1667 non-null   float64
 4   Teacher Response Rate              1667 non-null   float64
 5   Student Response Rate              994 non-null    float64
 6   Total Safety and Respect Score     1667 non-null   float64
 7   Total Communication Score          1667 non-null   float64
 8   Total Engagement Score             1667 non-null   float64
 9   Total Academic Expectations Score  1667 non-null   float64
dtypes: float64(7), object(3)
memory usage: 143.3+ KB


In [14]:
# Calculate averages and convert back to percentage for display
avg_parent_response = data['Parent Response Rate'].mean() * 100
avg_teacher_response = data['Teacher Response Rate'].mean() * 100
avg_student_response = data['Student Response Rate'].mean() * 100

print(f"Average parent response rate: {avg_parent_response:.1f}%")
print(f"Average teacher response rate: {avg_teacher_response:.1f}%")
print(f"Average student response rate: {avg_student_response:.1f}%")

Average parent response rate: 56.3%
Average teacher response rate: 83.4%
Average student response rate: 84.5%


In [15]:
#Howmany null value are found in Student Response Rate

student_response_nullValue=data['Student Response Rate'].isnull().sum()


print('Student Response rate hase null value:',student_response_nullValue)

Student Response rate hase null value: 673


In [16]:
#How elementary Schools are found in dataset
elementary_schools = data[data['School Type'] == 'Elementary School'][['School Name','School Type', 'Student Response Rate']]


In [17]:
elementary_schools


Unnamed: 0,School Name,School Type,Student Response Rate
0,P.S. 015 Roberto Clemente,Elementary School,
1,P.S. 019 Asher Levy,Elementary School,
2,P.S. 020 Anna Silver,Elementary School,
4,P.S. 063 William Mckinley,Elementary School,
5,P.S. 064 Robert Simon,Elementary School,
...,...,...,...
1647,Bronx Academy Of Promise Charter School,Elementary School,
1648,Icahn Charter School 3,Elementary School,
1656,Icahn Charter School 4,Elementary School,
1665,Bronx Charter School For Better Learning,Elementary School,


In [18]:
len(elementary_schools)

617

In [19]:
#Student response rate for elementray schools studens
result = elementary_schools[['School Type', 'Student Response Rate']]
print('Student Response Rate for Elementary Schools Students')

result.isnull().sum()

Student Response Rate for Elementary Schools Students


School Type                0
Student Response Rate    589
dtype: int64

In [20]:
#How Early childhood_school are found in Dataset
Early_Childhood_School=data[data['School Type'] == 'Early Childhood School'][['School Name','School Type', 'Student Response Rate']]

len(Early_Childhood_School)

81

In [21]:
#Student response rate for early_childhood schools students
result=Early_Childhood_School[['School Name','School Type', 'Student Response Rate']]

result.isnull().sum()

School Name               0
School Type               0
Student Response Rate    81
dtype: int64

In [22]:
Early_Childhood_School.head()

Unnamed: 0,School Name,School Type,Student Response Rate
54,Yorkville Community School,Early Childhood School,
65,P.S. 267,Early Childhood School,
86,Spruce Street School,Early Childhood School,
153,P.S. 185 - The Early Childhood Discovery And D...,Early Childhood School,
175,P.S. 452,Early Childhood School,


In [23]:
#Howmany Elementary middle schools are found in dataset
Elementary_Middle_School=data[data['School Type'] == 'Elementary/Middle School'][['School Name','School Type', 'Student Response Rate']]

len(Elementary_Middle_School)




160

In [24]:
Elementary_Middle_School.head()

Unnamed: 0,School Name,School Type,Student Response Rate
3,P.S. 034 Franklin D. Roosevelt,Elementary/Middle School,0.96
9,P.S. 140 Nathan Straus,Elementary/Middle School,0.98
11,P.S. 184M Shuang Wen,Elementary/Middle School,0.99
12,P.S. 188 The Island School,Elementary/Middle School,0.97
46,P.S. 111 Adolph S. Ochs,Elementary/Middle School,0.77


In [25]:
Elementary_Middle_School['Student Response Rate'].isnull().sum()

0

In [26]:
# --- Step 2: Data Cleaning ---

# 1. Check for missing values
missing_values = data.isnull().sum()
print("Missing values in each column:\n", missing_values)

Missing values in each column:
 DBN                                    0
School Name                            0
School Type                            0
Parent Response Rate                   0
Teacher Response Rate                  0
Student Response Rate                673
Total Safety and Respect Score         0
Total Communication Score              0
Total Engagement Score                 0
Total Academic Expectations Score      0
dtype: int64


In [27]:
data['Student Response Rate'].fillna(0,inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  data['Student Response Rate'].fillna(0,inplace=True)


In [28]:


data.head()


Unnamed: 0,DBN,School Name,School Type,Parent Response Rate,Teacher Response Rate,Student Response Rate,Total Safety and Respect Score,Total Communication Score,Total Engagement Score,Total Academic Expectations Score
0,01M015,P.S. 015 Roberto Clemente,Elementary School,0.61,0.92,0.0,8.4,8.2,8.3,8.2
1,01M019,P.S. 019 Asher Levy,Elementary School,0.92,0.94,0.0,8.4,8.0,8.0,8.2
2,01M020,P.S. 020 Anna Silver,Elementary School,0.88,0.73,0.0,8.5,7.8,8.2,8.2
3,01M034,P.S. 034 Franklin D. Roosevelt,Elementary/Middle School,0.39,0.58,0.96,7.3,6.9,7.3,7.7
4,01M063,P.S. 063 William Mckinley,Elementary School,0.58,1.0,0.0,8.5,7.4,7.6,7.8


In [29]:
data['Student Response Rate'].isnull().sum()

0

In [30]:
data['Total Safety and Respect Score'].max()

9.6

In [31]:
# --- Step 3: Deeper Analysis ---

# 1. Correlation between parent response rate and safety/communication scores
correlation_safety = data['Parent Response Rate'].corr(data['Total Safety and Respect Score'])
correlation_communication = data['Parent Response Rate'].corr(data['Total Communication Score'])
print(f"Correlation between parent response and safety score: {correlation_safety:.2f}")
print(f"Correlation between parent response and communication score: {correlation_communication:.2f}")


Correlation between parent response and safety score: 0.50
Correlation between parent response and communication score: 0.48


In [35]:


# 1. Correlation between teacher response rate and safety/communication scores
correlation_safety_teacher = data['Teacher Response Rate'].corr(data['Total Safety and Respect Score'])
correlation_communication_teacher = data['Teacher Response Rate'].corr(data['Total Communication Score'])
collelation_Total_Engagement_Score=data['Teacher Response Rate'].corr(data['Total Engagement Score'])
collelation_Total_Academic_Expectations_Score_teacher=data['Teacher Response Rate'].corr(data['Total Academic Expectations Score'])
print(f"Correlation between teacher response and safety score: {correlation_safety_teacher:.2f}")
print(f"Correlation between teacher response and communication score: {correlation_communication_teacher:.2f}")
print(f"Correlation between teacher response and Total Engagement Score: {correlation_communication_teacher:.2f}")
print(f"Correlation between teacher response and Total Engagement Score: {collelation_Total_Academic_Expectations_Score_teacher:.2f}")

Correlation between teacher response and safety score: 0.22
Correlation between teacher response and communication score: 0.27
Correlation between teacher response and Total Engagement Score: 0.27
Correlation between teacher response and Total Engagement Score: 0.28


In [36]:
elementary_data=data[data['School Type']=='Elementary School']
# Calculate correlation between Student Response Rate and another numeric column (e.g., Total Safety and Respect Score)
correlation = elementary_data['Student Response Rate'].corr(elementary_data['Total Safety and Respect Score'])

# Print the result
print(f"Correlation (Elementary Schools only): {correlation:.2f}")

Correlation (Elementary Schools only): -0.12


In [37]:
# 2. Average scores by school type
avg_scores_by_type = data.groupby('School Type')[['Total Safety and Respect Score', 
                                                 'Total Communication Score', 
                                                 'Total Engagement Score', 
                                                 'Total Academic Expectations Score']].mean()
print("Average scores by school type:\n", avg_scores_by_type)

Average scores by school type:
                                Total Safety and Respect Score  \
School Type                                                     
Early Childhood School                               8.637037   
Elementary School                                    8.273906   
Elementary/Middle School                             7.661250   
Elementary/Middle/High School                        7.471429   
High School                                          7.394886   
Middle School                                        7.453082   
Middle/High School                                   7.398851   
Transfer High School                                 8.091837   
YABC                                                 8.377273   

                               Total Communication Score  \
School Type                                                
Early Childhood School                          8.027160   
Elementary School                               7.645867   
Elementary/M

In [184]:
# 3. Statistical summary to identify exceptional schools

print("Statistical summary of scores:\n")

data.describe()

Statistical summary of scores:



Unnamed: 0,Parent Response Rate,Teacher Response Rate,Student Response Rate,Total Safety and Respect Score,Total Communication Score,Total Engagement Score,Total Academic Expectations Score
count,1667.0,1667.0,1667.0,1667.0,1667.0,1667.0,1667.0
mean,0.563359,0.834031,0.504055,7.85033,7.29982,7.582244,7.879544
std,0.250089,0.174267,0.433175,0.745997,0.697943,0.662616,0.624073
min,0.01,0.0,0.0,4.7,4.7,4.8,5.3
25%,0.37,0.75,0.0,7.3,6.8,7.1,7.5
50%,0.57,0.88,0.71,7.9,7.3,7.6,7.9
75%,0.76,0.97,0.92,8.4,7.8,8.0,8.3
max,1.0,1.0,1.0,9.6,9.2,9.3,9.5


In [185]:
# --- Step 4: Recommendations ---
# (These would depend on actual data; here’s an example based on typical findings)
print("\nRecommendations:")
print("- Focus on improving parent response rates in schools with below-average participation.")
print("- Enhance safety and communication in school types with lower scores.")
print("- Use high-performing schools as models for others.")


Recommendations:
- Focus on improving parent response rates in schools with below-average participation.
- Enhance safety and communication in school types with lower scores.
- Use high-performing schools as models for others.
