In [8]:
import pandas as pd

# Load the schools dataset from "schools.csv" (path is relative to the
# notebook's working directory) and show its first three rows as a quick check.
schools = pd.read_csv(filepath_or_buffer="schools.csv")
schools.head(n=3)


Unnamed: 0,school_name,borough,building_code,average_math,average_reading,average_writing,percent_tested
0,"New Explorations into Science, Technology and ...",Manhattan,M022,657,601,601,
1,Essex Street Academy,Manhattan,M445,395,411,387,78.9
2,Lower Manhattan Arts Academy,Manhattan,M445,418,428,415,65.1


# Finding schools with the best math scores

In [9]:
# Cut-off for a "best" math result: 80% of 800.
# NOTE(review): 800 is presumably the maximum score of one SAT section -- confirm.
best_score = 800 * 0.8

# One chain instead of three reassignments:
#   1. keep the rows whose average math score reaches the cut-off,
#      restricted to the school name and its math score;
#   2. order them from the highest average math score to the lowest.
best_math_schools = (
    schools
    .loc[schools["average_math"] >= best_score, ["school_name", "average_math"]]
    .sort_values("average_math", ascending=False)
)
best_math_schools


Unnamed: 0,school_name,average_math
88,Stuyvesant High School,754
170,Bronx High School of Science,714
93,Staten Island Technical High School,711
365,Queens High School for the Sciences at York Co...,701
68,"High School for Mathematics, Science, and Engi...",683
280,Brooklyn Technical High School,682
333,Townsend Harris High School,680
174,High School of American Studies at Lehman College,669
0,"New Explorations into Science, Technology and ...",657
45,Eleanor Roosevelt High School,641


# Identifying the top 10 performing schools

In [10]:
# Total SAT score per school: math + reading + writing averages.
# The column is written onto `schools` itself because the per-borough
# aggregation further down reads schools["total_SAT"].
schools["total_SAT"] = (
    schools["average_math"]
    .add(schools["average_reading"])
    .add(schools["average_writing"])
)

# Ranking table: school name and total score only, highest total first.
schools_sorted = schools[["school_name", "total_SAT"]].sort_values(
    "total_SAT", ascending=False
)

# The ten best schools are simply the first ten rows of the ranking.
top_10_schools = schools_sorted.iloc[:10]
top_10_schools

Unnamed: 0,school_name,total_SAT
88,Stuyvesant High School,2144
170,Bronx High School of Science,2041
93,Staten Island Technical High School,2041
174,High School of American Studies at Lehman College,2013
333,Townsend Harris High School,1981
365,Queens High School for the Sciences at York Co...,1947
5,Bard High School Early College,1914
280,Brooklyn Technical High School,1896
45,Eleanor Roosevelt High School,1889
68,"High School for Mathematics, Science, and Engi...",1889


# Locating the NYC borough with the largest standard deviation in SAT performance

In [11]:
# Per-borough summary of total SAT scores, rounded to two decimals:
#   num_schools - count of schools with a non-missing total score
#   average_SAT - mean total score
#   std_SAT     - standard deviation of the total score
# Named aggregation yields the final column names directly, so no separate
# renaming step is needed.
boroughs = (
    schools
    .groupby("borough")["total_SAT"]
    .agg(num_schools="count", average_SAT="mean", std_SAT="std")
    .round(2)
)

# Keep only the borough row(s) whose (rounded) standard deviation equals the
# maximum across all boroughs.
largest_std_dev = boroughs.loc[boroughs["std_SAT"] == boroughs["std_SAT"].max()]
largest_std_dev

Unnamed: 0_level_0,num_schools,average_SAT,std_SAT
borough,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Manhattan,89,1340.13,230.29
