In [32]:
# Dependencies and Setup
import pandas as pd


# File to Load (Remember to Change These)
school_data_to_load = "Resources/schools_complete.csv"
student_data_to_load = "Resources/students_complete.csv"

# Read School and Student Data File and store into Pandas Data Frames
school_data = pd.read_csv(school_data_to_load)
student_data = pd.read_csv(student_data_to_load)

# Combine the data into a single dataset
school_data_complete = pd.merge(student_data, school_data, how="left", on=["school_name", "school_name"])
school_data_complete.head()



Unnamed: 0,Student ID,student_name,gender,grade,school_name,reading_score,math_score,School ID,type,size,budget
0,0,Paul Bradley,M,9th,Huang High School,66,79,0,District,2917,1910635
1,1,Victor Smith,M,12th,Huang High School,94,61,0,District,2917,1910635
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60,0,District,2917,1910635
3,3,Dr. Richard Scott,M,12th,Huang High School,67,58,0,District,2917,1910635
4,4,Bonnie Ray,F,9th,Huang High School,97,84,0,District,2917,1910635


In [169]:
# Find totals
Total_schools = len(school_data_complete["school_name"].unique())
Total_students = len(school_data_complete["student_name"].unique())
Total_budget = sum(school_data_complete["budget"].unique())

# Calculate Average scores and passing rate

Average_math_score = school_data_complete["math_score"].mean()
Average_reading_score = school_data_complete["reading_score"].mean()
Percentage_passing_math = (school_data_complete[school_data_complete["math_score"]>=70]["student_name"].count()/Total_students)*100
Percentage_passing_reading = (school_data_complete[school_data_complete["reading_score"]>=70]["student_name"].count()/Total_students)*100
Overall_passing_rate = (Average_math_score + Average_reading_score)/2

# Dataframe to show results

District_summary = pd.DataFrame({"Total schools": [Total_schools], "Total students": [Total_students], "Total budget": [Total_budget], "Average Math score": [Average_math_score], "Average Reading Score": [Average_reading_score], "% of students Passing Math": [Percentage_passing_math], "% of students Passing Reading": [Percentage_passing_reading], "Overall Passing Score %": [Overall_passing_rate]})

District_summary

Unnamed: 0,Total schools,Total students,Total budget,Average Math score,Average Reading Score,% of students Passing Math,% of students Passing Reading,Overall Passing Score %
0,15,32715,24649428,78.985371,81.87784,89.775332,102.735748,80.431606


In [49]:
# Calculate each column

School_grouped_pd = school_data_complete.groupby("school_name")
Total_students = School_grouped_pd["Student ID"].count()
School_type = School_grouped_pd["type"].unique()
Total_budget = School_grouped_pd["budget"].unique()
Budget_per_student = Total_budget/Total_students

# Calculate Average scores and percentage

Avg_math_score = School_grouped_pd["math_score"].mean()
Avg_reading_score = School_grouped_pd["reading_score"].mean()
Passing_math_grouped = school_data_complete[school_data_complete["math_score"]>=70].groupby("school_name")["math_score"].count()
Percent_passing_math = (Passing_math_grouped/Total_students)*100
Passing_reading_grouped = school_data_complete[school_data_complete["reading_score"]>=70].groupby("school_name")["reading_score"].count()
Percent_passing_reading = (Passing_reading_grouped/Total_students)*100
Overall_passing_percentage = (Percent_passing_math + Percent_passing_reading)/2

# Show results in DataFrame

Schools_summary = pd.DataFrame({"School Type": [School_type], "Total students": [Total_students], "Total budget": [Total_budget], "Budget Per Student": [Budget_per_student], "Avg Math Score": [Avg_math_score], "Avg Reading Score": [Avg_reading_score], "% of students passing math": [Percent_passing_math], "% of students passing reading": [Percent_passing_reading], "Overall Passing Rate": [Overall_passing_percentage]})

Schools_summary.index.name = None



In [50]:
# Sort and display the top-five performing schools
Top_Five_Schools = Schools_summary.sort_values(by="Overall Passing Rate", ascending=False)
Top_Five_Schools.head()

Unnamed: 0,School Type,Total students,Total budget,Budget Per Student,Avg Math Score,Avg Reading Score,% of students passing math,% of students passing reading,Overall Passing Rate
0,school_name Bailey High School [District...,school_name Bailey High School 4976 Cabr...,school_name Bailey High School [3124928]...,school_name Bailey High School [628.0] C...,school_name Bailey High School 77.048432...,school_name Bailey High School 81.033963...,school_name Bailey High School 66.680064...,school_name Bailey High School 81.933280...,school_name Bailey High School 74.306672...


In [51]:
# Sort and display the five worst_performing schools
Worst_Five_Schools = Schools_summary.sort_values(by="Overall Passing Rate", ascending=True)
Worst_Five_Schools.head()

Unnamed: 0,School Type,Total students,Total budget,Budget Per Student,Avg Math Score,Avg Reading Score,% of students passing math,% of students passing reading,Overall Passing Rate
0,school_name Bailey High School [District...,school_name Bailey High School 4976 Cabr...,school_name Bailey High School [3124928]...,school_name Bailey High School [628.0] C...,school_name Bailey High School 77.048432...,school_name Bailey High School 81.033963...,school_name Bailey High School 66.680064...,school_name Bailey High School 81.933280...,school_name Bailey High School 74.306672...


In [36]:
# Create a series for each grade and group by school


ninth_Grade_math_grouped = school_data_complete[school_data_complete["grade"]=="9th"].groupby("school_name")["math_score"].mean()
tenth_Grade_math_grouped = school_data_complete[school_data_complete["grade"]=="10th"].groupby("school_name")["math_score"].mean()
eleventh_Grade_math_grouped = school_data_complete[school_data_complete["grade"]=="11th"].groupby("school_name")["math_score"].mean()
twelvth_Grade_math_grouped = school_data_complete[school_data_complete["grade"]=="12th"].groupby("school_name")["math_score"].mean()

# Create a Dataframe to show results
Math_Scores_By_Grade = pd.DataFrame({"9th Grade": ninth_Grade_math_grouped, "10th Grade": tenth_Grade_math_grouped, "11th Grade": eleventh_Grade_math_grouped, "12th Grade": twelvth_Grade_math_grouped})
Math_Scores_By_Grade


Unnamed: 0_level_0,9th Grade,10th Grade,11th Grade,12th Grade
school_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Bailey High School,77.083676,76.996772,77.515588,76.492218
Cabrera High School,83.094697,83.154506,82.76556,83.277487
Figueroa High School,76.403037,76.539974,76.884344,77.151369
Ford High School,77.361345,77.672316,76.918058,76.179963
Griffin High School,82.04401,84.229064,83.842105,83.356164
Hernandez High School,77.438495,77.337408,77.136029,77.186567
Holden High School,83.787402,83.429825,85.0,82.855422
Huang High School,77.027251,75.908735,76.446602,77.225641
Johnson High School,77.187857,76.691117,77.491653,76.863248
Pena High School,83.625455,83.372,84.328125,84.121547


In [37]:
# Create a series for each grade and group by school


ninth_Grade_reading_grouped = school_data_complete[school_data_complete["grade"]=="9th"].groupby("school_name")["reading_score"].mean()
tenth_Grade_reading_grouped = school_data_complete[school_data_complete["grade"]=="10th"].groupby("school_name")["reading_score"].mean()
eleventh_Grade_reading_grouped = school_data_complete[school_data_complete["grade"]=="11th"].groupby("school_name")["reading_score"].mean()
twelvth_Grade_reading_grouped = school_data_complete[school_data_complete["grade"]=="12th"].groupby("school_name")["reading_score"].mean()

# Create a Dataframe to show results
Reading_Scores_By_Grade = pd.DataFrame({"9th Grade": ninth_Grade_reading_grouped, "10th Grade": tenth_Grade_reading_grouped, "11th Grade": eleventh_Grade_reading_grouped, "12th Grade": twelvth_Grade_reading_grouped})
Reading_Scores_By_Grade


Unnamed: 0_level_0,9th Grade,10th Grade,11th Grade,12th Grade
school_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Bailey High School,81.303155,80.907183,80.945643,80.912451
Cabrera High School,83.676136,84.253219,83.788382,84.287958
Figueroa High School,81.198598,81.408912,80.640339,81.384863
Ford High School,80.632653,81.262712,80.403642,80.662338
Griffin High School,83.369193,83.706897,84.288089,84.013699
Hernandez High School,80.86686,80.660147,81.39614,80.857143
Holden High School,83.677165,83.324561,83.815534,84.698795
Huang High School,81.290284,81.512386,81.417476,80.305983
Johnson High School,81.260714,80.773431,80.616027,81.227564
Pena High School,83.807273,83.612,84.335938,84.59116


In [40]:
# Create a table that breaks down school performances based on average Spending Ranges (Per Student)
# Create bins
spending_bins = [0, 585, 615, 645, 675]
group_names = ["<$585", "$585-615", "$615-645", "$645-675"]

                                           

In [53]:
Scores_spending = Schools_summary.loc[:,["Avg Math Score", "Avg Reading Score", "% of students passing math", "% of students passing reading", "Overall Passing Rate",]]

Scores_spending["Spending Ranges (Per Student)"] = pd.cut(Schools_summary["Budget Per Student"], spending_bins, labels=group_names)
Scores_spending = Scores_spending.groupby("Spending Ranges (Per Student)").mean()
Scores_spending.head()

ValueError: The truth value of a Series is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().

In [None]:
# Scores by schools size
size_bins = [0, 1000, 2000, 5000]
group_names = ["Small (<1000)", "Medium (1000-2000)", "Large (2000-5000)"]


In [54]:
Scores-size = Schools_summary.loc[:, ["Avg Math Score", "Avg Reading Score", "% of students passing math", "% of students passing reading", "Overall Passing Rate"]]

Scores_size["School Size"] = pd.cut(Schools_summary["Total students"], size_bins, labels=group_names)

scores_size = scores_size.groupby("School Size").mean()
scores_size.head()

SyntaxError: can't assign to operator (<ipython-input-54-b664dc70b2a8>, line 1)

In [60]:
# Create a new dataframe by school type
Scores_school_type = Schools_summary.loc[:, ['School Type','Avg Math Score',
                                  'Avg Reading Score','% of students passing math',
                                  '% of students passing reading','Overall Passing Rate',]]
# Create a group based on school type
Scores_school_type = Scores_school_type.groupby('School Type').mean()
Scores_school_type.head()

DataError: No numeric types to aggregate