In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the school-level and student-level tables used throughout this assignment
complete_schools_data = pd.read_csv("../Resources/schools_complete.csv")

# Note: csv_path ends up pointing at the student file
csv_path = "../Resources/students_complete.csv"
complete_students_data = pd.read_csv(csv_path)

In [3]:
# Sanity check: show the first five rows of the school table
complete_schools_data.head(n=5)

Unnamed: 0,School ID,school_name,type,size,budget
0,0,Huang High School,District,2917,1910635
1,1,Figueroa High School,District,2949,1884411
2,2,Shelton High School,Charter,1761,1056600
3,3,Hernandez High School,District,4635,3022020
4,4,Griffin High School,Charter,1468,917500


In [4]:
# Sanity check: show the first five rows of the student table
complete_students_data.head(n=5)

Unnamed: 0,Student ID,student_name,gender,grade,school_name,reading_score,math_score
0,0,Paul Bradley,M,9th,Huang High School,66,79
1,1,Victor Smith,M,12th,Huang High School,94,61
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60
3,3,Dr. Richard Scott,M,12th,Huang High School,67,58
4,4,Bonnie Ray,F,9th,Huang High School,97,84


In [5]:
# Labels for the two score bins, listed from the lowest bin to the highest
score_labels = ["Not Passing", "Passing"]

# Bin edges handed to pd.cut further down. With pd.cut's right-closed intervals,
# a score of 66 lands in "Not Passing"; anything above 66, up to 100, is "Passing".
bins = [0, 66, 100]

In [6]:
# Headline figures for the District Summary.

# From the student table: head count (non-null names) and mean score per subject
student_count = complete_students_data["student_name"].count()
avg_district_reading_score = complete_students_data["reading_score"].mean()
avg_district_math_score = complete_students_data["math_score"].mean()

# From the school table: number of schools (non-null names) and combined budget
school_count = complete_schools_data["school_name"].count()
district_budget = complete_schools_data["budget"].sum()

In [7]:
#print(school_count)
#print(student_count)
#print(district_budget)
#print(avg_district_math_score)
#print(avg_district_reading_score)

In [8]:
# Narrow per-subject views of the student table: student name plus one score column
district_reading_scores = complete_students_data[["student_name", "reading_score"]]
district_math_scores = complete_students_data[["student_name", "math_score"]]

In [9]:
#print(district_math_scores.head())
#print(district_reading_scores.head())

In [10]:
# Classify every math score with the shared bins/labels and store the result as a
# new "Passing_Math" column on the student DataFrame.
# include_lowest=True closes the left edge of the first bin, so a score equal to
# bins[0] is labelled "Not Passing" instead of falling outside every bin (NaN).
complete_students_data["Passing_Math"] = pd.cut(
    district_math_scores["math_score"],
    bins=bins,
    labels=score_labels,
    include_lowest=True,
)

In [11]:
# Classify every reading score with the shared bins/labels and store the result as a
# new "Passing_Reading" column on the student DataFrame.
# include_lowest=True closes the left edge of the first bin, so a score equal to
# bins[0] is labelled "Not Passing" instead of falling outside every bin (NaN).
complete_students_data["Passing_Reading"] = pd.cut(
    district_reading_scores["reading_score"],
    bins=bins,
    labels=score_labels,
    include_lowest=True,
)

In [12]:
# Sanity check: the student table should now carry the Passing_Math and
# Passing_Reading columns alongside the original ones
complete_students_data.head(n=5)

Unnamed: 0,Student ID,student_name,gender,grade,school_name,reading_score,math_score,Passing_Math,Passing_Reading
0,0,Paul Bradley,M,9th,Huang High School,66,79,Passing,Not Passing
1,1,Victor Smith,M,12th,Huang High School,94,61,Not Passing,Passing
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60,Not Passing,Passing
3,3,Dr. Richard Scott,M,12th,Huang High School,67,58,Not Passing,Passing
4,4,Bonnie Ray,F,9th,Huang High School,97,84,Passing,Passing


In [13]:
# Boolean masks over the student table: True where the student passed the subject
passed_math = complete_students_data["Passing_Math"] == "Passing"
passed_reading = complete_students_data["Passing_Reading"] == "Passing"

# Row subsets for students passing math, passing reading, and passing both
dist_students_passing_math = complete_students_data.loc[passed_math]
dist_students_passing_reading = complete_students_data.loc[passed_reading]
dist_students_pass_math_reading = complete_students_data.loc[passed_math & passed_reading]

# Number of students in each subset
math_pass_count = dist_students_passing_math["Passing_Math"].count()
reading_pass_count = dist_students_passing_reading["Passing_Reading"].count()
math_reading_pass_count = dist_students_pass_math_reading["Passing_Math"].count()

#print(math_pass_count)
#print(reading_pass_count)
#print(math_reading_pass_count)


In [14]:
# Share of all district students who passed math, reading, and both subjects.
# These are stored as fractions of student_count; the summary table formats
# them as percentages.
dist_pct_pass_math = math_pass_count / student_count
dist_pct_pass_reading = reading_pass_count / student_count
dist_pct_pass_math_reading = math_reading_pass_count / student_count

#print(dist_pct_pass_math)
#print(dist_pct_pass_reading)
#print(dist_pct_pass_math_reading)

In [15]:
# Assemble the one-row District Summary table. Counts, budget and passing rates
# are pre-formatted as display strings; the average scores stay numeric.
district_summary = pd.DataFrame(
    {
        "Total Schools": [school_count],
        "Total Students": [f"{student_count:,}"],
        "Total Budget": [f"${district_budget:,.2f}"],
        "Average Math Score": [avg_district_math_score],
        "Average Reading Score": [avg_district_reading_score],
        "% Passing Math": [f"{dist_pct_pass_math:.2%}"],
        "% Passing Reading": [f"{dist_pct_pass_reading:.2%}"],
        "% Overall Passing": [f"{dist_pct_pass_math_reading:.2%}"],
    }
)

district_summary

Unnamed: 0,Total Schools,Total Students,Total Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
0,15,39170,"$24,649,428.00",78.985371,81.87784,81.55%,92.37%,75.92%
