In [12]:
import pandas as pd

In [13]:
# Paths of the two input CSV files
school_data_to_load = "Resources/schools_complete.csv"
student_data_to_load = "Resources/students_complete.csv"

# Read each file into its own DataFrame
school_data = pd.read_csv(school_data_to_load)
student_data = pd.read_csv(student_data_to_load)

# Combine into a single student-level dataset.
# A left join keeps every student row and attaches the matching school's
# columns through the shared "school_name" key (listed once; repeating the
# key in `on` is redundant). `validate="many_to_one"` makes pandas raise a
# MergeError if a school name appears more than once in school_data, which
# would otherwise silently duplicate student rows and inflate every count.
complete = pd.merge(
    student_data,
    school_data,
    how="left",
    on="school_name",
    validate="many_to_one",
)

complete.head()

Unnamed: 0,Student ID,student_name,gender,grade,school_name,reading_score,math_score,School ID,type,size,budget
0,0,Paul Bradley,M,9th,Huang High School,66,79,0,District,2917,1910635
1,1,Victor Smith,M,12th,Huang High School,94,61,0,District,2917,1910635
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60,0,District,2917,1910635
3,3,Dr. Richard Scott,M,12th,Huang High School,67,58,0,District,2917,1910635
4,4,Bonnie Ray,F,9th,Huang High School,97,84,0,District,2917,1910635


## District Summary

In [14]:
# Number of distinct school names found in the merged dataset
school_count = complete.loc[:, "school_name"].nunique()
school_count

15

In [15]:
# Total number of students: count of non-null entries in "student_name"
student_count = complete.loc[:, "student_name"].count()
student_count

39170

In [16]:
# Total budget across all schools.
# Summed from school_data rather than the merged table, where each school's
# budget is repeated on every one of its student rows. Series.sum() is the
# vectorized equivalent of the builtin sum() over a re-wrapped Series.
total_budget = school_data["budget"].sum()
total_budget


24649428

In [17]:
# District-wide mean of the "math_score" column
avg_math = complete.loc[:, "math_score"].mean()
avg_math

78.98537145774827

In [18]:
# District-wide mean of the "reading_score" column
avg_read = complete.loc[:, "reading_score"].mean()
avg_read

81.87784018381414

In [19]:
# Share of students with a math score of at least 70, as a percentage
# of the total student count
math_pass_count = complete.loc[complete["math_score"] >= 70, "student_name"].count()
math_pass_pcnt = math_pass_count / float(student_count) * 100
math_pass_pcnt

74.9808526933878

In [20]:
# Share of students with a reading score of at least 70, as a percentage
# of the total student count
read_pass_count = complete.loc[complete["reading_score"] >= 70, "student_name"].count()
read_pass_pcnt = read_pass_count / float(student_count) * 100
read_pass_pcnt

85.80546336482001

In [21]:
# Share of students scoring at least 70 in BOTH subjects, as a percentage
# of the total student count
passing_read_math_count = complete.loc[
    (complete["reading_score"] >= 70) & (complete["math_score"] >= 70),
    "student_name",
].count()
overall_pass_pcnt = passing_read_math_count / float(student_count) * 100
overall_pass_pcnt

65.17232575950983

In [22]:
# District snapshot: a single-row DataFrame with one column per headline metric
dist_summary_df = pd.DataFrame(
    {
        "Total Schools": [school_count],
        "Total Students": [student_count],
        "Total Budget": [total_budget],
        "Avg Math Score": [avg_math],
        "Avg Reading Score": [avg_read],
        "% Passing Math": [math_pass_pcnt],
        "% Passing Reading": [read_pass_pcnt],
        "% Overall Passing": [overall_pass_pcnt],
    }
)

# Presentation formatting: thousands separators for the student count,
# currency with two decimals for the budget (both columns become strings)
dist_summary_df["Total Budget"] = dist_summary_df["Total Budget"].map("${:,.2f}".format)
dist_summary_df["Total Students"] = dist_summary_df["Total Students"].map("{:,}".format)

# Display the summary
dist_summary_df

Unnamed: 0,Total Schools,Total Students,Total Budget,Avg Math Score,Avg Reading Score,% Passing Math,% Passing Reading,% Overall Passing
0,15,39170,"$24,649,428.00",78.985371,81.87784,74.980853,85.805463,65.172326


## School Summary

In [23]:
# Series of school type indexed by school name
school_types = school_data.set_index("school_name").loc[:, "type"]
school_types.head()

school_name
Huang High School        District
Figueroa High School     District
Shelton High School       Charter
Hernandez High School    District
Griffin High School       Charter
Name: type, dtype: object

In [24]:
# Group the merged student rows by school; the grouping object is reused by
# later cells. Then count the non-null student names within each school.
per_school = complete.groupby(by=["school_name"])
student_count_per = per_school["student_name"].count()
student_count_per

school_name
Bailey High School       4976
Cabrera High School      1858
Figueroa High School     2949
Ford High School         2739
Griffin High School      1468
Hernandez High School    4635
Holden High School        427
Huang High School        2917
Johnson High School      4761
Pena High School          962
Rodriguez High School    3999
Shelton High School      1761
Thomas High School       1635
Wilson High School       2283
Wright High School       1800
Name: student_name, dtype: int64

In [32]:
# Per-capita spending for each school.
# "budget" is selected BEFORE aggregating so the mean is computed on that one
# numeric column only. Calling .mean() on the whole grouped frame also tries
# to average the text columns (names, gender, grade, type), which raises
# TypeError on pandas >= 2.0 and is wasted work on older versions.
school_budget_per = complete.groupby("school_name")["budget"].mean()

# Both Series are indexed by school_name, so the division aligns per school.
school_capita_per = school_budget_per / student_count_per

In [33]:
# Mean math and reading score for each school, taken from the per-school groups
avg_math_per, avg_read_per = (
    per_school[score_col].mean() for score_col in ("math_score", "reading_score")
)
avg_math_per

school_name
Bailey High School       77.048432
Cabrera High School      83.061895
Figueroa High School     76.711767
Ford High School         77.102592
Griffin High School      83.351499
Hernandez High School    77.289752
Holden High School       83.803279
Huang High School        76.629414
Johnson High School      77.072464
Pena High School         83.839917
Rodriguez High School    76.842711
Shelton High School      83.359455
Thomas High School       83.418349
Wilson High School       83.274201
Wright High School       83.682222
Name: math_score, dtype: float64

In [36]:
# Number of schools whose average math score is at least 70.
# Summing the boolean mask counts only its True entries; .count() counts every
# non-null entry and would therefore always return the number of schools.
# NOTE(review): this is a school-level tally based on averages, not a
# per-student pass rate - confirm which of the two the summary needs.
math_pass_per = (avg_math_per >= 70).sum()
math_pass_per

15

In [38]:
# Number of schools whose average reading score is at least 70.
# Summing the boolean mask counts only its True entries; .count() counts every
# non-null entry and would therefore always return the number of schools.
# NOTE(review): this is a school-level tally based on averages, not a
# per-student pass rate - confirm which of the two the summary needs.
read_pass_per = (avg_read_per >= 70).sum()
read_pass_per

15

In [None]:

# Use the code provided to select new column data
# Pulls one metric column out of each by-school-type frame.
# NOTE(review): type_math_scores, type_reading_scores, type_passing_math,
# type_passing_reading and type_overall_passing are not defined anywhere in
# the visible part of this notebook; as shown, this cell would raise NameError
# on a fresh Restart & Run All - confirm they are created in an earlier cell.
average_math_score_by_type = type_math_scores["Avg Math Score"]
average_reading_score_by_type = type_reading_scores["Avg Reading Score"]
average_percent_passing_math_by_type = type_passing_math["% Passing Math"]
average_percent_passing_reading_by_type = type_passing_reading["% Passing Reading"]
average_percent_overall_passing_by_type = type_overall_passing["% Overall Passing"]