In [227]:
#--Dependencies and Setup
import pandas as pd
import numpy as np

Delete line 10, 41

In [228]:
#--Point to csv files
school_path = "Resources/schools_complete.csv"
student_path = "Resources/students_complete.csv"

In [229]:
#--Read data and store in dataframe
school_df = pd.read_csv(school_path)
student_df = pd.read_csv(student_path)


In [230]:
#--Check number of rows and columns
school_df.shape

(15, 5)

In [231]:
#--Check missin data (if some rows is missing)
school_df.count()

School ID      15
school_name    15
type           15
size           15
budget         15
dtype: int64

In [232]:
#--Check number of rows and columns
student_df.shape

(39170, 7)

In [233]:
#--Check missin data (if some rows is missing)
student_df.count()

Student ID       39170
student_name     39170
gender           39170
grade            39170
school_name      39170
reading_score    39170
math_score       39170
dtype: int64

# --District Summary--##

In [234]:
schools_total = school_df["school_name"].count()
schools_total

15

In [235]:
students_total = student_df["student_name"].count()
students_total

39170

In [236]:
budget_total = school_df["budget"].sum()
budget_total

24649428

In [237]:
math_avg = student_df["math_score"].mean()
math_avg

78.98537145774827

In [238]:
reading_avg = student_df["reading_score"].mean()
reading_avg

81.87784018381414

In [239]:
overall_avg = (math_avg + reading_avg)/2
overall_avg

80.43160582078121

In [240]:
student_math70_greater = student_df.loc[student_df["math_score"]>=70]
student_math70_greater_count = student_math70_greater["Student ID"].count()
student_math70_greater_count

29370

In [241]:
percent_pass_math = (student_math70_greater_count/students_total)*100
percent_pass_math

74.9808526933878

In [242]:
student_reading70_greater = student_df.loc[student_df["reading_score"]>=70]
student_reading70_greater_count = student_reading70_greater["Student ID"].count()
student_reading70_greater_count


33610

In [243]:
percent_pass_reading = (student_reading70_greater_count/students_total)*100
percent_pass_reading

85.80546336482001

In [244]:
student_overall_70greater = student_df.loc[(student_df["math_score"]>=70) & (student_df["reading_score"]>=70)]
student_overall_70greater_count = student_overall_70greater["Student ID"].count()
student_overall_70greater_count

25528

In [245]:
#--calculate using | result = 95% using & result = 65%
percent_pass_overall = (student_overall_70greater_count/students_total)*100
percent_pass_overall

65.17232575950983

# --Disttrict Summary Report

In [246]:
#--Create Dataframe from variables

district_summary = {
                "Total Schools" : [schools_total],
                "Total Students": [students_total],
                "Total Budget" : [budget_total],
                "Average Math Score" : [math_avg],
                "Average Reading Score" :[reading_avg],
                "% Passing Math" : [percent_pass_math],
                "% Passing Reading": [percent_pass_reading],
                "% Overall Passing Rate" : [overall_avg]
                }
pd.DataFrame.from_dict(district_summary)

Unnamed: 0,Total Schools,Total Students,Total Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing Rate
0,15,39170,24649428,78.985371,81.87784,74.980853,85.805463,80.431606


# --School Summary--##

In [247]:
# student_df.head()


In [248]:
#--Assign student_df to school_sum for calculating 
school_sum = student_df[['school_name','student_name','reading_score','math_score']]
# school_sum.head()

In [249]:
school_sum.loc[school_sum["reading_score"] >= 70, "reading_pass"] = 1
school_sum.loc[school_sum["reading_score"] < 70, "reading_pass"] = 0
school_sum.loc[school_sum["math_score"] >= 70, "math_pass"] = 1
school_sum.loc[school_sum["math_score"] < 70, "math_pass"] = 0

In [250]:
#--Check all data are numeric
# school_sum.describe()

In [271]:
#--GroupBy data to find count, mean, and sum in dataframe
grouped_schools = school_sum.groupby("school_name", as_index=False).agg({'student_name':'count',
                                                                         'reading_score':'mean',
                                                                         'math_score':'mean',
                                                                         'reading_pass':'mean',
                                                                         'math_pass':'mean',
                                                                         
                                 }).rename(columns={'reading_score':'Average Reading Score',
                                                    'math_score':'Average Math Score',
                                                   })    
# grouped_schools.head()

In [252]:
#--Calculate percentage of passing reading, math and overall
grouped_schools['% Passing Math'] = grouped_schools["math_pass"]*100
grouped_schools['% Passing Reading'] = grouped_schools["reading_pass"]*100
grouped_schools['% Overall Passing Rate'] = (grouped_schools['% Passing Math']+grouped_schools['% Passing Reading'])/2

# grouped_schools.head(14)

In [253]:
#--Merge School_df with grouped_schools(calculate percentage of reading, math and overall)
school_summary_table = pd.merge(school_df,grouped_schools, on=["school_name","school_name"])
# school_summary_table.columns

In [254]:
#--Calculate clumn 'Per Student Budget'
student_count = school_summary_table["budget"]/school_summary_table["size"]
school_summary_table['student_per_budget'] = student_count
# school_summary_table.head()

# --School Summary Report

In [255]:
#--Select Column in dataframe and organize columns
school_summary_table1 = school_summary_table[["school_name","type","size",
                                              "budget","student_per_budget",
                                               'Average Math Score','Average Reading Score',
                                               '% Passing Math','% Passing Reading',
                                                '% Overall Passing Rate']]
#--Format numeric to two decimals
school_summary_table2=school_summary_table1.round({'Average Math Score':2,'Average Reading Score':2,
                                               '% Passing Math':2,'% Passing Reading':2,
                                                '% Overall Passing Rate':2})
#--Rename the columns
school_summary_report=school_summary_table2.rename(columns ={'school_name':'School Name',
                                                            'type':'School Type',
                                                            'size':'Total Student',
                                                             'budget':'Total School Budget',
                                                            'student_per_budget':'Per Student Budget'
                                                             })
#--Print by hiding index
school_summary_report.style.hide_index()


School Name,School Type,Total Student,Total School Budget,Per Student Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing Rate
Huang High School,District,2917,1910635,655,76.63,81.18,65.68,81.32,73.5
Figueroa High School,District,2949,1884411,639,76.71,81.16,65.99,80.74,73.36
Shelton High School,Charter,1761,1056600,600,83.36,83.73,93.87,95.85,94.86
Hernandez High School,District,4635,3022020,652,77.29,80.93,66.75,80.86,73.81
Griffin High School,Charter,1468,917500,625,83.35,83.82,93.39,97.14,95.27
Wilson High School,Charter,2283,1319574,578,83.27,83.99,93.87,96.54,95.2
Cabrera High School,Charter,1858,1081356,582,83.06,83.98,94.13,97.04,95.59
Bailey High School,District,4976,3124928,628,77.05,81.03,66.68,81.93,74.31
Holden High School,Charter,427,248087,581,83.8,83.81,92.51,96.25,94.38
Pena High School,Charter,962,585858,609,83.84,84.04,94.59,95.95,95.27


# --Top Performing Schools (By Passing Rate)

In [256]:
#--Sort Data Decending 
top_School = school_summary_report.sort_values('% Overall Passing Rate',ascending=False)
top_School.style.hide_index()

School Name,School Type,Total Student,Total School Budget,Per Student Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing Rate
Cabrera High School,Charter,1858,1081356,582,83.06,83.98,94.13,97.04,95.59
Thomas High School,Charter,1635,1043130,638,83.42,83.85,93.27,97.31,95.29
Griffin High School,Charter,1468,917500,625,83.35,83.82,93.39,97.14,95.27
Pena High School,Charter,962,585858,609,83.84,84.04,94.59,95.95,95.27
Wilson High School,Charter,2283,1319574,578,83.27,83.99,93.87,96.54,95.2
Wright High School,Charter,1800,1049400,583,83.68,83.96,93.33,96.61,94.97
Shelton High School,Charter,1761,1056600,600,83.36,83.73,93.87,95.85,94.86
Holden High School,Charter,427,248087,581,83.8,83.81,92.51,96.25,94.38
Bailey High School,District,4976,3124928,628,77.05,81.03,66.68,81.93,74.31
Hernandez High School,District,4635,3022020,652,77.29,80.93,66.75,80.86,73.81


# --Bottom Performing Schools (By Passing Rate)

In [257]:
#--Sort Data Ascending
bottom_School = school_summary_report.sort_values('% Overall Passing Rate',ascending=True)
bottom_School.style.hide_index()

School Name,School Type,Total Student,Total School Budget,Per Student Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing Rate
Rodriguez High School,District,3999,2547363,637,76.84,80.74,66.37,80.22,73.29
Figueroa High School,District,2949,1884411,639,76.71,81.16,65.99,80.74,73.36
Huang High School,District,2917,1910635,655,76.63,81.18,65.68,81.32,73.5
Johnson High School,District,4761,3094650,650,77.07,80.97,66.06,81.22,73.64
Ford High School,District,2739,1763916,644,77.1,80.75,68.31,79.3,73.8
Hernandez High School,District,4635,3022020,652,77.29,80.93,66.75,80.86,73.81
Bailey High School,District,4976,3124928,628,77.05,81.03,66.68,81.93,74.31
Holden High School,Charter,427,248087,581,83.8,83.81,92.51,96.25,94.38
Shelton High School,Charter,1761,1056600,600,83.36,83.73,93.87,95.85,94.86
Wright High School,Charter,1800,1049400,583,83.68,83.96,93.33,96.61,94.97


# --Math Score by grades

In [258]:
#--Combine school_df and student_df
school_summary = pd.merge(school_df, student_df, how = "left", on=["school_name","school_name"])
school_summary.columns

Index(['School ID', 'school_name', 'type', 'size', 'budget', 'Student ID',
       'student_name', 'gender', 'grade', 'reading_score', 'math_score'],
      dtype='object')

# --Reading Score by Grade Report

In [264]:
reading_score_df = student_df[['school_name','grade','reading_score']]
# df.replace({'A': {0: 100, 4: 400}})
replace_reading_score = reading_score_df.replace({'grade': {'9th': 9, '10th': 10,'11th':11,'12th':12}})
replace_reading_score.head()

Unnamed: 0,school_name,grade,reading_score
0,Huang High School,9,66
1,Huang High School,12,94
2,Huang High School,12,90
3,Huang High School,12,67
4,Huang High School,9,97


# --Math Score by Grade

In [299]:
pivot_grade = pd.pivot_table(index='school_name',                             
                             columns='grade',
                             values='math_score')

TypeError: pivot_table() missing 1 required positional argument: 'data'

In [None]:
reading_score_df = student_df[['school_name','grade','math_score']]

replace_reading_score = reading_score_df.replace({'grade': {'9th': 9, '10th': 10,'11th':11,'12th':12}})
replace_reading_score.head()