### Note
* Instructions have been included for each segment. You do not have to follow them exactly, but they are included to help you think through the steps.

In [91]:
# Dependencies and Setup
import pandas as pd

# File to Load (Remember to Change These)
school_data_to_load = "Resources/schools_complete.csv"
student_data_to_load = "Resources/students_complete.csv"

# Read School and Student Data File and store into Pandas DataFrames
school_data_df = pd.read_csv(school_data_to_load)
student_data_df = pd.read_csv(student_data_to_load)

# Combine the data into a single dataset.  
school_data_complete_df = pd.merge(student_data_df, school_data_df, how="left", on=["school_name", "school_name"])


In [181]:
school_data_complete_df.columns

Index(['Student ID', 'student_name', 'gender', 'year', 'school_name',
       'reading_score', 'maths_score', 'School ID', 'type', 'size', 'budget'],
      dtype='object')

In [92]:
school_data_complete_df.head()

Unnamed: 0,Student ID,student_name,gender,year,school_name,reading_score,maths_score,School ID,type,size,budget
0,0,Paul Bradley,M,9,Huang High School,96,94,0,Government,2917,1910635
1,1,Victor Smith,M,12,Huang High School,90,43,0,Government,2917,1910635
2,2,Kevin Rodriguez,M,12,Huang High School,41,76,0,Government,2917,1910635
3,3,Richard Scott,M,12,Huang High School,89,86,0,Government,2917,1910635
4,4,Bonnie Ray,F,9,Huang High School,87,69,0,Government,2917,1910635


In [184]:
total_number_of_schools = len(school_data_complete_df["school_name"].unique())
total_number_of_schools

15

In [190]:
total_number_of_students = school_data_df["size"].sum()
total_number_of_students

39170

In [95]:
total_budget = school_data_df["budget"].sum()
total_budget

24649428

In [96]:
average_maths_score = school_data_complete_df["maths_score"].mean()
average_maths_score

70.33819249425581

In [97]:
average_reading_score = school_data_complete_df["reading_score"].mean()
average_reading_score

69.98013786060761

In [161]:
maths_score_df = school_data_complete_df[["maths_score"]]
maths_score_df.head()

Unnamed: 0,maths_score
0,94
1,43
2,76
3,86
4,69


In [162]:
Passing_maths = maths_score_df.loc[maths_score_df.maths_score > 49].count()
Passing_maths


maths_score    33717
dtype: int64

In [163]:
Percent_Passing_maths = (Passing_maths/total_number_of_students)*100
Percent_Passing_maths

maths_score    86.078632
dtype: float64

In [164]:
reading_score_df = school_data_complete_df[["reading_score"]]
reading_score_df.head()

Unnamed: 0,reading_score
0,96
1,90
2,41
3,89
4,87


In [178]:
Passing_reading = reading_score_df.loc[reading_score_df.reading_score > 49].count()
Passing_reading

reading_score    33070
dtype: int64

In [166]:
Percent_Passing_reading = (Passing_reading/total_number_of_students)*100
Percent_Passing_reading

reading_score    84.426857
dtype: float64

In [169]:
total_passing_maths_reading = school_data_complete_df.loc[(maths_score_df["maths_score"] > 49) & (reading_score_df["reading_score"] > 49), :]
total_passing_maths_reading.count()

Student ID       28519
student_name     28519
gender           28519
year             28519
school_name      28519
reading_score    28519
maths_score      28519
School ID        28519
type             28519
size             28519
budget           28519
dtype: int64

In [170]:
total_passing_maths_reading = (total_passing_maths_reading.count()/total_number_of_students)*100
total_passing_maths_reading


Student ID       72.808272
student_name     72.808272
gender           72.808272
year             72.808272
school_name      72.808272
reading_score    72.808272
maths_score      72.808272
School ID        72.808272
type             72.808272
size             72.808272
budget           72.808272
dtype: float64

In [171]:
local_gov_area_summary_df = pd.DataFrame ({"Total Schools":[total_number_of_schools], "Total Students":[total_number_of_students], "Total Budget":[total_budget], "Average Maths Score":[average_maths_score], "Average Reading Score":[average_reading_score], "% Passing Maths":[Percent_Passing_maths], "% Passing Reading":[Percent_Passing_reading], "% Overall Passing":[total_passing_maths_reading]})
local_gov_area_summary_df.head()

Unnamed: 0,Total Schools,Total Students,Total Budget,Average Maths Score,Average Reading Score,% Passing Maths,% Passing Reading,% Overall Passing
0,15,39170,24649428,70.338192,69.980138,maths_score 86.078632 dtype: float64,reading_score 84.426857 dtype: float64,Student ID 72.808272 student_name 72...


## Local Government Area Summary

* Calculate the total number of schools

* Calculate the total number of students

* Calculate the total budget

* Calculate the average maths score 

* Calculate the average reading score

* Calculate the percentage of students with a passing maths score (50 or greater)

* Calculate the percentage of students with a passing reading score (50 or greater)

* Calculate the percentage of students who passed maths **and** reading (% Overall Passing)

* Create a dataframe to hold the above results

* Optional: give the displayed data cleaner formatting

## School Summary

* Create an overview table that summarises key metrics about each school, including:
  * School Name
  * School Type
  * Total Students
  * Total School Budget
  * Per Student Budget
  * Average Maths Score
  * Average Reading Score
  * % Passing Maths
  * % Passing Reading
  * % Overall Passing (The percentage of students that passed maths **and** reading.)
  
* Create a dataframe to hold the above results

In [197]:
organized_df = school_data_complete_df[["school_name","type","size","budget",'year','maths_score','reading_score']]
organized_df.head()

Unnamed: 0,school_name,type,size,budget,year,maths_score,reading_score
0,Huang High School,Government,2917,1910635,9,94,96
1,Huang High School,Government,2917,1910635,12,43,90
2,Huang High School,Government,2917,1910635,12,76,41
3,Huang High School,Government,2917,1910635,12,86,89
4,Huang High School,Government,2917,1910635,9,69,87


In [198]:
renamed_df = organized_df.rename(columns={"school_name":"School Name","type":"School Type","size":"Total Students","budget":"Total School Budget",'year':"Year",'maths_score':"Maths Score",'reading_score':"Reading Score"})
renamed_df.head()

Unnamed: 0,School Name,School Type,Total Students,Total School Budget,Year,Maths Score,Reading Score
0,Huang High School,Government,2917,1910635,9,94,96
1,Huang High School,Government,2917,1910635,12,43,90
2,Huang High School,Government,2917,1910635,12,76,41
3,Huang High School,Government,2917,1910635,12,86,89
4,Huang High School,Government,2917,1910635,9,69,87


In [216]:
grouped_df = renamed_df.groupby(["School Name"])


In [None]:
grouped_df[[]]

In [213]:
school_summary_df = grouped_df.set_index("School Name")
school_summary_df.head()

AttributeError: 'DataFrameGroupBy' object has no attribute 'set_index'

In [None]:
school_summary_df([{"School Name":'school_name', "Total Students":'size', "Total Budget":'budget', "Average Maths Score":'average_maths_score', "Average Reading Score":'average_reading_score', "% Passing Maths":'Percent_Passing_maths', "% Passing Reading":'Percent_Passing_reading', "% Overall Passing":'total_passing_maths_reading'}])

In [None]:
'Student ID', 'student_name', 'gender', 'year', 'school_name',
       'reading_score', 'maths_score', 'School ID', 'type', 'size', 'budget'

In [183]:
school_summary_df = pd.DataFrame ([{"School Name":'school_name', "Total Students":'size', "Total Budget":'budget', "Average Maths Score":'average_maths_score', "Average Reading Score":'average_reading_score', "% Passing Maths":'Percent_Passing_maths', "% Passing Reading":'Percent_Passing_reading', "% Overall Passing":'total_passing_maths_reading'}])
school_summary_df.head()

Unnamed: 0,School Name,Total Students,Total Budget,Average Maths Score,Average Reading Score,% Passing Maths,% Passing Reading,% Overall Passing
0,school_name,size,budget,average_maths_score,average_reading_score,Percent_Passing_maths,Percent_Passing_reading,total_passing_maths_reading


## Top Performing Schools (By % Overall Passing)

* Sort and display the top five performing schools by % overall passing.

## Bottom Performing Schools (By % Overall Passing)

* Sort and display the five worst-performing schools by % overall passing.

## Maths Scores by Year

* Create a table that lists the average maths score for students of each year level (9, 10, 11, 12) at each school.

  * Create a pandas series for each year. Hint: use a conditional statement.
  
  * Group each series by school
  
  * Combine the series into a dataframe
  
  * Optional: give the displayed data cleaner formatting

## Reading Score by Year

* Perform the same operations as above for reading scores

## Scores by School Spending

* Create a table that breaks down school performances based on average Spending Ranges (Per Student). Use 4 reasonable bins to group school spending. Include in the table each of the following:
  * Average Maths Score
  * Average Reading Score
  * % Passing Maths
  * % Passing Reading
  * Overall Passing Rate (Average of the above two)

## Scores by School Size

* Perform the same operations as above, based on school size.

## Scores by School Type

* Perform the same operations as above, based on school type