In [None]:
# Dependencies and Setup
import pandas as pd
from pathlib import Path

# Locations of the two source CSV files, relative to this notebook.
school_data_to_load = Path("../Resources/schools_complete.csv")
student_data_to_load = Path("../Resources/students_complete.csv")

# Load each CSV into its own DataFrame.
school_data = pd.read_csv(filepath_or_buffer=school_data_to_load)
student_data = pd.read_csv(filepath_or_buffer=student_data_to_load)

# Outer-join students (left) with schools (right) on the shared school_name
# column, so rows from either file are retained even without a match.
school_student_data = student_data.merge(school_data, on="school_name", how="outer")
school_student_data.head()


## Local Government Area (LGA) Summary


**Total number of unique schools** 

In [None]:
# Count the distinct (non-null) school names present in the merged data.
total_schools = school_student_data.loc[:, "school_name"].nunique()
total_schools

**Total students**

In [None]:
# Number of student records: non-null entries in the student_name column.
total_students = school_student_data["student_name"].count()
# Keep the headline figure as a thousands-separated string for presentation.
total_students = '{:,.0f}'.format(total_students)
# End on the bare name so the cell renders its result under the heading.
total_students

**Total budget**

In [None]:

# The merged frame has one row per student, so a school's budget appears on
# many rows; keep one row per (school_name, budget) pair before summing.
unique_df = school_student_data.drop_duplicates(subset=['school_name', 'budget'])
total_budget = unique_df["budget"].sum()
# Present the total with thousands separators and two decimals.
total_budget = '{:,.2f}'.format(total_budget)
# End on the bare name so the cell renders its result under the heading.
total_budget

**Average maths score**

In [None]:
# Mean maths score over the merged data. Series.mean() skips missing scores
# and divides by the number of scores actually present, rather than by the
# count of a different column (student_name).
average_maths_score = school_student_data["maths_score"].mean()
# Present the average with six decimal places.
average_maths_score = '{:,.6f}'.format(average_maths_score)
# End on the bare name so the cell renders its result under the heading.
average_maths_score

**Average reading score**

In [None]:
# Mean reading score over the merged data. Series.mean() skips missing scores
# and divides by the number of scores actually present, rather than by the
# count of a different column (student_name).
average_reading_score = school_student_data["reading_score"].mean()
# Present the average with six decimal places.
average_reading_score = '{:,.6f}'.format(average_reading_score)
# End on the bare name so the cell renders its result under the heading.
average_reading_score

**% passing maths**

In [None]:
# Use a cell-local student count as the denominator so the formatted
# `total_students` string prepared for the summary table is not overwritten.
student_count = school_student_data["student_name"].count()
# Students whose maths score meets the pass mark of 50.
passed_math_students = school_student_data.loc[
    school_student_data['maths_score'] >= 50, 'student_name'
].count()
percentage_passed_math = (passed_math_students / student_count) * 100
# Present the percentage with six decimal places.
percentage_passed_math = '{:,.6f}'.format(percentage_passed_math)
# End on the bare name so the cell renders its result under the heading.
percentage_passed_math

**% passing reading**

In [None]:
# Use a cell-local student count as the denominator so the formatted
# `total_students` string prepared for the summary table is not overwritten.
student_count = school_student_data["student_name"].count()
# Students whose reading score meets the pass mark of 50.
passed_reading_students = school_student_data.loc[
    school_student_data['reading_score'] >= 50, 'student_name'
].count()
percentage_passed_reading = (passed_reading_students / student_count) * 100
# Present the percentage with six decimal places.
percentage_passed_reading = '{:,.6f}'.format(percentage_passed_reading)
# End on the bare name so the cell renders its result under the heading.
percentage_passed_reading


**% overall passing**

In [None]:
# Use a cell-local student count as the denominator so the formatted
# `total_students` string prepared for the summary table is not overwritten.
student_count = school_student_data["student_name"].count()
# Students who meet the pass mark of 50 in both reading and maths.
passed_both = school_student_data.loc[
    (school_student_data['reading_score'] >= 50) & (school_student_data['maths_score'] >= 50),
    'student_name',
].count()
percentage_passed_both = (passed_both / student_count) * 100
# Present the percentage with six decimal places.
percentage_passed_both = '{:,.6f}'.format(percentage_passed_both)
# End on the bare name so the cell renders its result under the heading.
percentage_passed_both


**Area Summary**

In [None]:
# Gather the headline figures computed in the cells above, keyed by the
# column title each one gets in the summary table.
metrics_dict = dict([
    ('Total schools', total_schools),
    ('Total students', total_students),
    ('Total budget', total_budget),
    ('Average maths score', average_maths_score),
    ('Average reading score', average_reading_score),
    ('% Passing maths', percentage_passed_math),
    ('% Passing reading', percentage_passed_reading),
    ('% Overall passing', percentage_passed_both),
])

# Every value is a scalar, so a single-entry index is supplied to build a
# one-row table.
area_summary = pd.DataFrame(data=metrics_dict, index=[0])
area_summary

## School Summary


In [None]:
# Group the merged student-level rows by school.
grouped_school_data = school_student_data.groupby('school_name')

# Per-school values that feed more than one column; computed once.
students_per_school = grouped_school_data['Student ID'].count()
school_budget = grouped_school_data['budget'].first()

# Row-level pass flags (score >= 50). The mean of a boolean Series is the
# fraction of True values, so a grouped mean gives each school's pass rate
# without running a Python lambda over every group via GroupBy.apply.
school_names = school_student_data['school_name']
passed_maths = school_student_data['maths_score'] >= 50
passed_reading = school_student_data['reading_score'] >= 50

school_summary = pd.DataFrame({
    'School Type': grouped_school_data['type'].first(),
    'Total Students': students_per_school,
    # Formatted for display only; the numeric budget is used for the ratio below.
    'Total School Budget': school_budget.map('{:,.2f}'.format),
    'Per Student Budget': school_budget / students_per_school,
    'Average Maths Score': grouped_school_data['maths_score'].mean(),
    'Average Reading Score': grouped_school_data['reading_score'].mean(),
    '% Passing Maths': passed_maths.groupby(school_names).mean() * 100,
    '% Passing Reading': passed_reading.groupby(school_names).mean() * 100,
    '% Overall passing': (passed_maths & passed_reading).groupby(school_names).mean() * 100,
})
school_summary

## Highest-Performing Schools (by % Overall Passing)


In [None]:
# Rank schools from highest to lowest overall pass rate and keep the first five.
top_schools = (
    school_summary
    .sort_values('% Overall passing', ascending=False)
    .head(5)
)
top_schools

## Lowest-Performing Schools (by % Overall Passing)


In [None]:
# Rank schools from lowest to highest overall pass rate and keep the first five.
bottom_schools = (
    school_summary
    .sort_values('% Overall passing', ascending=True)
    .head(5)
)
bottom_schools

## Maths Scores by Year


In [None]:
# Mean maths score for every (school, year) pair, pivoted so that each school
# is a row and each year is a column.
average_math_score_by_school = (
    school_student_data
    .groupby(['school_name', 'year'])['maths_score']
    .mean()
    .unstack()
)
# Replace the raw year values in the header with "Year <n>" labels.
average_math_score_by_school.columns = [
    f'Year {year}' for year in average_math_score_by_school.columns
]
average_math_score_by_school

## Reading Scores by Year


In [None]:
# Mean reading score for every (school, year) pair, pivoted so that each school
# is a row and each year is a column.
average_reading_score_by_school = (
    school_student_data
    .groupby(['school_name', 'year'])['reading_score']
    .mean()
    .unstack()
)
# Replace the raw year values in the header with "Year <n>" labels.
average_reading_score_by_school.columns = [
    f'Year {year}' for year in average_reading_score_by_school.columns
]
average_reading_score_by_school

## Scores by School Spending


In [None]:
# Per-student budget bands. pd.cut uses right-closed intervals by default, so
# the bands are (0, 585], (585, 615], (615, 645], (645, 675]; a value outside
# (0, 675] is assigned NaN and falls out of the grouped summary.
spending_bins = [0, 585, 615, 645, 675]
spending_labels = ['<$585', '$585-615', '$615-645', '$645-675']
school_summary['Spending Ranges (Per Student)'] = pd.cut(
    school_summary['Per Student Budget'], bins=spending_bins, labels=spending_labels
)

# Average the per-school metrics within each spending band. `observed=False`
# is passed explicitly because the grouping key is categorical: it keeps every
# band in the result (even an empty one) and avoids the pandas FutureWarning
# about the default for `observed` changing.
spending_summary = (
    school_summary
    .groupby('Spending Ranges (Per Student)', observed=False)[
        ['Average Maths Score', 'Average Reading Score',
         '% Passing Maths', '% Passing Reading', '% Overall passing']
    ]
    .mean()
)
spending_summary_rounded = spending_summary.round(2)
display(spending_summary_rounded)


## Scores by School Size


In [None]:
# School-size bands measured in number of students. pd.cut uses right-closed
# intervals by default: (0, 1000], (1000, 2000], (2000, 5000].
size_bins = [0, 1000, 2000, 5000]
# Labels describe student counts, so they carry no currency symbol.
size_labels = ['Small (<1000)', 'Medium (1000-2000)', 'Large (2000-5000)']
school_summary['School Size'] = pd.cut(
    school_summary['Total Students'], bins=size_bins, labels=size_labels
)

# Average the per-school metrics within each size band. `observed=False` is
# passed explicitly because the grouping key is categorical: it keeps every
# band in the result (even an empty one) and avoids the pandas FutureWarning
# about the default for `observed` changing.
size_summary = (
    school_summary
    .groupby('School Size', observed=False)[
        ['Average Maths Score', 'Average Reading Score',
         '% Passing Maths', '% Passing Reading', '% Overall passing']
    ]
    .mean()
)
display(size_summary)

## Scores by School Type


In [None]:
# The two school types; this list is not referenced by the grouping below.
type_bins = ["Government", "Independent"]

# Group the per-school summary rows by their school type.
grouped_school_summary = school_summary.groupby(by='School Type')

# Average each score / pass-rate column across the schools of each type.
type_summary = grouped_school_summary[
    ['Average Maths Score', 'Average Reading Score',
     '% Passing Maths', '% Passing Reading', '% Overall passing']
].mean()

display(type_summary)