#                      PYCITY SCHOOLS SNAPSHOT

### Note
* Instructions have been included for each segment. You do not have to follow them exactly, but they are included to help you think through the steps.

In [None]:
# Dependencies and Setup
import pandas as pd

# Paths to the input CSV files (remember to change these if the files move)
school_data_to_load = "Resources/schools_complete.csv"
student_data_to_load = "Resources/students_complete.csv"

# Read the school and student files into separate pandas DataFrames
school_data = pd.read_csv(school_data_to_load)
student_data = pd.read_csv(student_data_to_load)

# Left-join the school attributes onto every student row.
# The join key is named once; repeating "school_name" in `on` adds nothing
# to the match and only obscures which column drives the join.
school_data_complete = pd.merge(student_data, school_data, how="left", on="school_name")

In [None]:
# Set Student ID as the index & view the table.
# set_index returns a new DataFrame and the result is not assigned here, so
# school_data_complete itself keeps "Student ID" as a regular column
# (the total-students calculation below reads that column).
school_data_complete.set_index("Student ID")

# District Summary

* Calculate the total number of schools

* Calculate the total number of students

* Calculate the total budget

* Calculate the average math score 

* Calculate the average reading score

* Calculate the overall passing rate (overall average score), i.e. (avg. math score + avg. reading score)/2

* Calculate the percentage of students with a passing math score (70 or greater)

* Calculate the percentage of students with a passing reading score (70 or greater)

* Create a dataframe to hold the above results

* Optional: give the displayed data cleaner formatting

## Schools, Students and Budget

In [None]:
# Total schools: number of distinct school names in the merged table
unique_school_names = school_data_complete["school_name"].unique()
total_schools = len(unique_school_names)

# Total students: number of non-null Student ID entries
student_id_column = school_data_complete["Student ID"]
total_students = student_id_column.count()

In [None]:
# Total budget: summed over the per-school table, because the merged
# student-level table repeats each school's budget on every student row.
budget_by_school = school_data["budget"]
total_budget = budget_by_school.sum()
total_budget

## Calculating Scores

In [None]:
# ---------------------------- Average scores ----------------------------
# Average math score: sum of every math score divided by the student count
math_score_column = school_data_complete["math_score"]
total_math_score = math_score_column.sum()
avg_math_score = total_math_score / total_students

# Average reading score, computed the same way
reading_score_column = school_data_complete["reading_score"]
total_read_score = reading_score_column.sum()
avg_read_score = total_read_score / total_students

# Overall passing rate (overall average score):
# the midpoint of the two averages above
combined_average = avg_math_score + avg_read_score
overal_passing_rate = combined_average / 2

## Passing Percentages

In [None]:
# ....Calculate the percentage of students with a passing math score (70 or greater)....
# Boolean mask: True for every student whose math score is 70 or above
passed_math_mask = school_data_complete["math_score"] >= 70

# Tally of passing (True) vs failing (False) students
math_scores = passed_math_mask.value_counts()

# Tabular views of the tally. The column labels are set explicitly so they do
# not depend on how value_counts names its index and values.
math_scores_df = pd.DataFrame(math_scores).reset_index()
math_scores_rename = (
    math_scores.rename_axis("Passing Status: Math")
    .reset_index(name="student_count")
)

# Count the passing students straight from the mask. Summing the first row of
# the tally table would add the True/False label to the count and would pick
# the failing row whenever failures outnumber passes.
passed_math = passed_math_mask.sum()
percent_passed_math = passed_math * 100 / total_students

In [None]:
# ....Calculate the percentage of students with a passing reading score (70 or greater)....
# Boolean mask: True for every student whose reading score is 70 or above
passed_read_mask = school_data_complete["reading_score"] >= 70

# Tally of passing (True) vs failing (False) students
reading_scores = passed_read_mask.value_counts()

# Tabular views of the tally. The column labels are set explicitly so they do
# not depend on how value_counts names its index and values.
read_scores_df = pd.DataFrame(reading_scores).reset_index()
read_scores_rename = (
    reading_scores.rename_axis("Passing Status: Reading")
    .reset_index(name="student_count")
)

# Count the passing students straight from the mask. Summing the first row of
# the tally table would add the True/False label to the count and would pick
# the failing row whenever failures outnumber passes.
passed_read = passed_read_mask.sum()
percent_passed_read = passed_read * 100 / total_students

## Summary

In [None]:
# Gather the district-wide metrics into a one-row summary table.
# Every value is wrapped in a single-element list so each column is built
# the same way.
district_metrics = {
    "Total Schools": [total_schools],
    "Total Students": [total_students],
    "Total Budget": [total_budget],
    "Average Math Score": [avg_math_score],
    "Average Reading Score": [avg_read_score],
    "Passing Math (%)": [percent_passed_math],
    "Passing Reading(%)": [percent_passed_read],
    "Overall Passing Rate(%)": [overal_passing_rate],
}
district_summary_df = pd.DataFrame(district_metrics)
district_summary_df

## School Summary

* Create an overview table that summarizes key metrics about each school, including:
  * School Name
  * School Type
  * Total Students
  * Total School Budget
  * Per Student Budget
  * Average Math Score
  * Average Reading Score
  * % Passing Math
  * % Passing Reading
  * Overall Passing Rate (Average of the above two)
  
* Create a dataframe to hold the above results

## Top Performing Schools (By Passing Rate)

* Sort and display the top five schools in overall passing rate

## Bottom Performing Schools (By Passing Rate)

* Sort and display the five worst-performing schools

## Math Scores by Grade

* Create a table that lists the average Math Score for students of each grade level (9th, 10th, 11th, 12th) at each school.

  * Create a pandas series for each grade. Hint: use a conditional statement.
  
  * Group each series by school
  
  * Combine the series into a dataframe
  
  * Optional: give the displayed data cleaner formatting

In [None]:
# Per-school average reading and math scores for each grade level.
# The rows come from the merged student/school table, and the groupby is
# restricted to the two score columns so that text columns (such as grade)
# never reach mean() and unrelated columns are not carried into later merges.
score_columns = ["reading_score", "math_score"]

# Filter scores by 9th grade
nine = school_data_complete.loc[school_data_complete["grade"] == "9th"]
nine_math_avg = nine.groupby("school_name")[score_columns].mean()
nine_math_avg_rename = nine_math_avg.rename(columns={"reading_score": "Grade 9 Reading Score",
                                                     "math_score": "Grade 9 Math Score"})

# Filter scores by 10th grade
ten = school_data_complete.loc[school_data_complete["grade"] == "10th"]
ten_math_avg = ten.groupby("school_name")[score_columns].mean()
ten_math_avg_rename = ten_math_avg.rename(columns={"reading_score": "Grade 10 Reading Score",
                                                   "math_score": "Grade 10 Math Score"})

# Filter scores by 11th grade
eleven = school_data_complete.loc[school_data_complete["grade"] == "11th"]
eleven_math_avg = eleven.groupby("school_name")[score_columns].mean()
eleven_math_avg_rename = eleven_math_avg.rename(columns={"reading_score": "Grade 11 Reading Score",
                                                         "math_score": "Grade 11 Math Score"})

# Filter scores by 12th grade
twelve = school_data_complete.loc[school_data_complete["grade"] == "12th"]
twelve_math_avg = twelve.groupby("school_name")[score_columns].mean()
twelve_math_avg_rename = twelve_math_avg.rename(columns={"reading_score": "Grade 12 Reading Score",
                                                         "math_score": "Grade 12 Math Score"})

In [None]:
# Every merge below is a left join keyed on school_name, so the rows of the
# result follow the left-hand table of each step.
left_join_on_school = {"how": "left", "on": ["school_name"]}

# 9th + 10th grade averages
nine_ten_avg = nine_math_avg_rename.merge(ten_math_avg_rename, **left_join_on_school)

# 11th + 12th grade averages
eleven_twelve_avg = eleven_math_avg_rename.merge(twelve_math_avg_rename, **left_join_on_school)

# All four grades side by side
school_score_avg = nine_ten_avg.merge(eleven_twelve_avg, **left_join_on_school)

In [None]:
# Keep only the per-grade math averages for each school
math_score_columns = [
    "Grade 9 Math Score",
    "Grade 10 Math Score",
    "Grade 11 Math Score",
    "Grade 12 Math Score",
]
math_avg = school_score_avg[math_score_columns]
math_avg

## Reading Score by Grade 

* Perform the same operations as above for reading scores

In [None]:
# Keep only the per-grade reading averages for each school
reading_score_columns = [
    "Grade 9 Reading Score",
    "Grade 10 Reading Score",
    "Grade 11 Reading Score",
    "Grade 12 Reading Score",
]
reading_avg = school_score_avg[reading_score_columns]
reading_avg

## Scores by School Spending

* Create a table that breaks down school performance by per-student spending range. Use 4 reasonable bins to group school spending. Include each of the following in the table:
  * Average Math Score
  * Average Reading Score
  * % Passing Math
  * % Passing Reading
  * Overall Passing Rate (Average of the above two)

In [None]:
# Sample bins. Feel free to create your own bins.
# Five bin edges delimit four ranges, one per label in group_names.
spending_bins = [0, 585, 615, 645, 675]
# NOTE(review): group_names is assigned again in the school-size cell further
# down, so whichever of the two cells ran last determines its value.
group_names = ["<$585", "$585-615", "$615-645", "$645-675"]

## Scores by School Size

* Perform the same operations as above, based on school size.

In [None]:
# Sample bins. Feel free to create your own bins.
# Four bin edges delimit three ranges, one per label in group_names.
size_bins = [0, 1000, 2000, 5000]
# NOTE(review): this rebinds group_names, replacing the spending labels
# assigned in the school-spending cell above.
group_names = ["Small (<1000)", "Medium (1000-2000)", "Large (2000-5000)"]

## Scores by School Type

* Perform the same operations as above, based on school type.