### Note
* Instructions have been included for each segment. You do not have to follow them exactly, but they are included to help you think through the steps.

In [73]:
# Dependencies and Setup
import pandas as pd

In [74]:
# Input CSV locations, relative to the notebook's working directory.
# Adjust both paths together if the Resources folder is moved or renamed.
student_csv_path = "Resources/students_complete.csv"
school_csv_path = "Resources/schools_complete.csv"

In [75]:
# Load the school and student CSV files into pandas DataFrames
# (schools first, then students).
school_data, student_data = (
    pd.read_csv(csv_path) for csv_path in (school_csv_path, student_csv_path)
)

In [76]:
# Attach each school's attributes to every student row. A left join keeps all
# student rows; "school_name" is the one key column, so it is passed once
# rather than as a list that repeats it.
school_data_complete = pd.merge(student_data, school_data, how="left", on="school_name")

In [77]:
# Preview the first merged row to check which columns the combined table carries.
school_data_complete.head(n=1)

Unnamed: 0,Student ID,student_name,gender,grade,school_name,reading_score,math_score,School ID,type,size,budget
0,0,Paul Bradley,M,9th,Huang High School,66,79,0,District,2917,1910635


In [78]:
# Preview the first row of the raw school table to check its column names.
school_data.head(n=1)

Unnamed: 0,School ID,school_name,type,size,budget
0,0,Huang High School,District,2917,1910635


In [79]:
# Preview the first row of the raw student table to check its column names.
student_data.head(n=1)

Unnamed: 0,Student ID,student_name,gender,grade,school_name,reading_score,math_score
0,0,Paul Bradley,M,9th,Huang High School,66,79


## District Summary

In [80]:
# District-wide summary: school/student counts, total budget, average scores
# and pass rates, collected into a one-row DataFrame.

# Denominator shared by both pass rates: the number of non-null student names.
student_total = student_data['student_name'].count()

# --- Math pass rate ---
# Keep only the rows whose math score is strictly above 70.
# NOTE(review): a score of exactly 70 is not counted as passing here; confirm
# this matches the intended grading rule.
to_pass_math = student_data.loc[student_data['math_score'] > 70]
math_pass = (to_pass_math['math_score'].count() / student_total) * 100

# --- Reading pass rate ---
# Same rule applied to the reading score.
to_pass_reading = student_data.loc[student_data['reading_score'] > 70]
reading_pass = (to_pass_reading['reading_score'].count() / student_total) * 100

# Column labels and their values, listed in display order.
district_columns = [
    'Total Schools',
    'Total Students',
    'Total Budget',
    'Average Math score',
    'Average Reading score',
    '% Passing Math',
    '% Passing Reading',
    '% Overall Passing Rate',
]
district_values = [
    school_data['school_name'].count(),
    student_total,
    school_data['budget'].sum(),
    round(student_data['math_score'].mean(), 2),
    round(student_data['reading_score'].mean(), 2),
    round(math_pass, 2),
    round(reading_pass, 2),
    # Overall rate is the plain average of the two subject pass rates.
    round((math_pass + reading_pass) / 2, 2),
]

# Each value is wrapped in a one-element list so the frame has exactly one row.
Dist_dict = {column: [value] for column, value in zip(district_columns, district_values)}

# Build the summary frame with the columns in the order listed above.
Dist_summary_df = pd.DataFrame(Dist_dict, columns=district_columns)

# Display the summary as the cell output.
Dist_summary_df


Unnamed: 0,Total Schools,Total Students,Total Budget,Average Math score,Average Reading score,% Passing Math,% Passing Reading,% Overall Passing Rate
0,15,39170,24649428,78.99,81.88,72.39,82.97,77.68


## School Summary


In [81]:
# Per-school summary table: name, type, enrolment, budget and per-student budget.

# Pull only the descriptive columns needed from the raw school table.
Extract_School = school_data.loc[:, ['school_name', 'type', 'size', 'budget']]

# Build the summary as a new frame with display-friendly column names.
# rename() without inplace returns a separate DataFrame, so the column added
# below is written to School_Summary_df only; Extract_School and school_data
# keep their original columns.
School_Summary_df = Extract_School.rename(columns={
    'school_name': 'School-Name',
    'type': 'School Type',
    'size': 'Total Students',
    'budget': 'Total School Budget',
})

# Per-student budget: each school's total budget divided by its student count.
School_Summary_df['Per Student Budget'] = (
    School_Summary_df['Total School Budget'] / School_Summary_df['Total Students']
)

# Display the summary as the cell output.
School_Summary_df

Unnamed: 0,School-Name,School Type,Total Students,Total School Budget,Per Student Budget
0,Huang High School,District,2917,1910635,655.0
1,Figueroa High School,District,2949,1884411,639.0
2,Shelton High School,Charter,1761,1056600,600.0
3,Hernandez High School,District,4635,3022020,652.0
4,Griffin High School,Charter,1468,917500,625.0
5,Wilson High School,Charter,2283,1319574,578.0
6,Cabrera High School,Charter,1858,1081356,582.0
7,Bailey High School,District,4976,3124928,628.0
8,Holden High School,Charter,427,248087,581.0
9,Pena High School,Charter,962,585858,609.0


## Top Performing Schools (By Passing Rate)

* Sort and display the top five schools in overall passing rate

## Bottom Performing Schools (By Passing Rate)

* Sort and display the five worst-performing schools

## Math Scores by Grade

* Create a table that lists the average Math Score for students of each grade level (9th, 10th, 11th, 12th) at each school.

  * Create a pandas series for each grade. Hint: use a conditional statement.
  
  * Group each series by school
  
  * Combine the series into a dataframe
  
  * Optional: give the displayed data cleaner formatting

## Reading Scores by Grade

* Perform the same operations as above for reading scores

## Scores by School Spending

* Create a table that breaks down school performances based on average Spending Ranges (Per Student). Use 4 reasonable bins to group school spending. Include in the table each of the following:
  * Average Math Score
  * Average Reading Score
  * % Passing Math
  * % Passing Reading
  * Overall Passing Rate (Average of the above two)

In [6]:
# Sample per-student spending ranges; replace with your own if preferred.
# Five bin edges delimit four ranges, and there is one label per range.
group_names = [
    "<$585",
    "$585-615",
    "$615-645",
    "$645-675",
]
spending_bins = [0, 585, 615, 645, 675]

## Scores by School Size

* Perform the same operations as above, based on school size.

In [7]:
# Sample school-size ranges; replace with your own if preferred.
# Four bin edges delimit three ranges, and there is one label per range.
# NOTE: this rebinds group_names, which the spending-range cell also assigns,
# so the spending labels are no longer available under that name afterwards.
group_names = [
    "Small (<1000)",
    "Medium (1000-2000)",
    "Large (2000-5000)",
]
size_bins = [0, 1000, 2000, 5000]

## Scores by School Type

* Perform the same operations as above, based on school type.