### Note
* Instructions have been included for each segment. You do not have to follow them exactly, but they are included to help you think through the steps.

In [2]:
# Dependencies and Setup
import pandas as pd
import numpy as np
import pandas._testing as tm

# File to Load (Remember to Change These)
school_data_to_load = "Resources/schools_complete.csv"
student_data_to_load = "Resources/students_complete.csv"

# Read School and Student Data File and store into Pandas DataFrames
school_data = pd.read_csv(school_data_to_load)
student_data = pd.read_csv(student_data_to_load)

# Combine the data into a single dataset.  
school_data_complete = pd.merge(student_data, school_data, how="left", on=["school_name", "school_name"])

## Local Government Area Summary

* Calculate the total number of schools

* Calculate the total number of students

* Calculate the total budget

* Calculate the average maths score 

* Calculate the average reading score

* Calculate the percentage of students with a passing maths score (50 or greater)

* Calculate the percentage of students with a passing reading score (50 or greater)

* Calculate the percentage of students who passed maths **and** reading (% Overall Passing)

* Create a dataframe to hold the above results

* Optional: give the displayed data cleaner formatting

In [3]:
import pandas as pd

In [4]:
school_data_csv = "Resources/schools_complete.csv"
school_data_df = pd.read_csv(school_data_csv)

school_data_df.describe()

Unnamed: 0,School ID,size,budget
count,15.0,15.0,15.0
mean,7.0,2611.333333,1643295.0
std,4.472136,1420.915282,934776.3
min,0.0,427.0,248087.0
25%,3.5,1698.0,1046265.0
50%,7.0,2283.0,1319574.0
75%,10.5,3474.0,2228999.0
max,14.0,4976.0,3124928.0


In [5]:
student_data_csv = "Resources/students_complete.csv"
student_data_df = pd.read_csv(student_data_csv)
student_data_df.describe()

Unnamed: 0,Student ID,year,reading_score,maths_score
count,39170.0,39170.0,39170.0,39170.0
mean,19584.5,10.359586,69.980138,70.338192
std,11307.549359,1.102779,17.242346,16.910154
min,0.0,9.0,39.0,39.0
25%,9792.25,9.0,55.0,56.0
50%,19584.5,10.0,70.0,70.0
75%,29376.75,11.0,85.0,85.0
max,39169.0,12.0,99.0,99.0


In [6]:
student_with_school_combined_data_df = pd.merge(student_data_df, school_data_df, how="left", on=["school_name", "school_name"])
student_with_school_combined_data_df.head()

Unnamed: 0,Student ID,student_name,gender,year,school_name,reading_score,maths_score,School ID,type,size,budget
0,0,Paul Bradley,M,9,Huang High School,96,94,0,Government,2917,1910635
1,1,Victor Smith,M,12,Huang High School,90,43,0,Government,2917,1910635
2,2,Kevin Rodriguez,M,12,Huang High School,41,76,0,Government,2917,1910635
3,3,Richard Scott,M,12,Huang High School,89,86,0,Government,2917,1910635
4,4,Bonnie Ray,F,9,Huang High School,87,69,0,Government,2917,1910635


In [7]:
unique_schools = student_with_school_combined_data_df["school_name"].unique()
unique_schools

array(['Huang High School', 'Figueroa High School', 'Shelton High School',
       'Hernandez High School', 'Griffin High School',
       'Wilson High School', 'Cabrera High School', 'Bailey High School',
       'Holden High School', 'Pena High School', 'Wright High School',
       'Rodriguez High School', 'Johnson High School', 'Ford High School',
       'Thomas High School'], dtype=object)

In [8]:
total_budget_df = student_with_school_combined_data_df.groupby(['school_name', 'type'])["budget"].mean().astype(int)
total_budget_df.head()

school_name           type       
Bailey High School    Government     3124928
Cabrera High School   Independent    1081356
Figueroa High School  Government     1884411
Ford High School      Government     1763916
Griffin High School   Independent     917500
Name: budget, dtype: int32

In [9]:
LGA_Budget = "{:,.0f}".format(total_budget_df.sum().astype(int))
LGA_Budget =('$ ' +str(LGA_Budget))
print(LGA_Budget)

$ 24,649,428


In [10]:
LGA_schools = len(unique_schools)
LGA_schools

15

In [11]:
LGA_Students ="{:,.0f}".format(len(student_with_school_combined_data_df))
LGA_Students

'39,170'

In [12]:
LGA_MathScore_Avg = "{:.2f}".format(student_with_school_combined_data_df['maths_score'].mean())
LGA_MathScore_Avg

'70.34'

In [13]:
LGA_ReadingScore_Avg = "{:.2f}".format(student_with_school_combined_data_df['reading_score'].mean())
LGA_ReadingScore_Avg

'69.98'

In [14]:
LGA_math_pass = (((student_with_school_combined_data_df.where(student_with_school_combined_data_df.maths_score>=50)).count())/(len(student_with_school_combined_data_df))*100).map("{:,.2f}%".format)
LGA_math_pass = (LGA_math_pass.loc['maths_score'])
print(LGA_math_pass)

86.08%


In [15]:
LGA_reading_pass = (((student_with_school_combined_data_df.where(student_with_school_combined_data_df.reading_score>=50)).count())/(len(student_with_school_combined_data_df))*100).map("{:,.2f}%".format)
LGA_reading_pass=(LGA_reading_pass.loc['reading_score'])
print(LGA_reading_pass)

84.43%


In [16]:
pass_threshold = 50
conditions = [(student_with_school_combined_data_df.maths_score>=pass_threshold) & (student_with_school_combined_data_df.reading_score>=pass_threshold),(student_with_school_combined_data_df.maths_score<pass_threshold) & (student_with_school_combined_data_df.reading_score>=pass_threshold),(student_with_school_combined_data_df.maths_score>=pass_threshold) & (student_with_school_combined_data_df.reading_score<pass_threshold),(student_with_school_combined_data_df.maths_score<pass_threshold) & (student_with_school_combined_data_df.reading_score<pass_threshold)]
outcomes = ["Passed Both", "Passed Reading", "Passed Maths", "Neither Passed"]

student_with_school_combined_data_df['Pass Outcome']=np.select(conditions, outcomes, "ERROR")
student_with_school_combined_data_df=pd.DataFrame(student_with_school_combined_data_df)

In [17]:
LGA_Total_Pass="{:.2f}".format(((len(student_with_school_combined_data_df[student_with_school_combined_data_df['Pass Outcome']=='Passed Both']))/(len(student_with_school_combined_data_df)))*100)
LGA_Total_Pass

'72.81'

In [18]:
LGA_Table = {'Total Schools':LGA_schools, "Total Students":LGA_Students, "Total Budget": LGA_Budget, "Average Maths Score":LGA_MathScore_Avg, "Average Reading Score":LGA_ReadingScore_Avg , "%Passing Maths":LGA_math_pass, "%Passing Reading":LGA_reading_pass, "% Overall Passing":LGA_Total_Pass}
LGA_Table_df = pd.DataFrame.from_dict(LGA_Table, orient='index')
print(LGA_Table_df.transpose())

  Total Schools Total Students  Total Budget Average Maths Score  \
0            15         39,170  $ 24,649,428               70.34   

  Average Reading Score %Passing Maths %Passing Reading % Overall Passing  
0                 69.98         86.08%           84.43%             72.81  


## School Summary

In [19]:
LGA_Budget = "${:,.2f}".format(total_budget_df.sum())
LGA_Budget

'$24,649,428.00'

In [20]:
Average_Maths_Score = student_with_school_combined_data_df[["maths_score"]].mean().map("{:,.2f}".format)
print(Average_Maths_Score.loc['maths_score'])

70.34


In [21]:
total_Students_df = student_with_school_combined_data_df.groupby(['school_name', 'type'])["student_name"].count().astype(int)
total_Students_df

school_name            type       
Bailey High School     Government     4976
Cabrera High School    Independent    1858
Figueroa High School   Government     2949
Ford High School       Government     2739
Griffin High School    Independent    1468
Hernandez High School  Government     4635
Holden High School     Independent     427
Huang High School      Government     2917
Johnson High School    Government     4761
Pena High School       Independent     962
Rodriguez High School  Government     3999
Shelton High School    Independent    1761
Thomas High School     Independent    1635
Wilson High School     Independent    2283
Wright High School     Independent    1800
Name: student_name, dtype: int32

In [22]:
Percent_math_pass = (((student_with_school_combined_data_df.where(student_with_school_combined_data_df.maths_score>=50)).count())/(len(student_with_school_combined_data_df))*100).map("{:,.2f}%".format)
print(Percent_math_pass.loc['maths_score'])

86.08%


In [23]:
Average_Reading_Score = student_with_school_combined_data_df[["reading_score"]].mean().map("{:,.2f}".format)
print(Average_Reading_Score.loc['reading_score'])

69.98


In [24]:
Percent_reading_pass = (((student_with_school_combined_data_df.where(student_with_school_combined_data_df.reading_score>=50)).count())/(len(student_with_school_combined_data_df))*100).map("{:,.2f}%".format)
print(Percent_reading_pass.loc['reading_score'])

84.43%


In [25]:
Overall_Pass = "{:,.0f}".format((len(student_with_school_combined_data_df[(student_with_school_combined_data_df['reading_score']>=50) & (student_with_school_combined_data_df['maths_score']>=50)])))
Overall_Pass

'28,519'

In [26]:
LGA_summary_df = pd.DataFrame({
     "LGA Name": ["PyCitySchols LGA"],
    "Total Schools": [total_unique_schools],
    "Total Students": [Total_Students],
    "Total Budget": [LGA_Budget],
    "Average Maths Score": [(Average_Maths_Score.loc['maths_score'])],
    "Average Reading Score": [(Average_Reading_Score.loc['reading_score'])],
    "% Passing Maths": [(Percent_math_pass.loc['maths_score'])],
    "% Passing Reading": [(Percent_reading_pass.loc['reading_score'])],
    "% Overall Pass Mark": [LGA_Total_Pass]
      
})
LGA_summary_df

NameError: name 'total_unique_schools' is not defined

* Create an overview table that summarises key metrics about each school, including:
  * School Name
  * School Type
  * Total Students
  * Total School Budget
  * Per Student Budget
  * Average Maths Score
  * Average Reading Score
  * % Passing Maths
  * % Passing Reading
  * % Overall Passing (The percentage of students that passed maths **and** reading.)
  
* Create a dataframe to hold the above results

In [None]:
Schools_summary1_df = student_with_school_combined_data_df.groupby(['school_name', 'type']).agg(
    T_Students=('student_name', np.size),
    T_School_Budget=('budget', np.mean),
    Av_Maths_Score=('maths_score', np.mean),
    Av_Reading_Score=('reading_score', np.mean))

Schools_summary1_df


Unnamed: 0_level_0,Unnamed: 1_level_0,T_Students,T_School_Budget,Av_Maths_Score,Av_Reading_Score
school_name,type,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Bailey High School,Government,4976,3124928.0,72.352894,71.008842
Cabrera High School,Independent,1858,1081356.0,71.657158,71.359526
Figueroa High School,Government,2949,1884411.0,68.698542,69.077993
Ford High School,Government,2739,1763916.0,69.091274,69.572472
Griffin High School,Independent,1468,917500.0,71.788147,71.245232
Hernandez High School,Government,4635,3022020.0,68.874865,69.186408
Holden High School,Independent,427,248087.0,72.583138,71.660422
Huang High School,Government,2917,1910635.0,68.935207,68.910525
Johnson High School,Government,4761,3094650.0,68.8431,69.039277
Pena High School,Independent,962,585858.0,72.088358,71.613306


In [None]:
School_Percent_reading_pass = (((student_with_school_combined_data_df.where(student_with_school_combined_data_df.reading_score>=50)).count())/(len(student_with_school_combined_data_df))*100).map("{:,.2f}%".format)
print(Percent_reading_pass.loc['reading_score'])

84.43%


In [None]:
School_Percent_maths_pass = (((student_with_school_combined_data_df.where(student_with_school_combined_data_df.maths_score>=50)).count())/(len(student_with_school_combined_data_df))*100).map("{:,.2f}%".format)
print(Percent_reading_pass.loc['maths_score'])

84.43%


In [None]:
maths_passed = student_with_school_combined_data_df.where(student_with_school_combined_data_df.maths_score >=50).groupby('school_name').agg(No_passed_maths = ('school_name', np.size)).reset_index()
maths_passed


Unnamed: 0,school_name,No_passed_maths
0,Bailey High School,4560
1,Cabrera High School,1688
2,Figueroa High School,2408
3,Ford High School,2258
4,Griffin High School,1339
5,Hernandez High School,3752
6,Holden High School,384
7,Huang High School,2383
8,Johnson High School,3907
9,Pena High School,882


In [None]:
reading_passed = student_with_school_combined_data_df.where(student_with_school_combined_data_df.reading_score >=50).groupby('school_name').agg(No_passed_reading = ('school_name', np.size)).reset_index()
reading_passed


Unnamed: 0,school_name,No_passed_reading
0,Bailey High School,4348
1,Cabrera High School,1655
2,Figueroa High School,2442
3,Ford High School,2252
4,Griffin High School,1299
5,Hernandez High School,3795
6,Holden High School,378
7,Huang High School,2376
8,Johnson High School,3903
9,Pena High School,833


In [None]:
school_score_reading = pd.merge(Schools_summary1_df, reading_passed, how='left', on=['school_name', 'school_name'])
school_score_reading


Unnamed: 0,school_name,T_Students,T_School_Budget,Av_Maths_Score,Av_Reading_Score,No_passed_reading
0,Bailey High School,4976,3124928.0,72.352894,71.008842,4348
1,Cabrera High School,1858,1081356.0,71.657158,71.359526,1655
2,Figueroa High School,2949,1884411.0,68.698542,69.077993,2442
3,Ford High School,2739,1763916.0,69.091274,69.572472,2252
4,Griffin High School,1468,917500.0,71.788147,71.245232,1299
5,Hernandez High School,4635,3022020.0,68.874865,69.186408,3795
6,Holden High School,427,248087.0,72.583138,71.660422,378
7,Huang High School,2917,1910635.0,68.935207,68.910525,2376
8,Johnson High School,4761,3094650.0,68.8431,69.039277,3903
9,Pena High School,962,585858.0,72.088358,71.613306,833


In [None]:
school_score_both = pd.merge(school_score_reading, maths_passed, how='left', on=['school_name', 'school_name'])
school_score_both


Unnamed: 0,school_name,T_Students,T_School_Budget,Av_Maths_Score,Av_Reading_Score,No_passed_reading,No_passed_maths
0,Bailey High School,4976,3124928.0,72.352894,71.008842,4348,4560
1,Cabrera High School,1858,1081356.0,71.657158,71.359526,1655,1688
2,Figueroa High School,2949,1884411.0,68.698542,69.077993,2442,2408
3,Ford High School,2739,1763916.0,69.091274,69.572472,2252,2258
4,Griffin High School,1468,917500.0,71.788147,71.245232,1299,1339
5,Hernandez High School,4635,3022020.0,68.874865,69.186408,3795,3752
6,Holden High School,427,248087.0,72.583138,71.660422,378,384
7,Huang High School,2917,1910635.0,68.935207,68.910525,2376,2383
8,Johnson High School,4761,3094650.0,68.8431,69.039277,3903,3907
9,Pena High School,962,585858.0,72.088358,71.613306,833,882


In [None]:
school_score_both['Percentage passed Reading'] = ((school_score_both['No_passed_reading'] / school_score_both['T_Students'])*100)
school_score_both['Percentage passed maths'] = ((school_score_both['No_passed_maths'] / school_score_both['T_Students'])*100)
school_score_both

Unnamed: 0,school_name,T_Students,T_School_Budget,Av_Maths_Score,Av_Reading_Score,No_passed_reading,No_passed_maths,Percentage passed Reading,Percentage passed maths
0,Bailey High School,4976,3124928.0,72.352894,71.008842,4348,4560,87.379421,91.639871
1,Cabrera High School,1858,1081356.0,71.657158,71.359526,1655,1688,89.074273,90.850377
2,Figueroa High School,2949,1884411.0,68.698542,69.077993,2442,2408,82.807731,81.654798
3,Ford High School,2739,1763916.0,69.091274,69.572472,2252,2258,82.219788,82.438846
4,Griffin High School,1468,917500.0,71.788147,71.245232,1299,1339,88.487738,91.212534
5,Hernandez High School,4635,3022020.0,68.874865,69.186408,3795,3752,81.877023,80.949299
6,Holden High School,427,248087.0,72.583138,71.660422,378,384,88.52459,89.929742
7,Huang High School,2917,1910635.0,68.935207,68.910525,2376,2383,81.453548,81.693521
8,Johnson High School,4761,3094650.0,68.8431,69.039277,3903,3907,81.978576,82.062592
9,Pena High School,962,585858.0,72.088358,71.613306,833,882,86.590437,91.683992


In [None]:
overall_passed = student_with_school_combined_data_df.where((student_with_school_combined_data_df.maths_score >= 50) & (student_with_school_combined_data_df.reading_score >= 50)).groupby('school_name').agg(No_overall_pass = ('school_name', np.size)).reset_index()
overall_passed

Unnamed: 0,school_name,No_overall_pass
0,Bailey High School,3985
1,Cabrera High School,1501
2,Figueroa High School,1995
3,Ford High School,1848
4,Griffin High School,1194
5,Hernandez High School,3076
6,Holden High School,337
7,Huang High School,1946
8,Johnson High School,3199
9,Pena High School,762


In [None]:
school_score_both = pd.merge(school_score_both, overall_passed, how='left', on=['school_name', 'school_name'])
school_score_both

Unnamed: 0,school_name,T_Students,T_School_Budget,Av_Maths_Score,Av_Reading_Score,No_passed_reading,No_passed_maths,Percentage passed Reading,Percentage passed maths,No_overall_pass
0,Bailey High School,4976,3124928.0,72.352894,71.008842,4348,4560,87.379421,91.639871,3985
1,Cabrera High School,1858,1081356.0,71.657158,71.359526,1655,1688,89.074273,90.850377,1501
2,Figueroa High School,2949,1884411.0,68.698542,69.077993,2442,2408,82.807731,81.654798,1995
3,Ford High School,2739,1763916.0,69.091274,69.572472,2252,2258,82.219788,82.438846,1848
4,Griffin High School,1468,917500.0,71.788147,71.245232,1299,1339,88.487738,91.212534,1194
5,Hernandez High School,4635,3022020.0,68.874865,69.186408,3795,3752,81.877023,80.949299,3076
6,Holden High School,427,248087.0,72.583138,71.660422,378,384,88.52459,89.929742,337
7,Huang High School,2917,1910635.0,68.935207,68.910525,2376,2383,81.453548,81.693521,1946
8,Johnson High School,4761,3094650.0,68.8431,69.039277,3903,3907,81.978576,82.062592,3199
9,Pena High School,962,585858.0,72.088358,71.613306,833,882,86.590437,91.683992,762


In [None]:
school_score_both['Overall Pass'] = ((school_score_both['No_overall_pass'] / school_score_both['T_Students'])*100)
school_score_both

Unnamed: 0,school_name,T_Students,T_School_Budget,Av_Maths_Score,Av_Reading_Score,No_passed_reading,No_passed_maths,Percentage passed Reading,Percentage passed maths,No_overall_pass,Overall Pass
0,Bailey High School,4976,3124928.0,72.352894,71.008842,4348,4560,87.379421,91.639871,3985,80.084405
1,Cabrera High School,1858,1081356.0,71.657158,71.359526,1655,1688,89.074273,90.850377,1501,80.785791
2,Figueroa High School,2949,1884411.0,68.698542,69.077993,2442,2408,82.807731,81.654798,1995,67.650051
3,Ford High School,2739,1763916.0,69.091274,69.572472,2252,2258,82.219788,82.438846,1848,67.46988
4,Griffin High School,1468,917500.0,71.788147,71.245232,1299,1339,88.487738,91.212534,1194,81.33515
5,Hernandez High School,4635,3022020.0,68.874865,69.186408,3795,3752,81.877023,80.949299,3076,66.364617
6,Holden High School,427,248087.0,72.583138,71.660422,378,384,88.52459,89.929742,337,78.922717
7,Huang High School,2917,1910635.0,68.935207,68.910525,2376,2383,81.453548,81.693521,1946,66.712376
8,Johnson High School,4761,3094650.0,68.8431,69.039277,3903,3907,81.978576,82.062592,3199,67.191766
9,Pena High School,962,585858.0,72.088358,71.613306,833,882,86.590437,91.683992,762,79.209979


## Top Performing Schools (By % Overall Passing)

* Sort and display the top five performing schools by % overall passing.

In [None]:
Top_Sorted_schools_df = school_score_both.sort_values(by='Overall Pass', ascending=False)
Top_Sorted_schools_df.head(5)

Unnamed: 0,school_name,T_Students,T_School_Budget,Av_Maths_Score,Av_Reading_Score,Percentage passed Reading,Percentage passed maths,Overall Pass,Per Student Budget
4,Griffin High School,1468,917500.0,71.788147,71.245232,88.487738,91.212534,81.33515,625.0
1,Cabrera High School,1858,1081356.0,71.657158,71.359526,89.074273,90.850377,80.785791,582.0
0,Bailey High School,4976,3124928.0,72.352894,71.008842,87.379421,91.639871,80.084405,628.0
14,Wright High School,1800,1049400.0,72.047222,70.969444,86.666667,91.777778,79.722222,583.0
10,Rodriguez High School,3999,2547363.0,72.047762,70.935984,87.396849,90.797699,79.419855,637.0


## Bottom Performing Schools (By % Overall Passing)

* Sort and display the five worst-performing schools by % overall passing.

In [None]:
Bottom_Sorted_schools_df = school_score_both.sort_values(by='Overall Pass', ascending=True)
Bottom_Sorted_schools_df.head(5)

Unnamed: 0,school_name,T_Students,T_School_Budget,Av_Maths_Score,Av_Reading_Score,Percentage passed Reading,Percentage passed maths,Overall Pass,Per Student Budget
5,Hernandez High School,4635,3022020.0,68.874865,69.186408,81.877023,80.949299,66.364617,652.0
7,Huang High School,2917,1910635.0,68.935207,68.910525,81.453548,81.693521,66.712376,655.0
8,Johnson High School,4761,3094650.0,68.8431,69.039277,81.978576,82.062592,67.191766,650.0
13,Wilson High School,2283,1319574.0,69.170828,68.876916,81.29654,82.785808,67.455103,578.0
3,Ford High School,2739,1763916.0,69.091274,69.572472,82.219788,82.438846,67.46988,644.0


## Maths Scores by Year

* Create a table that lists the average maths score for students of each year level (9, 10, 11, 12) at each school.

  * Create a pandas series for each year. Hint: use a conditional statement.
  
  * Group each series by school
  
  * Combine the series into a dataframe
  
  * Optional: give the displayed data cleaner formatting

In [None]:
Schools_year_maths_df = student_with_school_combined_data_df.groupby(['school_name', 'type', 'year']).agg(
    Maths_Mean=('maths_score', np.mean))
Schools_year_maths_df

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Maths_Mean
school_name,type,year,Unnamed: 3_level_1
Bailey High School,Government,9,72.493827
Bailey High School,Government,10,71.897498
Bailey High School,Government,11,72.3749
Bailey High School,Government,12,72.675097
Cabrera High School,Independent,9,72.32197
Cabrera High School,Independent,10,72.437768
Cabrera High School,Independent,11,71.008299
Cabrera High School,Independent,12,70.604712
Figueroa High School,Government,9,68.477804
Figueroa High School,Government,10,68.331586


In [None]:
year_score_maths = student_with_school_combined_data_df.loc[:, ['school_name', 'year', 'maths_score']].groupby(['school_name', 'year']).agg(Ave_math_score = ('maths_score', np.average)).reset_index()
year_table_maths = year_score_maths.pivot(index='school_name', columns='year', values='Ave_math_score')
year_table_maths


year,9,10,11,12
school_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Bailey High School,72.493827,71.897498,72.3749,72.675097
Cabrera High School,72.32197,72.437768,71.008299,70.604712
Figueroa High School,68.477804,68.331586,68.811001,69.325282
Ford High School,69.021609,69.387006,69.248862,68.617811
Griffin High School,72.789731,71.093596,71.692521,71.469178
Hernandez High School,68.586831,68.867156,69.154412,68.985075
Holden High School,70.543307,75.105263,71.640777,73.409639
Huang High School,69.081754,68.533246,69.431345,68.639316
Johnson High School,69.469286,67.99022,68.63773,69.287393
Pena High School,71.996364,72.396,72.523438,71.187845


## Reading Score by Year

* Perform the same operations as above for reading scores

In [None]:
year_score_reading = student_with_school_combined_data_df.loc[:, ['school_name', 'year', 'reading_score']].groupby(['school_name', 'year']).agg(Ave_Reading_score = ('reading_score', np.average)).reset_index()
year_table_reading = year_score_reading.pivot(index='school_name', columns='year', values='Ave_Reading_score')
year_table_reading

year,9,10,11,12
school_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Bailey High School,70.90192,70.848265,70.317346,72.195525
Cabrera High School,71.172348,71.328326,71.201245,71.856021
Figueroa High School,70.261682,67.677588,69.152327,69.082126
Ford High School,69.615846,68.988701,70.735964,68.849722
Griffin High School,72.026895,70.746305,72.385042,69.434932
Hernandez High School,68.477569,70.621842,68.418199,69.244136
Holden High School,71.598425,71.096491,73.31068,70.481928
Huang High School,68.670616,69.516297,68.740638,68.671795
Johnson High School,68.719286,69.295029,69.969115,67.992521
Pena High School,70.949091,72.324,71.703125,71.513812


## Scores by School Spending

* Create a table that breaks down school performances based on average Spending Ranges (Per Student). Use 4 reasonable bins to group school spending. Include in the table each of the following:
  * Average Maths Score
  * Average Reading Score
  * % Passing Maths
  * % Passing Reading
  * Overall Passing Rate (Average of the above two)

In [None]:
school_score2_df = school_score_both.reset_index()
school_score2_df

Unnamed: 0,index,school_name,T_Students,T_School_Budget,Av_Maths_Score,Av_Reading_Score,Percentage passed Reading,Percentage passed maths,Overall Pass,Per Student Budget
0,0,Bailey High School,4976,3124928.0,72.352894,71.008842,87.379421,91.639871,80.084405,628.0
1,1,Cabrera High School,1858,1081356.0,71.657158,71.359526,89.074273,90.850377,80.785791,582.0
2,2,Figueroa High School,2949,1884411.0,68.698542,69.077993,82.807731,81.654798,67.650051,639.0
3,3,Ford High School,2739,1763916.0,69.091274,69.572472,82.219788,82.438846,67.46988,644.0
4,4,Griffin High School,1468,917500.0,71.788147,71.245232,88.487738,91.212534,81.33515,625.0
5,5,Hernandez High School,4635,3022020.0,68.874865,69.186408,81.877023,80.949299,66.364617,652.0
6,6,Holden High School,427,248087.0,72.583138,71.660422,88.52459,89.929742,78.922717,581.0
7,7,Huang High School,2917,1910635.0,68.935207,68.910525,81.453548,81.693521,66.712376,655.0
8,8,Johnson High School,4761,3094650.0,68.8431,69.039277,81.978576,82.062592,67.191766,650.0
9,9,Pena High School,962,585858.0,72.088358,71.613306,86.590437,91.683992,79.209979,609.0


In [None]:
school_score2_df['overall passing rate']=(school_score2_df['Percentage passed Reading'] + school_score2_df['Percentage passed maths'])/2
school_score2_df

Unnamed: 0,index,school_name,T_Students,T_School_Budget,Av_Maths_Score,Av_Reading_Score,Percentage passed Reading,Percentage passed maths,Overall Pass,Per Student Budget,overall passing rate
0,0,Bailey High School,4976,3124928.0,72.352894,71.008842,87.379421,91.639871,80.084405,628.0,89.509646
1,1,Cabrera High School,1858,1081356.0,71.657158,71.359526,89.074273,90.850377,80.785791,582.0,89.962325
2,2,Figueroa High School,2949,1884411.0,68.698542,69.077993,82.807731,81.654798,67.650051,639.0,82.231265
3,3,Ford High School,2739,1763916.0,69.091274,69.572472,82.219788,82.438846,67.46988,644.0,82.329317
4,4,Griffin High School,1468,917500.0,71.788147,71.245232,88.487738,91.212534,81.33515,625.0,89.850136
5,5,Hernandez High School,4635,3022020.0,68.874865,69.186408,81.877023,80.949299,66.364617,652.0,81.413161
6,6,Holden High School,427,248087.0,72.583138,71.660422,88.52459,89.929742,78.922717,581.0,89.227166
7,7,Huang High School,2917,1910635.0,68.935207,68.910525,81.453548,81.693521,66.712376,655.0,81.573534
8,8,Johnson High School,4761,3094650.0,68.8431,69.039277,81.978576,82.062592,67.191766,650.0,82.020584
9,9,Pena High School,962,585858.0,72.088358,71.613306,86.590437,91.683992,79.209979,609.0,89.137214


In [None]:
spending_bins = [0, 585, 630, 645, 680]
labels = ["<$585", "$585-630", "$630-645", "$645-680"]
school_score2_df["School Spending per Student"] = pd.cut(school_score2_df["Per Student Budget"], spending_bins, labels=labels, include_lowest=True)
school_score2_df

NameError: name 'pd' is not defined

In [None]:
school_score3_df=school_score2_df.groupby("School Spending per Student")
school_score3_df.max()

Unnamed: 0_level_0,index,school_name,T_Students,T_School_Budget,Av_Maths_Score,Av_Reading_Score,Percentage passed Reading,Percentage passed maths,Overall Pass,Per Student Budget,overall passing rate
School Spending per Student,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
<$585,14,Wright High School,2283,1319574.0,72.583138,71.660422,89.074273,91.777778,80.785791,583.0,89.962325
$585-630,11,Shelton High School,4976,3124928.0,72.352894,71.613306,88.487738,91.683992,81.33515,628.0,89.850136
$630-645,12,Thomas High School,3999,2547363.0,72.047762,70.935984,87.396849,90.797699,79.419855,644.0,89.097274
$645-680,8,Johnson High School,4761,3094650.0,68.935207,69.186408,81.978576,82.062592,67.191766,655.0,82.020584


In [None]:
###############spending_maths_scores = school_score2_df.groupby(["School Spending per Student"]).mean()["Av_Maths_Score"]
spending_reading_scores = school_score2_df.groupby(["School Spending per Student"]).mean()["Av_Reading_Score"]
spending_passing_maths = school_score2_df.groupby(["School Spending per Student"]).mean()["Percentage passed maths"]
spending_passing_reading = school_score2_df.groupby(["School Spending per Student"]).mean()["Percentage passed Reading"]
overall_passing_spending = school_score2_df.groupby(["School Spending per Student"]).mean()["overall passing rate"]
School_Binned1_df = pd.merge(spending_maths_scores, spending_reading_scores, how="left", on=["School Spending per Student","School Spending per Student"]).reset_index()
School_Binned2_df = pd.merge(School_Binned1_df, spending_passing_maths, how="left", on=["School Spending per Student","School Spending per Student"]).reset_index()
School_Binned3_df = pd.merge(School_Binned2_df, spending_passing_reading, how="left", on=["School Spending per Student","School Spending per Student"]).reset_index()
School_Binned4_df = pd.merge(School_Binned3_df, spending_passing_spending, how="left", on=["School Spending per Student","School Spending per Student"]).reset_index()
School_Binned4_df


ValueError: cannot insert School Spending per Student, already exists

## Scores by School Size

* Perform the same operations as above, based on school size.

Unnamed: 0_level_0,Average Maths Score,Average Reading Score,% Passing Maths,% Passing Reading,% Overall Passing
School Size,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Small (<1000),72.335748,71.636864,90.806867,87.557513,79.066348
Medium (1000-2000),71.42165,70.720164,89.84656,86.714149,78.039785
Large (2000-5000),69.751809,69.576052,84.252804,83.301185,70.293507


## Scores by School Type

* Perform the same operations as above, based on school type

Unnamed: 0_level_0,Average Maths Score,Average Reading Score,% Passing Maths,% Passing Reading,% Overall Passing
School Type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Government,69.834806,69.675929,84.462375,83.587562,70.698993
Independent,71.368822,70.718933,89.204043,86.247789,76.97334
