### Note
* Instructions have been included for each segment. You do not have to follow them exactly, but they are included to help you think through the steps.

In [305]:
# Dependencies and Setup
import pandas as pd
import numpy as np

# File to Load
school_data_csv = "Resources/schools_complete.csv"
student_data_csv = "Resources/students_complete.csv"

# Read School and Student Data File and store into Pandas Data Frames
school_data = pd.read_csv(school_data_csv)
student_data = pd.read_csv(student_data_csv)

# Combine the data into a single dataset
combined_data = pd.merge(student_data, school_data, how="left", on=["school_name", "school_name"])
#combined_data.head()  

## District Summary

* Calculate the total number of schools

* Calculate the total number of students

* Calculate the total budget

* Calculate the average math score 

* Calculate the average reading score

* Calculate the overall passing rate (overall average score), i.e. (avg. math score + avg. reading score)/2

* Calculate the percentage of students with a passing math score (70 or greater)

* Calculate the percentage of students with a passing reading score (70 or greater)

* Create a dataframe to hold the above results

* Optional: give the displayed data cleaner formatting

In [306]:
#Find total list of schools
total_schools = combined_data["School ID"].nunique()
print(total_schools)

#Find the total number of students
total_students = combined_data["Student ID"].count()
print(total_students)

#Find the total budget for all schools
total = combined_data["budget"].astype(int)
total_budget = combined_data["budget"].unique()
print(total_budget)
budget = sum(total_budget)
print(budget)

#Average math score for all students
average_math = combined_data["math_score"].mean()
print(average_math)

#Average reading score for all students
average_reading = combined_data["reading_score"].mean()
print(average_reading)

#Overall passing
overall_passing = (average_math + average_reading)/2
print(overall_passing)

#Find the % of passing students for math
math = combined_data["math_score"].astype(int)
math_passing = sum(i >= 70 for i in math)
percent_passing_math = (math_passing/total_students)*100
print(math_passing)
print(percent_passing_math)


#Find % passing for reading
read = combined_data["reading_score"].astype(int)
reading_passing = sum(i >= 70 for i in read)
print(reading_passing)
percent_passing_reading = (reading_passing/total_students)*100
print(percent_passing_reading)

15
39170
[1910635 1884411 1056600 3022020  917500 1319574 1081356 3124928  248087
  585858 1049400 2547363 3094650 1763916 1043130]
24649428
78.98537145774827
81.87784018381414
80.43160582078121
29370
74.9808526933878
33610
85.80546336482001


In [307]:
district_summary = pd.DataFrame({"Total Schools": [total_schools],
                                   "Total Students": [total_students],
                                   "Total Budget": [budget],
                                   "Avg Math Score": [average_math],
                                   "Avg Reading Score": [average_reading],
                                   "% Passing Math": [percent_passing_math],
                                   "% Passing Reading": [percent_passing_reading],
                                    "Overall Passing": [overall_passing]
                                   })
district_summary = district_summary[["Total Schools",
                                         "Total Students",
                                         "Total Budget",
                                         "Avg Math Score",
                                         "Avg Reading Score",
                                         "% Passing Math",
                                         "% Passing Reading",
                                         "Overall Passing"]]
district_summary = district_summary.round(2)

district_summary

Unnamed: 0,Total Schools,Total Students,Total Budget,Avg Math Score,Avg Reading Score,% Passing Math,% Passing Reading,Overall Passing
0,15,39170,24649428,78.99,81.88,74.98,85.81,80.43


In [308]:
#district_summary["% Passing Math"] = district_summary["% Passing Math"].map("{0:,.0f}%".format)
#district_summary["% Passing Reading"] = district_summary["% Passing Reading"].map("{0:,.0f}%".format)
#district_summary["Total Budget"] = district_summary["Total Budget"].map("${0:,.0f}".format)
#district_summary

## School Summary

* Create an overview table that summarizes key metrics about each school, including:
  * School Name
  * School Type
  * Total Students
  * Total School Budget
  * Per Student Budget
  * Average Math Score
  * Average Reading Score
  * % Passing Math
  * % Passing Reading
  * Overall Passing Rate (Average of the above two)
  
* Create a dataframe to hold the above results

In [309]:
#school_names = combined_data.groupby("school_name")
#school_names.head()
#school_names = school_names.mean()
#school_summary = school_names[["size", "math_score", "reading_score"]]
#print(school_summary)

#Find the % of passing students for math
math = combined_data["math_score"].astype(int)
total_students = combined_data["size"].astype(int)
total_student = total_students.mean()
math_passing = sum(i >= 70 for i in math)
percent_passing_math = (math_passing/total_student)*100


#Find % passing for reading
read = combined_data["reading_score"].astype(int)
reading_passing = sum(i >= 70 for i in read)
percent_passing_reading = (reading_passing/total_student)*100

#Find overall passing
overall_passing = (percent_passing_reading + percent_passing_math)/2

#Find budget for each school and each student
Budget = combined_data["budget"].astype(int)
budget_per_student = Budget/total_student

types = school_data["type"]
schools1 = combined_data["school_name"]
math1 = combined_data["math_score"]
read1 = combined_data["reading_score"]
size = combined_data["size"]
school = school_data["school_name"]

In [310]:
schools = combined_data["school_name"].unique()
columns = ["Student ID", "student_name", "gender", "grade", "school_name", "reading_score", "math_score", "School ID", "type", "size", "budget"]


In [311]:
Huang_df = combined_data.loc[combined_data["school_name"] == "Huang High School", columns]
Huang_df
math = Huang_df["math_score"].astype(int)
math_passing = sum(i >= 70 for i in math)
read = Huang_df["reading_score"].astype(int)
reading_passing = sum(i >= 70 for i in read)
students = Huang_df["Student ID"].count()
huang_percent_math = (math_passing/students)*100
huang_percent_reading = (reading_passing/students)*100
huang_overall_passing = (huang_percent_math + huang_percent_reading)/2

Figueroa_df = combined_data.loc[combined_data["school_name"] == "Figueroa High School", columns]
Figueroa_df
math = Figueroa_df["math_score"].astype(int)
math_passing = sum(i >= 70 for i in math)
read = Figueroa_df["reading_score"].astype(int)
reading_passing = sum(i >= 70 for i in read)
students = Figueroa_df["Student ID"].count()
Figueroa_percent_math = (math_passing/students)*100
Figueroa_percent_reading = (reading_passing/students)*100
Figueroa_overall_passing = (Figueroa_percent_math + Figueroa_percent_reading)/2

Shelton_df = combined_data.loc[combined_data["school_name"] == "Shelton High School", columns]
Shelton_df
math = Shelton_df["math_score"].astype(int)
math_passing = sum(i >= 70 for i in math)
read = Shelton_df["reading_score"].astype(int)
reading_passing = sum(i >= 70 for i in read)
students = Shelton_df["Student ID"].count()
Shelton_percent_math = (math_passing/students)*100
Shelton_percent_reading = (reading_passing/students)*100
Shelton_overall_passing = (Shelton_percent_math + Shelton_percent_reading)/2

Hernandez_df = combined_data.loc[combined_data["school_name"] == "Hernandez High School", columns]
Hernandez_df
math = Hernandez_df["math_score"].astype(int)
math_passing = sum(i >= 70 for i in math)
read = Hernandez_df["reading_score"].astype(int)
reading_passing = sum(i >= 70 for i in read)
students = Hernandez_df["Student ID"].count()
Hernandez_percent_math = (math_passing/students)*100
Hernandez_percent_reading = (reading_passing/students)*100
Hernandez_overall_passing = (Hernandez_percent_math + Hernandez_percent_reading)/2

Griffin_df = combined_data.loc[combined_data["school_name"] == "Griffin High School", columns]
Griffin_df
math = Griffin_df["math_score"].astype(int)
math_passing = sum(i >= 70 for i in math)
read = Griffin_df["reading_score"].astype(int)
reading_passing = sum(i >= 70 for i in read)
students = Griffin_df["Student ID"].count()
Griffin_percent_math = (math_passing/students)*100
Griffin_percent_reading = (reading_passing/students)*100
Griffin_overall_passing = (Griffin_percent_math + Griffin_percent_reading)/2

Wilson_df = combined_data.loc[combined_data["school_name"] == "Wilson High School", columns]
Wilson_df
math = Wilson_df["math_score"].astype(int)
math_passing = sum(i >= 70 for i in math)
read = Wilson_df["reading_score"].astype(int)
reading_passing = sum(i >= 70 for i in read)
students = Wilson_df["Student ID"].count()
Wilson_percent_math = (math_passing/students)*100
Wilson_percent_reading = (reading_passing/students)*100
Wilson_overall_passing = (Wilson_percent_math + Wilson_percent_reading)/2

Cabrera_df = combined_data.loc[combined_data["school_name"] == "Cabrera High School", columns]
Cabrera_df
math = Cabrera_df["math_score"].astype(int)
math_passing = sum(i >= 70 for i in math)
read = Cabrera_df["reading_score"].astype(int)
reading_passing = sum(i >= 70 for i in read)
students = Cabrera_df["Student ID"].count()
Cabrera_percent_math = (math_passing/students)*100
Cabrera_percent_reading = (reading_passing/students)*100
Cabrera_overall_passing = (Cabrera_percent_math + Cabrera_percent_reading)/2

Bailey_df = combined_data.loc[combined_data["school_name"] == "Bailey High School", columns]
Bailey_df
math = Bailey_df["math_score"].astype(int)
math_passing = sum(i >= 70 for i in math)
read = Bailey_df["reading_score"].astype(int)
reading_passing = sum(i >= 70 for i in read)
students = Bailey_df["Student ID"].count()
Bailey_percent_math = (math_passing/students)*100
Bailey_percent_reading = (reading_passing/students)*100
Bailey_overall_passing = (Bailey_percent_math + Bailey_percent_reading)/2

Holden_df = combined_data.loc[combined_data["school_name"] == "Holden High School", columns]
Holden_df
math = Holden_df["math_score"].astype(int)
math_passing = sum(i >= 70 for i in math)
read = Holden_df["reading_score"].astype(int)
reading_passing = sum(i >= 70 for i in read)
students = Holden_df["Student ID"].count()
Holden_percent_math = (math_passing/students)*100
Holden_percent_reading = (reading_passing/students)*100
Holden_overall_passing = (Holden_percent_math + Holden_percent_reading)/2

Pena_df = combined_data.loc[combined_data["school_name"] == "Pena High School", columns]
Pena_df
math = Pena_df["math_score"].astype(int)
math_passing = sum(i >= 70 for i in math)
read = Pena_df["reading_score"].astype(int)
reading_passing = sum(i >= 70 for i in read)
students = Pena_df["Student ID"].count()
Pena_percent_math = (math_passing/students)*100
Pena_percent_reading = (reading_passing/students)*100
Pena_overall_passing = (Pena_percent_math + Pena_percent_reading)/2

Wright_df = combined_data.loc[combined_data["school_name"] == "Wright High School", columns]
Wright_df
math = Wright_df["math_score"].astype(int)
math_passing = sum(i >= 70 for i in math)
read = Wright_df["reading_score"].astype(int)
reading_passing = sum(i >= 70 for i in read)
students = Wright_df["Student ID"].count()
Wright_percent_math = (math_passing/students)*100
Wright_percent_reading = (reading_passing/students)*100
Wright_overall_passing = (Wright_percent_math + Wright_percent_reading)/2

Rodriguez_df = combined_data.loc[combined_data["school_name"] == "Rodriguez High School", columns]
Rodriguez_df
math = Rodriguez_df["math_score"].astype(int)
math_passing = sum(i >= 70 for i in math)
read = Rodriguez_df["reading_score"].astype(int)
reading_passing = sum(i >= 70 for i in read)
students = Rodriguez_df["Student ID"].count()
Rodriguez_percent_math = (math_passing/students)*100
Rodriguez_percent_reading = (reading_passing/students)*100
Rodriguez_overall_passing = (Rodriguez_percent_math + Rodriguez_percent_reading)/2

Johnson_df = combined_data.loc[combined_data["school_name"] == "Johnson High School", columns]
Johnson_df
math = Johnson_df["math_score"].astype(int)
math_passing = sum(i >= 70 for i in math)
read = Johnson_df["reading_score"].astype(int)
reading_passing = sum(i >= 70 for i in read)
students = Johnson_df["Student ID"].count()
Johnson_percent_math = (math_passing/students)*100
Johnson_percent_reading = (reading_passing/students)*100
Johnson_overall_passing = (Johnson_percent_math + Johnson_percent_reading)/2

Ford_df = combined_data.loc[combined_data["school_name"] == "Ford High School", columns]
Ford_df
math = Ford_df["math_score"].astype(int)
math_passing = sum(i >= 70 for i in math)
read = Ford_df["reading_score"].astype(int)
reading_passing = sum(i >= 70 for i in read)
students = Ford_df["Student ID"].count()
Ford_percent_math = (math_passing/students)*100
Ford_percent_reading = (reading_passing/students)*100
Ford_overall_passing = (Ford_percent_math + Ford_percent_reading)/2

Thomas_df = combined_data.loc[combined_data["school_name"] == "Thomas High School", columns]
Thomas_df
math = Thomas_df["math_score"].astype(int)
math_passing = sum(i >= 70 for i in math)
read = Thomas_df["reading_score"].astype(int)
reading_passing = sum(i >= 70 for i in read)
students = Thomas_df["Student ID"].count()
Thomas_percent_math = (math_passing/students)*100
Thomas_percent_reading = (reading_passing/students)*100
Thomas_overall_passing = (Thomas_percent_math + Thomas_percent_reading)/2

In [312]:
    
print(huang_percent_math)
print(Figueroa_percent_math)
print(Shelton_percent_math)
print(Hernandez_percent_math)
print(Griffin_percent_math)
print(Wilson_percent_math)
print(Cabrera_percent_math)
print(Bailey_percent_math)
print(Holden_percent_math)
print(Pena_percent_math)
print(Wright_percent_math)
print(Rodriguez_percent_math)
print(Johnson_percent_math)
print(Ford_percent_math)
print(Thomas_percent_math)

print("------------------------------")

print(huang_percent_reading)
print(Figueroa_percent_reading)
print(Shelton_percent_reading)
print(Hernandez_percent_reading)
print(Griffin_percent_reading)
print(Wilson_percent_reading)
print(Cabrera_percent_reading)
print(Bailey_percent_reading)
print(Holden_percent_reading)
print(Pena_percent_reading)
print(Wright_percent_reading)
print(Rodriguez_percent_reading)
print(Johnson_percent_reading)
print(Ford_percent_reading)
print(Thomas_percent_reading)

print("------------------------------")

print(huang_overall_passing)
print(Figueroa_overall_passing)
print(Shelton_overall_passing)
print(Hernandez_overall_passing)
print(Griffin_overall_passing)
print(Wilson_overall_passing)
print(Cabrera_overall_passing)
print(Bailey_overall_passing)
print(Holden_overall_passing)
print(Pena_overall_passing)
print(Wright_overall_passing)
print(Rodriguez_overall_passing)
print(Johnson_overall_passing)
print(Ford_overall_passing)
print(Thomas_overall_passing)

65.68392183750429
65.98847066802306
93.8671209540034
66.7529665587918
93.39237057220708
93.8677179150241
94.1334768568353
66.68006430868168
92.50585480093677
94.5945945945946
93.33333333333333
66.36659164791197
66.0575509346776
68.3096020445418
93.27217125382263
------------------------------
81.31642098045938
80.73923363852154
95.85462805224304
80.86299892125135
97.13896457765668
96.53964082347788
97.03982777179763
81.93327974276528
96.25292740046838
95.94594594594594
96.61111111111111
80.22005501375344
81.2224322621298
79.29901423877328
97.30886850152906
------------------------------
73.50017140898183
73.36385215327229
94.86087450312323
73.80798274002157
95.26566757493188
95.20367936925099
95.58665231431647
74.30667202572349
94.37939110070258
95.27027027027026
94.97222222222223
73.2933233308327
73.6399915984037
73.80430814165754
95.29051987767585


In [313]:
add_school_summary = pd.DataFrame({
                                   "school_name":(schools1),
                                   "math_score": (math1),
                                   "reading_score": (read1),
                                   "Budget per School": (Budget),
                                   "Budget per Student": (budget_per_student),
                                   "size": (size),
                                   })
add_school_summary = add_school_summary[[
                                    "school_name",
                                   "math_score",
                                   "reading_score",
                                    "Budget per Student",
                                   "Budget per School",
                                    "size",
                                        ]]
add_school_summary

school_names = add_school_summary.groupby("school_name")
school_names.head()
school_names = school_names.mean()
school_summary = school_names[["size", 
                               "math_score", 
                               "reading_score", 
                               "Budget per Student", 
                               "Budget per School",]] 
school_summary = school_summary.round(2)

total_school = pd.DataFrame({
                            "school_name":(school),
                            "type":(types)})
total_school = total_school [["school_name",
                             "type"]]

total_school = total_school.round(2)

combined_schools = pd.merge(school_summary, total_school, how='outer', on='school_name')
combined_schools

combined_schools_df = pd.DataFrame(combined_schools)
combined_schools_df

Passing = {"school_name": ['Huang High School', 'Figueroa High School', 'Shelton High School',
 'Hernandez High School', 'Griffin High School', 'Wilson High School',
 'Cabrera High School', 'Bailey High School', 'Holden High School',
 'Pena High School', 'Wright High School', 'Rodriguez High School',
 'Johnson High School', 'Ford High School', 'Thomas High School'],
           "% Passing Math": [65.68392183750429,
65.98847066802306,
93.8671209540034,
66.7529665587918,
93.39237057220708,
93.8677179150241,
94.1334768568353,
66.68006430868168,
92.50585480093677,
94.5945945945946,
93.33333333333333,
66.36659164791197,
66.0575509346776,
68.3096020445418,
93.27217125382263],
           "% Passing Reading": [81.31642098045938,
80.73923363852154,
95.85462805224304,
80.86299892125135,
97.13896457765668,
96.53964082347788,
97.03982777179763,
81.93327974276528,
96.25292740046838,
95.94594594594594,
96.61111111111111,
80.22005501375344,
81.2224322621298,
79.29901423877328,
97.30886850152906],
           "Overall Passing": [73.50017140898183,
73.36385215327229,
94.86087450312323,
73.80798274002157,
95.26566757493188,
95.20367936925099,
95.58665231431647,
74.30667202572349,
94.37939110070258,
95.27027027027026,
94.97222222222223,
73.2933233308327,
73.6399915984037,
73.80430814165754,
95.29051987767585]}

Passing_df = pd.DataFrame(Passing)
Passing_df = Passing_df.round(2)

total_combined_schools = pd.merge(combined_schools, Passing_df, how='outer', on='school_name')
total_combined_schools


Unnamed: 0,school_name,size,math_score,reading_score,Budget per Student,Budget per School,type,% Passing Math,% Passing Reading,Overall Passing
0,Bailey High School,4976.0,77.05,81.03,937.58,3124928,District,66.68,81.93,74.31
1,Cabrera High School,1858.0,83.06,83.98,324.44,1081356,Charter,94.13,97.04,95.59
2,Figueroa High School,2949.0,76.71,81.16,565.39,1884411,District,65.99,80.74,73.36
3,Ford High School,2739.0,77.1,80.75,529.23,1763916,District,68.31,79.3,73.8
4,Griffin High School,1468.0,83.35,83.82,275.28,917500,Charter,93.39,97.14,95.27
5,Hernandez High School,4635.0,77.29,80.93,906.71,3022020,District,66.75,80.86,73.81
6,Holden High School,427.0,83.8,83.81,74.43,248087,Charter,92.51,96.25,94.38
7,Huang High School,2917.0,76.63,81.18,573.26,1910635,District,65.68,81.32,73.5
8,Johnson High School,4761.0,77.07,80.97,928.5,3094650,District,66.06,81.22,73.64
9,Pena High School,962.0,83.84,84.04,175.78,585858,Charter,94.59,95.95,95.27


## Top Performing Schools (By Passing Rate)

* Sort and display the top five schools in overall passing rate

In [314]:
total_combined_schools = total_combined_schools.sort_values("Overall Passing", ascending=False)
total_combined_schools.head()
total_combined_schools = total_combined_schools.reset_index(drop=True)
total_combined_schools.head()

Unnamed: 0,school_name,size,math_score,reading_score,Budget per Student,Budget per School,type,% Passing Math,% Passing Reading,Overall Passing
0,Cabrera High School,1858.0,83.06,83.98,324.44,1081356,Charter,94.13,97.04,95.59
1,Thomas High School,1635.0,83.42,83.85,312.97,1043130,Charter,93.27,97.31,95.29
2,Griffin High School,1468.0,83.35,83.82,275.28,917500,Charter,93.39,97.14,95.27
3,Pena High School,962.0,83.84,84.04,175.78,585858,Charter,94.59,95.95,95.27
4,Wilson High School,2283.0,83.27,83.99,395.92,1319574,Charter,93.87,96.54,95.2


## Bottom Performing Schools (By Passing Rate)

* Sort and display the five worst-performing schools

In [315]:
total_combined_schools = total_combined_schools.sort_values("Overall Passing")
total_combined_schools.head()
total_combined_schools = total_combined_schools.reset_index(drop=True)
total_combined_schools.head()

Unnamed: 0,school_name,size,math_score,reading_score,Budget per Student,Budget per School,type,% Passing Math,% Passing Reading,Overall Passing
0,Rodriguez High School,3999.0,76.84,80.74,764.3,2547363,District,66.37,80.22,73.29
1,Figueroa High School,2949.0,76.71,81.16,565.39,1884411,District,65.99,80.74,73.36
2,Huang High School,2917.0,76.63,81.18,573.26,1910635,District,65.68,81.32,73.5
3,Johnson High School,4761.0,77.07,80.97,928.5,3094650,District,66.06,81.22,73.64
4,Ford High School,2739.0,77.1,80.75,529.23,1763916,District,68.31,79.3,73.8


## Math Scores by Grade

* Create a table that lists the average Reading Score for students of each grade level (9th, 10th, 11th, 12th) at each school.

  * Create a pandas series for each grade. Hint: use a conditional statement.
  
  * Group each series by school
  
  * Combine the series into a dataframe
  
  * Optional: give the displayed data cleaner formatting

In [340]:
#total_school_summary = combined_data["school_name","grade", "math]
#total_school_summary
total_summary=combined_data["school_name", "grade", "math_score"]
total_summary.head()
total_school = total_summary.groupby(["school_name", "grade"])
total_school.mean()

  total_summary=total_school_summary["school_name", "grade", "math_score"]


AttributeError: 'DataFrameGroupBy' object has no attribute 'groupby'

## Reading Score by Grade 

* Perform the same operations as above for reading scores

## Scores by School Spending

* Create a table that breaks down school performances based on average Spending Ranges (Per Student). Use 4 reasonable bins to group school spending. Include in the table each of the following:
  * Average Math Score
  * Average Reading Score
  * % Passing Math
  * % Passing Reading
  * Overall Passing Rate (Average of the above two)

In [316]:
per_student_bins = [0, 301, 601, 901, 1000]
budget_labels = ["0-300", "301-600", "601-900", "900-1000"]

In [317]:
pd.cut(total_combined_schools["Budget per Student"], per_student_bins, labels=budget_labels)
total_combined_schools["Budget Bins"]=pd.cut(total_combined_schools["Budget per Student"], per_student_bins, labels=budget_labels)
total_combined_schools
reduced_combined_schools = total_combined_schools.iloc[:, [
    0, 2, 3, 7, 8, 9, 10,]]
reduced_combined_schools

Unnamed: 0,school_name,math_score,reading_score,% Passing Math,% Passing Reading,Overall Passing,Budget Bins
0,Rodriguez High School,76.84,80.74,66.37,80.22,73.29,601-900
1,Figueroa High School,76.71,81.16,65.99,80.74,73.36,301-600
2,Huang High School,76.63,81.18,65.68,81.32,73.5,301-600
3,Johnson High School,77.07,80.97,66.06,81.22,73.64,900-1000
4,Ford High School,77.1,80.75,68.31,79.3,73.8,301-600
5,Hernandez High School,77.29,80.93,66.75,80.86,73.81,900-1000
6,Bailey High School,77.05,81.03,66.68,81.93,74.31,900-1000
7,Holden High School,83.8,83.81,92.51,96.25,94.38,0-300
8,Shelton High School,83.36,83.73,93.87,95.85,94.86,301-600
9,Wright High School,83.68,83.96,93.33,96.61,94.97,301-600


## Scores by School Size

* Perform the same operations as above, based on school size.

In [318]:
# Sample bins. Feel free to create your own bins.
size_bins = [0, 1000, 2000, 5000]
group_names = ["Small (<1000)", "Medium (1000-2000)", "Large (2000-5000)"]

In [319]:
pd.cut(total_combined_schools["size"], size_bins, labels=group_names)
total_combined_schools["Size Bins"]=pd.cut(total_combined_schools["size"], size_bins, labels=group_names)
total_combined_schools
reduced_size_schools = total_combined_schools.iloc[:, [
    0, 2, 3, 7, 8, 9, 11,]]
reduced_size_schools

Unnamed: 0,school_name,math_score,reading_score,% Passing Math,% Passing Reading,Overall Passing,Size Bins
0,Rodriguez High School,76.84,80.74,66.37,80.22,73.29,Large (2000-5000)
1,Figueroa High School,76.71,81.16,65.99,80.74,73.36,Large (2000-5000)
2,Huang High School,76.63,81.18,65.68,81.32,73.5,Large (2000-5000)
3,Johnson High School,77.07,80.97,66.06,81.22,73.64,Large (2000-5000)
4,Ford High School,77.1,80.75,68.31,79.3,73.8,Large (2000-5000)
5,Hernandez High School,77.29,80.93,66.75,80.86,73.81,Large (2000-5000)
6,Bailey High School,77.05,81.03,66.68,81.93,74.31,Large (2000-5000)
7,Holden High School,83.8,83.81,92.51,96.25,94.38,Small (<1000)
8,Shelton High School,83.36,83.73,93.87,95.85,94.86,Medium (1000-2000)
9,Wright High School,83.68,83.96,93.33,96.61,94.97,Medium (1000-2000)


## Scores by School Type

* Perform the same operations as above, based on school type.

In [320]:
school_types = total_combined_schools.groupby("type")
school_types.head()
school_types = school_types.mean()
types_total = school_types[["math_score", 
                               "reading_score", 
                               "% Passing Math",
                                "% Passing Reading",
                                "Overall Passing",]] 
types_total

Unnamed: 0_level_0,math_score,reading_score,% Passing Math,% Passing Reading,Overall Passing
type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Charter,83.4725,83.8975,93.62,96.58625,95.10375
District,76.955714,80.965714,66.548571,80.798571,73.672857
