# City Schools Pandas Challenge 

This analysis of the district-wide standardized test results aggregates data containing student's math and reading 
scores, as well as various information on the schools they attend. The purpose is to showcase obvious trends in school 
performance.

In [2]:
# Dependencies
import pandas as pd

In [3]:
# Store filepath for the school file
school_file = "Resources/schools_complete.csv"

In [4]:
#Store filepath for the student file
student_file = "Resources/students_complete.csv"

In [5]:
# Read the school file with the pandas library
school_df = pd.read_csv(school_file)
school_df.head()

Unnamed: 0,School ID,school_name,type,size,budget
0,0,Huang High School,District,2917,1910635
1,1,Figueroa High School,District,2949,1884411
2,2,Shelton High School,Charter,1761,1056600
3,3,Hernandez High School,District,4635,3022020
4,4,Griffin High School,Charter,1468,917500


In [6]:
# Read the student file with the pandas library
student_df = pd.read_csv(student_file)
student_df.head()

Unnamed: 0,Student ID,student_name,gender,grade,school_name,reading_score,math_score
0,0,Paul Bradley,M,9th,Huang High School,66,79
1,1,Victor Smith,M,12th,Huang High School,94,61
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60
3,3,Dr. Richard Scott,M,12th,Huang High School,67,58
4,4,Bonnie Ray,F,9th,Huang High School,97,84


# District Summary

In [7]:
# This is a high level snapshot of the district's key metrics.

In [8]:
#Find district summary information, like total schools
total_schools = school_df["school_name"].count()
#total_schools

In [9]:
total_students = school_df["size"].sum()
#total_students

In [10]:
total_budget = school_df["budget"].sum()
#total_budget

In [11]:
average_math_score = student_df["math_score"].mean()
#average_math_score

In [12]:
average_reading_score = student_df["reading_score"].mean()
#average_reading_score

In [13]:
# Find percent of students passing math, start with total scores:
#total_math_score = student_df["math_score"].count()
#total_math_score

In [14]:
# Create bins to hold pass/fail data for both math and reading
bins = [0, 59, 100]

bin_names = ["Fail", "Pass"]

In [15]:
# Identify math scores as either pass or fail and add to bins
binned_math = pd.cut(student_df['math_score'], bins, labels=bin_names)
# Create column in student data frame
student_df['math_passfail'] = binned_math
student_df

Unnamed: 0,Student ID,student_name,gender,grade,school_name,reading_score,math_score,math_passfail
0,0,Paul Bradley,M,9th,Huang High School,66,79,Pass
1,1,Victor Smith,M,12th,Huang High School,94,61,Pass
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60,Pass
3,3,Dr. Richard Scott,M,12th,Huang High School,67,58,Fail
4,4,Bonnie Ray,F,9th,Huang High School,97,84,Pass
...,...,...,...,...,...,...,...,...
39165,39165,Donna Howard,F,12th,Thomas High School,99,90,Pass
39166,39166,Dawn Bell,F,10th,Thomas High School,95,70,Pass
39167,39167,Rebecca Tanner,F,9th,Thomas High School,73,84,Pass
39168,39168,Desiree Kidd,F,10th,Thomas High School,99,90,Pass


In [16]:
# Find total reading scores for percent passing reading function: 
#total_reading_score = student_df["reading_score"].count()
#total_reading_score

In [17]:
# Identify reading scores as either pass or fail and add to bins
binned_reading = pd.cut(student_df['reading_score'], bins, labels=bin_names)
# Create column in student data frame
student_df['reading_passfail'] = binned_reading
#student_df

In [18]:
# Find total number of students with passing math scores
math_pass = student_df['math_passfail'].value_counts('Pass')
#math_pass

In [19]:
# Find total number of students with passing reading scores
reading_pass = student_df['reading_passfail'].value_counts('Pass')
#reading_pass

In [20]:
#Find % of students overall passing
student_df["overall_passfail"] = student_df["math_passfail"].astype(str) + student_df["reading_passfail"].astype(str)
student_df.head()

Unnamed: 0,Student ID,student_name,gender,grade,school_name,reading_score,math_score,math_passfail,reading_passfail,overall_passfail
0,0,Paul Bradley,M,9th,Huang High School,66,79,Pass,Pass,PassPass
1,1,Victor Smith,M,12th,Huang High School,94,61,Pass,Pass,PassPass
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60,Pass,Pass,PassPass
3,3,Dr. Richard Scott,M,12th,Huang High School,67,58,Fail,Pass,FailPass
4,4,Bonnie Ray,F,9th,Huang High School,97,84,Pass,Pass,PassPass


In [21]:
overall_pass = student_df['overall_passfail'].value_counts('PassPass')
#overall_pass

In [22]:
district_summary_df = pd.DataFrame(
    {"Total Schools": [total_schools], 
     "Total Students": [total_students],
     "Total Budget": [total_budget],
     "Average Math Score": [average_math_score],
     "Average Reading Score": [average_reading_score],
     "% Passing Math": [math_pass],
     "% Passing Reading": [reading_pass],
     "% Overall Passing": [overall_pass]
                       })
district_summary_df

Unnamed: 0,Total Schools,Total Students,Total Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
0,15,39170,24649428,78.985371,81.87784,Pass 0.924457 Fail 0.075543 Name: math_p...,Pass 1.0 Fail 0.0 Name: reading_passfail...,PassPass 0.924457 FailPass 0.075543 Name...


# School Summary

In [23]:
# Serves as an overview of key metrics on each school.
#School Name
#School Type
#Total Students
#Total School Budget
#Per Student Budget
#Average Math Score
#Average Reading Score
#% Passing Math (The percentage of students that passed math.)
#% Passing Reading (The percentage of students that passed reading.)
#% Overall Passing (The percentage of students that passed math **and** reading.)

In [24]:
# Merge school dateframe with student dataframe using an outer join at school name
merge_df = pd.merge(school_df, student_df, on="school_name", how="outer")
merge_df.head()

Unnamed: 0,School ID,school_name,type,size,budget,Student ID,student_name,gender,grade,reading_score,math_score,math_passfail,reading_passfail,overall_passfail
0,0,Huang High School,District,2917,1910635,0,Paul Bradley,M,9th,66,79,Pass,Pass,PassPass
1,0,Huang High School,District,2917,1910635,1,Victor Smith,M,12th,94,61,Pass,Pass,PassPass
2,0,Huang High School,District,2917,1910635,2,Kevin Rodriguez,M,12th,90,60,Pass,Pass,PassPass
3,0,Huang High School,District,2917,1910635,3,Dr. Richard Scott,M,12th,67,58,Fail,Pass,FailPass
4,0,Huang High School,District,2917,1910635,4,Bonnie Ray,F,9th,97,84,Pass,Pass,PassPass


In [25]:
# Add a column name Per_Student_Budget found by dividing buget by size, or number of students
merge_df["Per_Student_Budget"] = merge_df["budget"] / merge_df["size"]
merge_df.head()

Unnamed: 0,School ID,school_name,type,size,budget,Student ID,student_name,gender,grade,reading_score,math_score,math_passfail,reading_passfail,overall_passfail,Per_Student_Budget
0,0,Huang High School,District,2917,1910635,0,Paul Bradley,M,9th,66,79,Pass,Pass,PassPass,655.0
1,0,Huang High School,District,2917,1910635,1,Victor Smith,M,12th,94,61,Pass,Pass,PassPass,655.0
2,0,Huang High School,District,2917,1910635,2,Kevin Rodriguez,M,12th,90,60,Pass,Pass,PassPass,655.0
3,0,Huang High School,District,2917,1910635,3,Dr. Richard Scott,M,12th,67,58,Fail,Pass,FailPass,655.0
4,0,Huang High School,District,2917,1910635,4,Bonnie Ray,F,9th,97,84,Pass,Pass,PassPass,655.0


In [26]:
# Using astype(), convert passfail, binned columns data into string
merge_df.loc[:, "math_passfail"] = merge_df["math_passfail"].astype("str")
merge_df.loc[:, "reading_passfail"] = merge_df["reading_passfail"].astype("str")

merge_df.dtypes

School ID               int64
school_name            object
type                   object
size                    int64
budget                  int64
Student ID              int64
student_name           object
gender                 object
grade                  object
reading_score           int64
math_score              int64
math_passfail          object
reading_passfail       object
overall_passfail       object
Per_Student_Budget    float64
dtype: object

In [27]:
# Use .loc to find data on specific schools
huang_df = merge_df.loc[merge_df["school_name"] == "Huang High School", ["school_name", "type", "size", "budget", "math_score", "reading_score", "math_passfail", "reading_passfail", "overall_passfail", "Per_Student_Budget"]]
#huang_df.head()

In [28]:
# Gather data from huang_df for the Huang High School Summary Table
huang_name = huang_df["school_name"].unique()
huang_name

array(['Huang High School'], dtype=object)

In [29]:
huang_type = huang_df["type"].unique()
huang_type

array(['District'], dtype=object)

In [30]:
huang_size = huang_df["size"].unique()
huang_size

array([2917], dtype=int64)

In [31]:
huang_budget = huang_df["budget"].unique()
huang_budget

array([1910635], dtype=int64)

In [32]:
average_math_huang = huang_df["math_score"].mean()
average_math_huang

76.62941378128214

In [33]:
average_reading_huang = huang_df["reading_score"].mean()
average_reading_huang

81.18272197463148

In [34]:
# Find percent of Huang students with passing math scores
math_pass_huang = huang_df['math_passfail'].value_counts('Pass')
math_pass_huang

Pass    0.888584
Fail    0.111416
Name: math_passfail, dtype: float64

In [35]:
# Find percent of Huang students with passing reading scores
reading_pass_huang = huang_df['reading_passfail'].value_counts('Pass')
reading_pass_huang

Pass    1.0
Name: reading_passfail, dtype: float64

In [36]:
overall_pass_huang = huang_df['overall_passfail'].value_counts('PassPass')
overall_pass_huang

PassPass    0.888584
FailPass    0.111416
Name: overall_passfail, dtype: float64

In [37]:
# Print table with Huang Summary information:
huang_summary_df = pd.DataFrame(
    {"School Name": huang_name, 
     "School Type": huang_type,
     "Total Students": huang_size,
     "Total School Budget": huang_budget,
     "Average Math Score": [average_math_huang],
     "Average Reading Score": [average_reading_huang],
     "% Passing Math": [math_pass_huang],
     "% Passing Reading": [reading_pass_huang],
     "% Overall Passing": [overall_pass_huang]
                       })
huang_summary_df

Unnamed: 0,School Name,School Type,Total Students,Total School Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
0,Huang High School,District,2917,1910635,76.629414,81.182722,Pass 0.888584 Fail 0.111416 Name: math_p...,"Pass 1.0 Name: reading_passfail, dtype: flo...",PassPass 0.888584 FailPass 0.111416 Name...


In [38]:
figueroa_df = merge_df.loc[merge_df["school_name"] == "Figueroa High School", ["school_name", "type", "size", "budget", "math_score", "reading_score", "math_passfail", "reading_passfail", "overall_passfail", "Per_Student_Budget"]]
figueroa_df.head()

Unnamed: 0,school_name,type,size,budget,math_score,reading_score,math_passfail,reading_passfail,overall_passfail,Per_Student_Budget
2917,Figueroa High School,District,2949,1884411,87,85,Pass,Pass,PassPass,639.0
2918,Figueroa High School,District,2949,1884411,84,97,Pass,Pass,PassPass,639.0
2919,Figueroa High School,District,2949,1884411,77,67,Pass,Pass,PassPass,639.0
2920,Figueroa High School,District,2949,1884411,64,97,Pass,Pass,PassPass,639.0
2921,Figueroa High School,District,2949,1884411,64,79,Pass,Pass,PassPass,639.0


In [39]:
# Gather data from huang_df for the Huang High School Summary Table
figueroa_name = figueroa_df["school_name"].unique()
figueroa_name

array(['Figueroa High School'], dtype=object)

In [40]:
figueroa_type = figueroa_df["type"].unique()
figueroa_type

array(['District'], dtype=object)

In [41]:
figueroa_size = figueroa_df["size"].unique()
figueroa_size

array([2949], dtype=int64)

In [42]:
figueroa_budget = figueroa_df["budget"].unique()
figueroa_budget

array([1884411], dtype=int64)

In [43]:
average_math_figueroa = figueroa_df["math_score"].mean()
average_math_figueroa

76.71176670057646

In [44]:
average_reading_figueroa = figueroa_df["reading_score"].mean()
average_reading_figueroa

81.15801966768396

In [45]:
# Find percent of Huang students with passing math scores
math_pass_figueroa = figueroa_df['math_passfail'].value_counts('Pass')
math_pass_figueroa

Pass    0.884368
Fail    0.115632
Name: math_passfail, dtype: float64

In [46]:
# Find percent of Huang students with passing reading scores
reading_pass_figueroa = figueroa_df['reading_passfail'].value_counts('Pass')
reading_pass_figueroa

Pass    1.0
Name: reading_passfail, dtype: float64

In [47]:
overall_pass_figueroa = figueroa_df['overall_passfail'].value_counts('PassPass')
overall_pass_figueroa

PassPass    0.884368
FailPass    0.115632
Name: overall_passfail, dtype: float64

In [48]:
# Print table with Huang Summary information:
figueroa_summary_df = pd.DataFrame(
    {"School Name": figueroa_name, 
     "School Tyoe": figueroa_type,
     "Total Students": figueroa_size,
     "Total School Budget": figueroa_budget,
     "Average Math Score": [average_math_figueroa],
     "Average Reading Score": [average_reading_figueroa],
     "% Passing Math": [math_pass_figueroa],
     "% Passing Reading": [reading_pass_figueroa],
     "% Overall Passing": [overall_pass_figueroa]
                       })
figueroa_summary_df

Unnamed: 0,School Name,School Tyoe,Total Students,Total School Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
0,Figueroa High School,District,2949,1884411,76.711767,81.15802,Pass 0.884368 Fail 0.115632 Name: math_p...,"Pass 1.0 Name: reading_passfail, dtype: flo...",PassPass 0.884368 FailPass 0.115632 Name...


In [49]:
shelton_df = merge_df.loc[merge_df["school_name"] == "Shelton High School", ["school_name", "type", "size", "budget", "math_score", "reading_score", "math_passfail", "reading_passfail", "overall_passfail", "Per_Student_Budget"]]
shelton_df.head()

Unnamed: 0,school_name,type,size,budget,math_score,reading_score,math_passfail,reading_passfail,overall_passfail,Per_Student_Budget
5866,Shelton High School,Charter,1761,1056600,91,70,Pass,Pass,PassPass,600.0
5867,Shelton High School,Charter,1761,1056600,71,84,Pass,Pass,PassPass,600.0
5868,Shelton High School,Charter,1761,1056600,99,95,Pass,Pass,PassPass,600.0
5869,Shelton High School,Charter,1761,1056600,95,76,Pass,Pass,PassPass,600.0
5870,Shelton High School,Charter,1761,1056600,82,71,Pass,Pass,PassPass,600.0


In [50]:
# Gather data from huang_df for the Huang High School Summary Table
shelton_name = shelton_df["school_name"].unique()
shelton_name

array(['Shelton High School'], dtype=object)

In [51]:
shelton_type = shelton_df["type"].unique()
shelton_type

array(['Charter'], dtype=object)

In [52]:
shelton_size = shelton_df["size"].unique()
shelton_size

array([1761], dtype=int64)

In [53]:
shelton_budget = shelton_df["budget"].unique()
shelton_budget

array([1056600], dtype=int64)

In [54]:
average_math_shelton = shelton_df["math_score"].mean()
average_math_shelton

83.3594548551959

In [55]:
average_reading_shelton = shelton_df["reading_score"].mean()
average_reading_shelton

83.72572402044293

In [56]:
# Find percent of Huang students with passing math scores
math_pass_shelton = shelton_df['math_passfail'].value_counts('Pass')
math_pass_shelton

Pass    1.0
Name: math_passfail, dtype: float64

In [57]:
# Find percent of Huang students with passing reading scores
reading_pass_shelton = shelton_df['reading_passfail'].value_counts('Pass')
reading_pass_shelton

Pass    1.0
Name: reading_passfail, dtype: float64

In [58]:
overall_pass_shelton = shelton_df['overall_passfail'].value_counts('PassPass')
overall_pass_shelton

PassPass    1.0
Name: overall_passfail, dtype: float64

In [59]:
# Print table with Huang Summary information:
shelton_summary_df = pd.DataFrame(
    {"School Name": shelton_name, 
     "School Tyoe": shelton_type,
     "Total Students": shelton_size,
     "Total School Budget": shelton_budget,
     "Average Math Score": [average_math_shelton],
     "Average Reading Score": [average_reading_shelton],
     "% Passing Math": [math_pass_shelton],
     "% Passing Reading": [reading_pass_shelton],
     "% Overall Passing": [overall_pass_shelton]
                       })
shelton_summary_df

Unnamed: 0,School Name,School Tyoe,Total Students,Total School Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
0,Shelton High School,Charter,1761,1056600,83.359455,83.725724,"Pass 1.0 Name: math_passfail, dtype: float64","Pass 1.0 Name: reading_passfail, dtype: flo...","PassPass 1.0 Name: overall_passfail, dtype:..."


In [60]:
hernandez_df = merge_df.loc[merge_df["school_name"] == "Hernandez High School", ["school_name", "type", "size", "budget", "math_score", "reading_score", "math_passfail", "reading_passfail", "overall_passfail", "Per_Student_Budget"]]
hernandez_df.head()

Unnamed: 0,school_name,type,size,budget,math_score,reading_score,math_passfail,reading_passfail,overall_passfail,Per_Student_Budget
7627,Hernandez High School,District,4635,3022020,88,70,Pass,Pass,PassPass,652.0
7628,Hernandez High School,District,4635,3022020,93,97,Pass,Pass,PassPass,652.0
7629,Hernandez High School,District,4635,3022020,81,83,Pass,Pass,PassPass,652.0
7630,Hernandez High School,District,4635,3022020,91,77,Pass,Pass,PassPass,652.0
7631,Hernandez High School,District,4635,3022020,93,84,Pass,Pass,PassPass,652.0


In [61]:
# Gather data from huang_df for the Huang High School Summary Table
hernandez_name = hernandez_df["school_name"].unique()
hernandez_name

array(['Hernandez High School'], dtype=object)

In [62]:
hernandez_type = hernandez_df["type"].unique()
hernandez_type

array(['District'], dtype=object)

In [63]:
hernandez_size = hernandez_df["size"].unique()
hernandez_size

array([4635], dtype=int64)

In [64]:
hernandez_budget = hernandez_df["budget"].unique()
hernandez_budget

array([3022020], dtype=int64)

In [65]:
average_math_hernandez = hernandez_df["math_score"].mean()
average_math_hernandez

77.28975188781014

In [66]:
average_reading_hernandez = hernandez_df["reading_score"].mean()
average_reading_hernandez

80.9344120819849

In [67]:
# Find percent of Huang students with passing math scores
math_pass_hernandez = hernandez_df['math_passfail'].value_counts('Pass')
math_pass_hernandez

Pass    0.890831
Fail    0.109169
Name: math_passfail, dtype: float64

In [68]:
# Find percent of Huang students with passing reading scores
reading_pass_hernandez = hernandez_df['reading_passfail'].value_counts('Pass')
reading_pass_hernandez

Pass    1.0
Name: reading_passfail, dtype: float64

In [69]:
overall_pass_hernandez = hernandez_df['overall_passfail'].value_counts('PassPass')
overall_pass_hernandez

PassPass    0.890831
FailPass    0.109169
Name: overall_passfail, dtype: float64

In [70]:
# Print table with Huang Summary information:
hernandez_summary_df = pd.DataFrame(
    {"School Name": hernandez_name, 
     "School Tyoe": hernandez_type,
     "Total Students": hernandez_size,
     "Total School Budget": hernandez_budget,
     "Average Math Score": [average_math_hernandez],
     "Average Reading Score": [average_reading_hernandez],
     "% Passing Math": [math_pass_hernandez],
     "% Passing Reading": [reading_pass_hernandez],
     "% Overall Passing": [overall_pass_hernandez]
                       })
hernandez_summary_df

Unnamed: 0,School Name,School Tyoe,Total Students,Total School Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
0,Hernandez High School,District,4635,3022020,77.289752,80.934412,Pass 0.890831 Fail 0.109169 Name: math_p...,"Pass 1.0 Name: reading_passfail, dtype: flo...",PassPass 0.890831 FailPass 0.109169 Name...


In [74]:
griffin_df = merge_df.loc[merge_df["school_name"] == "Griffin High School", ["school_name", "type", "size", "budget", "math_score", "reading_score", "math_passfail", "reading_passfail", "overall_passfail", "Per_Student_Budget"]]
griffin_df.head()

Unnamed: 0,school_name,type,size,budget,math_score,reading_score,math_passfail,reading_passfail,overall_passfail,Per_Student_Budget
12262,Griffin High School,Charter,1468,917500,68,79,Pass,Pass,PassPass,625.0
12263,Griffin High School,Charter,1468,917500,81,91,Pass,Pass,PassPass,625.0
12264,Griffin High School,Charter,1468,917500,89,78,Pass,Pass,PassPass,625.0
12265,Griffin High School,Charter,1468,917500,85,91,Pass,Pass,PassPass,625.0
12266,Griffin High School,Charter,1468,917500,83,76,Pass,Pass,PassPass,625.0


In [75]:
# Gather data from huang_df for the Huang High School Summary Table
griffin_name = griffin_df["school_name"].unique()
griffin_name

array(['Griffin High School'], dtype=object)

In [76]:
griffin_type = griffin_df["type"].unique()
griffin_type

array(['Charter'], dtype=object)

In [77]:
griffin_size = griffin_df["size"].unique()
griffin_size

array([1468], dtype=int64)

In [78]:
griffin_budget = griffin_df["budget"].unique()
griffin_budget

array([917500], dtype=int64)

In [79]:
average_math_griffin = griffin_df["math_score"].mean()
average_math_griffin

83.35149863760218

In [80]:
average_reading_griffin = griffin_df["reading_score"].mean()
average_reading_griffin

83.816757493188

In [81]:
# Find percent of Huang students with passing math scores
math_pass_griffin = griffin_df['math_passfail'].value_counts('Pass')
math_pass_griffin

Pass    1.0
Name: math_passfail, dtype: float64

In [82]:
# Find percent of Huang students with passing reading scores
reading_pass_griffin = griffin_df['reading_passfail'].value_counts('Pass')
reading_pass_griffin

Pass    1.0
Name: reading_passfail, dtype: float64

In [83]:
overall_pass_griffin = griffin_df['overall_passfail'].value_counts('PassPass')
overall_pass_griffin

PassPass    1.0
Name: overall_passfail, dtype: float64

In [84]:
# Print table with Huang Summary information:
griffin_summary_df = pd.DataFrame(
    {"School Name": griffin_name, 
     "School Tyoe": griffin_type,
     "Total Students": griffin_size,
     "Total School Budget": griffin_budget,
     "Average Math Score": [average_math_griffin],
     "Average Reading Score": [average_reading_griffin],
     "% Passing Math": [math_pass_griffin],
     "% Passing Reading": [reading_pass_griffin],
     "% Overall Passing": [overall_pass_griffin]
                       })
griffin_summary_df

Unnamed: 0,School Name,School Tyoe,Total Students,Total School Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
0,Griffin High School,Charter,1468,917500,83.351499,83.816757,"Pass 1.0 Name: math_passfail, dtype: float64","Pass 1.0 Name: reading_passfail, dtype: flo...","PassPass 1.0 Name: overall_passfail, dtype:..."


In [85]:
wilson_df = merge_df.loc[merge_df["school_name"] == "Wilson High School", ["school_name", "type", "size", "budget", "math_score", "reading_score", "math_passfail", "reading_passfail", "overall_passfail", "Per_Student_Budget"]]
wilson_df.head()

Unnamed: 0,school_name,type,size,budget,math_score,reading_score,math_passfail,reading_passfail,overall_passfail,Per_Student_Budget
13730,Wilson High School,Charter,2283,1319574,71,84,Pass,Pass,PassPass,578.0
13731,Wilson High School,Charter,2283,1319574,87,72,Pass,Pass,PassPass,578.0
13732,Wilson High School,Charter,2283,1319574,82,97,Pass,Pass,PassPass,578.0
13733,Wilson High School,Charter,2283,1319574,68,93,Pass,Pass,PassPass,578.0
13734,Wilson High School,Charter,2283,1319574,72,79,Pass,Pass,PassPass,578.0


In [86]:
# Gather data from huang_df for the Huang High School Summary Table
wilson_name = wilson_df["school_name"].unique()
wilson_name

array(['Wilson High School'], dtype=object)

In [87]:
wilson_type = wilson_df["type"].unique()
wilson_type

array(['Charter'], dtype=object)

In [88]:
wilson_size = wilson_df["size"].unique()
wilson_size

array([2283], dtype=int64)

In [89]:
wilson_budget = wilson_df["budget"].unique()
wilson_budget

array([1319574], dtype=int64)

In [90]:
average_math_wilson = wilson_df["math_score"].mean()
average_math_wilson

83.2742006132282

In [91]:
average_reading_wilson = wilson_df["reading_score"].mean()
average_reading_wilson

83.98948751642575

In [92]:
# Find percent of Huang students with passing math scores
math_pass_wilson = wilson_df['math_passfail'].value_counts('Pass')
math_pass_wilson

Pass    1.0
Name: math_passfail, dtype: float64

In [93]:
# Find percent of Huang students with passing reading scores
reading_pass_wilson = wilson_df['reading_passfail'].value_counts('Pass')
reading_pass_wilson

Pass    1.0
Name: reading_passfail, dtype: float64

In [94]:
overall_pass_wilson = wilson_df['overall_passfail'].value_counts('PassPass')
overall_pass_wilson

PassPass    1.0
Name: overall_passfail, dtype: float64

In [95]:
# Print table with Huang Summary information:
wilson_summary_df = pd.DataFrame(
    {"School Name": wilson_name, 
     "School Tyoe": wilson_type,
     "Total Students": wilson_size,
     "Total School Budget": wilson_budget,
     "Average Math Score": [average_math_wilson],
     "Average Reading Score": [average_reading_wilson],
     "% Passing Math": [math_pass_wilson],
     "% Passing Reading": [reading_pass_wilson],
     "% Overall Passing": [overall_pass_wilson]
                       })
wilson_summary_df

Unnamed: 0,School Name,School Tyoe,Total Students,Total School Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
0,Wilson High School,Charter,2283,1319574,83.274201,83.989488,"Pass 1.0 Name: math_passfail, dtype: float64","Pass 1.0 Name: reading_passfail, dtype: flo...","PassPass 1.0 Name: overall_passfail, dtype:..."


In [96]:
cabrera_df = merge_df.loc[merge_df["school_name"] == "Cabrera High School", ["school_name", "type", "size", "budget", "math_score", "reading_score", "math_passfail", "reading_passfail", "overall_passfail", "Per_Student_Budget"]]
cabrera_df.head()

Unnamed: 0,school_name,type,size,budget,math_score,reading_score,math_passfail,reading_passfail,overall_passfail,Per_Student_Budget
16013,Cabrera High School,Charter,1858,1081356,94,94,Pass,Pass,PassPass,582.0
16014,Cabrera High School,Charter,1858,1081356,97,98,Pass,Pass,PassPass,582.0
16015,Cabrera High School,Charter,1858,1081356,83,78,Pass,Pass,PassPass,582.0
16016,Cabrera High School,Charter,1858,1081356,92,89,Pass,Pass,PassPass,582.0
16017,Cabrera High School,Charter,1858,1081356,71,96,Pass,Pass,PassPass,582.0


In [97]:
# Gather data from huang_df for the Huang High School Summary Table
cabrera_name = cabrera_df["school_name"].unique()
cabrera_name

array(['Cabrera High School'], dtype=object)

In [98]:
cabrera_type = cabrera_df["type"].unique()
cabrera_type

array(['Charter'], dtype=object)

In [99]:
cabrera_size = cabrera_df["size"].unique()
cabrera_size

array([1858], dtype=int64)

In [100]:
cabrera_budget = cabrera_df["budget"].unique()
cabrera_budget

array([1081356], dtype=int64)

In [101]:
average_math_cabrera = cabrera_df["math_score"].mean()
average_math_cabrera

83.06189451022605

In [102]:
average_reading_cabrera = cabrera_df["reading_score"].mean()
average_reading_cabrera

83.97578040904197

In [103]:
# Find percent of Huang students with passing math scores
math_pass_cabrera = cabrera_df['math_passfail'].value_counts('Pass')
math_pass_cabrera

Pass    1.0
Name: math_passfail, dtype: float64

In [104]:
# Find percent of Huang students with passing reading scores
reading_pass_cabrera = cabrera_df['reading_passfail'].value_counts('Pass')
reading_pass_cabrera

Pass    1.0
Name: reading_passfail, dtype: float64

In [105]:
overall_pass_cabrera = cabrera_df['overall_passfail'].value_counts('PassPass')
overall_pass_cabrera

PassPass    1.0
Name: overall_passfail, dtype: float64

In [106]:
# Print table with Huang Summary information:
cabrera_summary_df = pd.DataFrame(
    {"School Name": cabrera_name, 
     "School Tyoe": cabrera_type,
     "Total Students": cabrera_size,
     "Total School Budget": cabrera_budget,
     "Average Math Score": [average_math_cabrera],
     "Average Reading Score": [average_reading_cabrera],
     "% Passing Math": [math_pass_cabrera],
     "% Passing Reading": [reading_pass_cabrera],
     "% Overall Passing": [overall_pass_cabrera]
                       })
cabrera_summary_df

Unnamed: 0,School Name,School Tyoe,Total Students,Total School Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
0,Cabrera High School,Charter,1858,1081356,83.061895,83.97578,"Pass 1.0 Name: math_passfail, dtype: float64","Pass 1.0 Name: reading_passfail, dtype: flo...","PassPass 1.0 Name: overall_passfail, dtype:..."


In [107]:
bailey_df = merge_df.loc[merge_df["school_name"] == "Bailey High School", ["school_name", "type", "size", "budget", "math_score", "reading_score", "math_passfail", "reading_passfail", "overall_passfail", "Per_Student_Budget"]]
bailey_df.head()

Unnamed: 0,school_name,type,size,budget,math_score,reading_score,math_passfail,reading_passfail,overall_passfail,Per_Student_Budget
17871,Bailey High School,District,4976,3124928,59,75,Fail,Pass,FailPass,628.0
17872,Bailey High School,District,4976,3124928,58,84,Fail,Pass,FailPass,628.0
17873,Bailey High School,District,4976,3124928,86,79,Pass,Pass,PassPass,628.0
17874,Bailey High School,District,4976,3124928,89,71,Pass,Pass,PassPass,628.0
17875,Bailey High School,District,4976,3124928,61,90,Pass,Pass,PassPass,628.0


In [108]:
# Gather data from huang_df for the Huang High School Summary Table
bailey_name = bailey_df["school_name"].unique()
bailey_name

array(['Bailey High School'], dtype=object)

In [109]:
bailey_type = bailey_df["type"].unique()
bailey_type

array(['District'], dtype=object)

In [110]:
bailey_size = bailey_df["size"].unique()
bailey_size

array([4976], dtype=int64)

In [111]:
bailey_budget = bailey_df["budget"].unique()
bailey_budget

array([3124928], dtype=int64)

In [112]:
average_math_bailey = bailey_df["math_score"].mean()
average_math_bailey

77.04843247588424

In [113]:
average_reading_bailey = bailey_df["reading_score"].mean()
average_reading_bailey

81.03396302250803

In [114]:
# Find percent of Huang students with passing math scores
math_pass_bailey = bailey_df['math_passfail'].value_counts('Pass')
math_pass_bailey

Pass    0.895297
Fail    0.104703
Name: math_passfail, dtype: float64

In [115]:
# Find percent of Huang students with passing reading scores
reading_pass_bailey = bailey_df['reading_passfail'].value_counts('Pass')
reading_pass_bailey

Pass    1.0
Name: reading_passfail, dtype: float64

In [116]:
overall_pass_bailey = bailey_df['overall_passfail'].value_counts('PassPass')
overall_pass_bailey

PassPass    0.895297
FailPass    0.104703
Name: overall_passfail, dtype: float64

In [117]:
# Print table with Huang Summary information:
bailey_summary_df = pd.DataFrame(
    {"School Name": bailey_name, 
     "School Tyoe": bailey_type,
     "Total Students": bailey_size,
     "Total School Budget": bailey_budget,
     "Average Math Score": [average_math_bailey],
     "Average Reading Score": [average_reading_bailey],
     "% Passing Math": [math_pass_bailey],
     "% Passing Reading": [reading_pass_bailey],
     "% Overall Passing": [overall_pass_bailey]
                       })
bailey_summary_df

Unnamed: 0,School Name,School Tyoe,Total Students,Total School Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
0,Bailey High School,District,4976,3124928,77.048432,81.033963,Pass 0.895297 Fail 0.104703 Name: math_p...,"Pass 1.0 Name: reading_passfail, dtype: flo...",PassPass 0.895297 FailPass 0.104703 Name...


In [118]:
holden_df = merge_df.loc[merge_df["school_name"] == "Holden High School", ["school_name", "type", "size", "budget", "math_score", "reading_score", "math_passfail", "reading_passfail", "overall_passfail", "Per_Student_Budget"]]
holden_df.head()

Unnamed: 0,school_name,type,size,budget,math_score,reading_score,math_passfail,reading_passfail,overall_passfail,Per_Student_Budget
22847,Holden High School,Charter,427,248087,92,86,Pass,Pass,PassPass,581.0
22848,Holden High School,Charter,427,248087,81,91,Pass,Pass,PassPass,581.0
22849,Holden High School,Charter,427,248087,93,98,Pass,Pass,PassPass,581.0
22850,Holden High School,Charter,427,248087,90,80,Pass,Pass,PassPass,581.0
22851,Holden High School,Charter,427,248087,85,76,Pass,Pass,PassPass,581.0


In [119]:
# Gather data from huang_df for the Huang High School Summary Table
holden_name = holden_df["school_name"].unique()
holden_name

array(['Holden High School'], dtype=object)

In [120]:
holden_type = holden_df["type"].unique()
holden_type

array(['Charter'], dtype=object)

In [121]:
holden_size = holden_df["size"].unique()
holden_size

array([427], dtype=int64)

In [122]:
holden_budget = holden_df["budget"].unique()
holden_budget

array([248087], dtype=int64)

In [123]:
average_math_holden = holden_df["math_score"].mean()
average_math_holden

83.80327868852459

In [124]:
average_reading_holden = holden_df["reading_score"].mean()
average_reading_holden

83.81498829039812

In [125]:
# Find percent of Huang students with passing math scores
math_pass_holden = holden_df['math_passfail'].value_counts('Pass')
math_pass_holden

Pass    1.0
Name: math_passfail, dtype: float64

In [126]:
# Find percent of Huang students with passing reading scores
reading_pass_holden = holden_df['reading_passfail'].value_counts('Pass')
reading_pass_holden

Pass    1.0
Name: reading_passfail, dtype: float64

In [127]:
overall_pass_holden = holden_df['overall_passfail'].value_counts('PassPass')
overall_pass_holden

PassPass    1.0
Name: overall_passfail, dtype: float64

In [128]:
# Print table with Huang Summary information:
holden_summary_df = pd.DataFrame(
    {"School Name": holden_name, 
     "School Tyoe": holden_type,
     "Total Students": holden_size,
     "Total School Budget": holden_budget,
     "Average Math Score": [average_math_holden],
     "Average Reading Score": [average_reading_holden],
     "% Passing Math": [math_pass_holden],
     "% Passing Reading": [reading_pass_holden],
     "% Overall Passing": [overall_pass_holden]
                       })
holden_summary_df

Unnamed: 0,School Name,School Tyoe,Total Students,Total School Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
0,Holden High School,Charter,427,248087,83.803279,83.814988,"Pass 1.0 Name: math_passfail, dtype: float64","Pass 1.0 Name: reading_passfail, dtype: flo...","PassPass 1.0 Name: overall_passfail, dtype:..."


In [129]:
pena_df = merge_df.loc[merge_df["school_name"] == "Pena High School", ["school_name", "type", "size", "budget", "math_score", "reading_score", "math_passfail", "reading_passfail", "overall_passfail", "Per_Student_Budget"]]
pena_df.head()

Unnamed: 0,school_name,type,size,budget,math_score,reading_score,math_passfail,reading_passfail,overall_passfail,Per_Student_Budget
23274,Pena High School,Charter,962,585858,75,91,Pass,Pass,PassPass,609.0
23275,Pena High School,Charter,962,585858,76,94,Pass,Pass,PassPass,609.0
23276,Pena High School,Charter,962,585858,91,98,Pass,Pass,PassPass,609.0
23277,Pena High School,Charter,962,585858,71,78,Pass,Pass,PassPass,609.0
23278,Pena High School,Charter,962,585858,92,87,Pass,Pass,PassPass,609.0


In [130]:
# Gather data from huang_df for the Huang High School Summary Table
pena_name = pena_df["school_name"].unique()
pena_name

array(['Pena High School'], dtype=object)

In [131]:
pena_type = pena_df["type"].unique()
pena_type

array(['Charter'], dtype=object)

In [132]:
pena_size = pena_df["size"].unique()
pena_size

array([962], dtype=int64)

In [133]:
pena_budget = pena_df["budget"].unique()
pena_budget

array([585858], dtype=int64)

In [134]:
average_math_pena = pena_df["math_score"].mean()
average_math_pena

83.83991683991684

In [135]:
average_reading_pena = pena_df["reading_score"].mean()
average_reading_pena

84.04469854469855

In [136]:
# Find percent of Huang students with passing math scores
math_pass_pena = pena_df['math_passfail'].value_counts('Pass')
math_pass_pena

Pass    1.0
Name: math_passfail, dtype: float64

In [137]:
# Find percent of Huang students with passing reading scores
reading_pass_pena = pena_df['reading_passfail'].value_counts('Pass')
reading_pass_pena

Pass    1.0
Name: reading_passfail, dtype: float64

In [138]:
overall_pass_pena = pena_df['overall_passfail'].value_counts('PassPass')
overall_pass_pena

PassPass    1.0
Name: overall_passfail, dtype: float64

In [139]:
# Print table with Huang Summary information:
pena_summary_df = pd.DataFrame(
    {"School Name": pena_name, 
     "School Tyoe": pena_type,
     "Total Students": pena_size,
     "Total School Budget": pena_budget,
     "Average Math Score": [average_math_pena],
     "Average Reading Score": [average_reading_pena],
     "% Passing Math": [math_pass_pena],
     "% Passing Reading": [reading_pass_pena],
     "% Overall Passing": [overall_pass_pena]
                       })
pena_summary_df

Unnamed: 0,School Name,School Tyoe,Total Students,Total School Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
0,Pena High School,Charter,962,585858,83.839917,84.044699,"Pass 1.0 Name: math_passfail, dtype: float64","Pass 1.0 Name: reading_passfail, dtype: flo...","PassPass 1.0 Name: overall_passfail, dtype:..."


In [140]:
wright_df = merge_df.loc[merge_df["school_name"] == "Wright High School", ["school_name", "type", "size", "budget", "math_score", "reading_score", "math_passfail", "reading_passfail", "overall_passfail", "Per_Student_Budget"]]
wright_df.head()

Unnamed: 0,school_name,type,size,budget,math_score,reading_score,math_passfail,reading_passfail,overall_passfail,Per_Student_Budget
24236,Wright High School,Charter,1800,1049400,72,89,Pass,Pass,PassPass,583.0
24237,Wright High School,Charter,1800,1049400,93,84,Pass,Pass,PassPass,583.0
24238,Wright High School,Charter,1800,1049400,88,88,Pass,Pass,PassPass,583.0
24239,Wright High School,Charter,1800,1049400,84,75,Pass,Pass,PassPass,583.0
24240,Wright High School,Charter,1800,1049400,82,93,Pass,Pass,PassPass,583.0


In [141]:
# Gather data from huang_df for the Huang High School Summary Table
wright_name = wright_df["school_name"].unique()
wright_name

array(['Wright High School'], dtype=object)

In [142]:
wright_type = wright_df["type"].unique()
wright_type

array(['Charter'], dtype=object)

In [143]:
wright_size = wright_df["size"].unique()
wright_size

array([1800], dtype=int64)

In [144]:
wright_budget = wright_df["budget"].unique()
wright_budget

array([1049400], dtype=int64)

In [145]:
average_math_wright = wright_df["math_score"].mean()
average_math_wright

83.68222222222222

In [146]:
average_reading_wright = wright_df["reading_score"].mean()
average_reading_wright

83.955

In [147]:
# Find percent of Huang students with passing math scores
math_pass_wright = wright_df['math_passfail'].value_counts('Pass')
math_pass_wright

Pass    1.0
Name: math_passfail, dtype: float64

In [148]:
# Find percent of Huang students with passing reading scores
reading_pass_wright = wright_df['reading_passfail'].value_counts('Pass')
reading_pass_wright

Pass    1.0
Name: reading_passfail, dtype: float64

In [149]:
overall_pass_wright = wright_df['overall_passfail'].value_counts('PassPass')
overall_pass_wright

PassPass    1.0
Name: overall_passfail, dtype: float64

In [150]:
# Print table with Huang Summary information:
wright_summary_df = pd.DataFrame(
    {"School Name": wright_name, 
     "School Tyoe": wright_type,
     "Total Students": wright_size,
     "Total School Budget": wright_budget,
     "Average Math Score": [average_math_wright],
     "Average Reading Score": [average_reading_wright],
     "% Passing Math": [math_pass_wright],
     "% Passing Reading": [reading_pass_wright],
     "% Overall Passing": [overall_pass_wright]
                       })
wright_summary_df

Unnamed: 0,School Name,School Tyoe,Total Students,Total School Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
0,Wright High School,Charter,1800,1049400,83.682222,83.955,"Pass 1.0 Name: math_passfail, dtype: float64","Pass 1.0 Name: reading_passfail, dtype: flo...","PassPass 1.0 Name: overall_passfail, dtype:..."


In [151]:
rodriguez_df = merge_df.loc[merge_df["school_name"] == "Rodriguez High School", ["school_name", "type", "size", "budget", "math_score", "reading_score", "math_passfail", "reading_passfail", "overall_passfail", "Per_Student_Budget"]]
rodriguez_df.head()

Unnamed: 0,school_name,type,size,budget,math_score,reading_score,math_passfail,reading_passfail,overall_passfail,Per_Student_Budget
26036,Rodriguez High School,District,3999,2547363,81,74,Pass,Pass,PassPass,637.0
26037,Rodriguez High School,District,3999,2547363,86,80,Pass,Pass,PassPass,637.0
26038,Rodriguez High School,District,3999,2547363,62,97,Pass,Pass,PassPass,637.0
26039,Rodriguez High School,District,3999,2547363,73,79,Pass,Pass,PassPass,637.0
26040,Rodriguez High School,District,3999,2547363,92,96,Pass,Pass,PassPass,637.0


In [152]:
# Gather data from huang_df for the Huang High School Summary Table
rodriguez_name = rodriguez_df["school_name"].unique()
rodriguez_name

array(['Rodriguez High School'], dtype=object)

In [153]:
rodriguez_type = rodriguez_df["type"].unique()
rodriguez_type

array(['District'], dtype=object)

In [154]:
rodriguez_size = rodriguez_df["size"].unique()
rodriguez_size

array([3999], dtype=int64)

In [155]:
rodriguez_budget = rodriguez_df["budget"].unique()
rodriguez_budget

array([2547363], dtype=int64)

In [156]:
average_math_rodriguez = rodriguez_df["math_score"].mean()
average_math_rodriguez

76.84271067766942

In [157]:
average_reading_rodriguez = rodriguez_df["reading_score"].mean()
average_reading_rodriguez

80.74468617154288

In [158]:
# Find percent of Huang students with passing math scores
math_pass_rodriguez = rodriguez_df['math_passfail'].value_counts('Pass')
math_pass_rodriguez

Pass    0.885471
Fail    0.114529
Name: math_passfail, dtype: float64

In [159]:
# Find percent of Huang students with passing reading scores
reading_pass_rodriguez = rodriguez_df['reading_passfail'].value_counts('Pass')
reading_pass_rodriguez

Pass    1.0
Name: reading_passfail, dtype: float64

In [160]:
overall_pass_rodriguez = rodriguez_df['overall_passfail'].value_counts('PassPass')
overall_pass_rodriguez

PassPass    0.885471
FailPass    0.114529
Name: overall_passfail, dtype: float64

In [161]:
# Print table with Huang Summary information:
rodriguez_summary_df = pd.DataFrame(
    {"School Name": rodriguez_name, 
     "School Tyoe": rodriguez_type,
     "Total Students": rodriguez_size,
     "Total School Budget": rodriguez_budget,
     "Average Math Score": [average_math_rodriguez],
     "Average Reading Score": [average_reading_rodriguez],
     "% Passing Math": [math_pass_rodriguez],
     "% Passing Reading": [reading_pass_rodriguez],
     "% Overall Passing": [overall_pass_rodriguez]
                       })
rodriguez_summary_df

Unnamed: 0,School Name,School Tyoe,Total Students,Total School Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
0,Rodriguez High School,District,3999,2547363,76.842711,80.744686,Pass 0.885471 Fail 0.114529 Name: math_p...,"Pass 1.0 Name: reading_passfail, dtype: flo...",PassPass 0.885471 FailPass 0.114529 Name...


In [162]:
johnson_df = merge_df.loc[merge_df["school_name"] == "Johnson High School", ["school_name", "type", "size", "budget", "math_score", "reading_score", "math_passfail", "reading_passfail", "overall_passfail", "Per_Student_Budget"]]
johnson_df.head()

Unnamed: 0,school_name,type,size,budget,math_score,reading_score,math_passfail,reading_passfail,overall_passfail,Per_Student_Budget
30035,Johnson High School,District,4761,3094650,87,87,Pass,Pass,PassPass,650.0
30036,Johnson High School,District,4761,3094650,62,98,Pass,Pass,PassPass,650.0
30037,Johnson High School,District,4761,3094650,77,89,Pass,Pass,PassPass,650.0
30038,Johnson High School,District,4761,3094650,85,66,Pass,Pass,PassPass,650.0
30039,Johnson High School,District,4761,3094650,85,63,Pass,Pass,PassPass,650.0


In [163]:
# Gather data from huang_df for the Huang High School Summary Table
johnson_name = johnson_df["school_name"].unique()
johnson_name

array(['Johnson High School'], dtype=object)

In [164]:
johnson_type = johnson_df["type"].unique()
johnson_type

array(['District'], dtype=object)

In [165]:
johnson_size = johnson_df["size"].unique()
johnson_size

array([4761], dtype=int64)

In [166]:
johnson_budget = johnson_df["budget"].unique()
johnson_budget

array([3094650], dtype=int64)

In [167]:
average_math_johnson = johnson_df["math_score"].mean()
average_math_johnson

77.07246376811594

In [168]:
average_reading_johnson = johnson_df["reading_score"].mean()
average_reading_johnson

80.96639361478681

In [169]:
# Find percent of Huang students with passing math scores
math_pass_johnson = johnson_df['math_passfail'].value_counts('Pass')
math_pass_johnson

Pass    0.891829
Fail    0.108171
Name: math_passfail, dtype: float64

In [170]:
# Find percent of Huang students with passing reading scores
reading_pass_johnson = johnson_df['reading_passfail'].value_counts('Pass')
reading_pass_johnson

Pass    1.0
Name: reading_passfail, dtype: float64

In [171]:
overall_pass_johnson = johnson_df['overall_passfail'].value_counts('PassPass')
overall_pass_johnson

PassPass    0.891829
FailPass    0.108171
Name: overall_passfail, dtype: float64

In [172]:
# Print table with Huang Summary information:
johnson_summary_df = pd.DataFrame(
    {"School Name": johnson_name, 
     "School Tyoe": johnson_type,
     "Total Students": johnson_size,
     "Total School Budget": johnson_budget,
     "Average Math Score": [average_math_johnson],
     "Average Reading Score": [average_reading_johnson],
     "% Passing Math": [math_pass_johnson],
     "% Passing Reading": [reading_pass_johnson],
     "% Overall Passing": [overall_pass_johnson]
                       })
johnson_summary_df

Unnamed: 0,School Name,School Tyoe,Total Students,Total School Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
0,Johnson High School,District,4761,3094650,77.072464,80.966394,Pass 0.891829 Fail 0.108171 Name: math_p...,"Pass 1.0 Name: reading_passfail, dtype: flo...",PassPass 0.891829 FailPass 0.108171 Name...


In [173]:
ford_df = merge_df.loc[merge_df["school_name"] == "Ford High School", ["school_name", "type", "size", "budget", "math_score", "reading_score", "math_passfail", "reading_passfail", "overall_passfail", "Per_Student_Budget"]]
ford_df.head()

Unnamed: 0,school_name,type,size,budget,math_score,reading_score,math_passfail,reading_passfail,overall_passfail,Per_Student_Budget
34796,Ford High School,District,2739,1763916,94,66,Pass,Pass,PassPass,644.0
34797,Ford High School,District,2739,1763916,63,68,Pass,Pass,PassPass,644.0
34798,Ford High School,District,2739,1763916,59,73,Fail,Pass,FailPass,644.0
34799,Ford High School,District,2739,1763916,58,88,Fail,Pass,FailPass,644.0
34800,Ford High School,District,2739,1763916,55,96,Fail,Pass,FailPass,644.0


In [174]:
# Gather data from huang_df for the Huang High School Summary Table
ford_name = ford_df["school_name"].unique()
ford_name

array(['Ford High School'], dtype=object)

In [175]:
ford_type = ford_df["type"].unique()
ford_type

array(['District'], dtype=object)

In [176]:
ford_size = ford_df["size"].unique()
ford_size

array([2739], dtype=int64)

In [177]:
ford_budget = ford_df["budget"].unique()
ford_budget

array([1763916], dtype=int64)

In [178]:
average_math_ford = ford_df["math_score"].mean()
average_math_ford

77.10259218692954

In [179]:
average_reading_ford = ford_df["reading_score"].mean()
average_reading_ford

80.74625775830594

In [180]:
# Find percent of Huang students with passing math scores
math_pass_ford = ford_df['math_passfail'].value_counts('Pass')
math_pass_ford

Pass    0.893027
Fail    0.106973
Name: math_passfail, dtype: float64

In [181]:
# Find percent of Huang students with passing reading scores
reading_pass_ford = ford_df['reading_passfail'].value_counts('Pass')
reading_pass_ford

Pass    1.0
Name: reading_passfail, dtype: float64

In [182]:
overall_pass_ford = ford_df['overall_passfail'].value_counts('PassPass')
overall_pass_ford

PassPass    0.893027
FailPass    0.106973
Name: overall_passfail, dtype: float64

In [183]:
# Print table with Huang Summary information:
ford_summary_df = pd.DataFrame(
    {"School Name": ford_name, 
     "School Tyoe": ford_type,
     "Total Students": ford_size,
     "Total School Budget": ford_budget,
     "Average Math Score": [average_math_ford],
     "Average Reading Score": [average_reading_ford],
     "% Passing Math": [math_pass_ford],
     "% Passing Reading": [reading_pass_ford],
     "% Overall Passing": [overall_pass_ford]
                       })
ford_summary_df

Unnamed: 0,School Name,School Tyoe,Total Students,Total School Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
0,Ford High School,District,2739,1763916,77.102592,80.746258,Pass 0.893027 Fail 0.106973 Name: math_p...,"Pass 1.0 Name: reading_passfail, dtype: flo...",PassPass 0.893027 FailPass 0.106973 Name...


In [184]:
thomas_df = merge_df.loc[merge_df["school_name"] == "Thomas High School", ["school_name", "type", "size", "budget", "math_score", "reading_score", "math_passfail", "reading_passfail", "overall_passfail", "Per_Student_Budget"]]
thomas_df.head()

Unnamed: 0,school_name,type,size,budget,math_score,reading_score,math_passfail,reading_passfail,overall_passfail,Per_Student_Budget
37535,Thomas High School,Charter,1635,1043130,76,76,Pass,Pass,PassPass,638.0
37536,Thomas High School,Charter,1635,1043130,82,84,Pass,Pass,PassPass,638.0
37537,Thomas High School,Charter,1635,1043130,90,80,Pass,Pass,PassPass,638.0
37538,Thomas High School,Charter,1635,1043130,69,71,Pass,Pass,PassPass,638.0
37539,Thomas High School,Charter,1635,1043130,76,86,Pass,Pass,PassPass,638.0


In [185]:
# Gather data from huang_df for the Huang High School Summary Table
thomas_name = thomas_df["school_name"].unique()
thomas_name

array(['Thomas High School'], dtype=object)

In [186]:
thomas_type = thomas_df["type"].unique()
thomas_type

array(['Charter'], dtype=object)

In [187]:
thomas_size = thomas_df["size"].unique()
thomas_size

array([1635], dtype=int64)

In [188]:
thomas_budget = thomas_df["budget"].unique()
thomas_budget

array([1043130], dtype=int64)

In [189]:
average_math_thomas = thomas_df["math_score"].mean()
average_math_thomas

83.4183486238532

In [190]:
average_reading_thomas = thomas_df["reading_score"].mean()
average_reading_thomas

83.84892966360856

In [191]:
# Find percent of Huang students with passing math scores
math_pass_thomas = thomas_df['math_passfail'].value_counts('Pass')
math_pass_thomas

Pass    1.0
Name: math_passfail, dtype: float64

In [192]:
# Find percent of Huang students with passing reading scores
reading_pass_thomas = thomas_df['reading_passfail'].value_counts('Pass')
reading_pass_thomas

Pass    1.0
Name: reading_passfail, dtype: float64

In [193]:
overall_pass_thomas = thomas_df['overall_passfail'].value_counts('PassPass')
overall_pass_thomas

PassPass    1.0
Name: overall_passfail, dtype: float64

In [194]:
# Print table with Huang Summary information:
thomas_summary_df = pd.DataFrame(
    {"School Name": thomas_name, 
     "School Tyoe": thomas_type,
     "Total Students": thomas_size,
     "Total School Budget": thomas_budget,
     "Average Math Score": [average_math_thomas],
     "Average Reading Score": [average_reading_thomas],
     "% Passing Math": [math_pass_thomas],
     "% Passing Reading": [reading_pass_thomas],
     "% Overall Passing": [overall_pass_thomas]
                       })
thomas_summary_df

Unnamed: 0,School Name,School Tyoe,Total Students,Total School Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
0,Thomas High School,Charter,1635,1043130,83.418349,83.84893,"Pass 1.0 Name: math_passfail, dtype: float64","Pass 1.0 Name: reading_passfail, dtype: flo...","PassPass 1.0 Name: overall_passfail, dtype:..."


In [None]:
# I need to put all this informaiton into the same table. I'm afraid I miss understood the "School Summary table" 
# as meaning on for each school. I guess I could put all this back together. It's just not an efficient way to do this. 