In [1]:
#Dependencies
import numpy as np
import pandas as pd

In [2]:
#Relative paths of the two CSV files that get loaded below
loaded_schools, loaded_students = (
    "Resources/schools_complete.csv",
    "Resources/students_complete.csv",
)

In [3]:
#Load each CSV into its own Pandas DataFrame (schools first, then students)
schools_data, students_data = (
    pd.read_csv(csv_path) for csv_path in (loaded_schools, loaded_students)
)

In [4]:
#Combine the data files into a single dataset
#Left join: every row of students_data is kept and the matching school's columns are attached
#The join key is given once ("school_name"); listing the same key twice was redundant
complete_schools_data = pd.merge(students_data, schools_data, how="left", on="school_name")
complete_schools_data.head()

Unnamed: 0,Student ID,student_name,gender,grade,school_name,reading_score,math_score,School ID,type,size,budget
0,0,Paul Bradley,M,9th,Huang High School,66,79,0,District,2917,1910635
1,1,Victor Smith,M,12th,Huang High School,94,61,0,District,2917,1910635
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60,0,District,2917,1910635
3,3,Dr. Richard Scott,M,12th,Huang High School,67,58,0,District,2917,1910635
4,4,Bonnie Ray,F,9th,Huang High School,97,84,0,District,2917,1910635


In [5]:
#Verify no info. is missing: number of non-missing entries in every column
complete_schools_data.notna().sum()

Student ID       39170
student_name     39170
gender           39170
grade            39170
school_name      39170
reading_score    39170
math_score       39170
School ID        39170
type             39170
size             39170
budget           39170
dtype: int64

In [6]:
#Rename column headers so they are easier to read
#Maps each original header to its display-friendly replacement
readable_headers = {
    "student_name": "Student Name",
    "gender": "Gender",
    "grade": "Grade",
    "school_name": "School Name",
    "reading_score": "Reading Score",
    "math_score": "Math Score",
    "type": "Type",
    "size": "Size",
    "budget": "Budget",
}
complete_schools_data = complete_schools_data.rename(columns=readable_headers)
complete_schools_data.head()

Unnamed: 0,Student ID,Student Name,Gender,Grade,School Name,Reading Score,Math Score,School ID,Type,Size,Budget
0,0,Paul Bradley,M,9th,Huang High School,66,79,0,District,2917,1910635
1,1,Victor Smith,M,12th,Huang High School,94,61,0,District,2917,1910635
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60,0,District,2917,1910635
3,3,Dr. Richard Scott,M,12th,Huang High School,67,58,0,District,2917,1910635
4,4,Bonnie Ray,F,9th,Huang High School,97,84,0,District,2917,1910635


In [7]:
#Reorganize columns: school-level columns first, then student-level columns, to ease review of info.
preferred_column_order = [
    "School Name", "School ID", "Type", "Size", "Budget",
    "Student Name", "Student ID", "Gender", "Grade",
    "Reading Score", "Math Score",
]
complete_schools_data = complete_schools_data[preferred_column_order]
complete_schools_data.head()

Unnamed: 0,School Name,School ID,Type,Size,Budget,Student Name,Student ID,Gender,Grade,Reading Score,Math Score
0,Huang High School,0,District,2917,1910635,Paul Bradley,0,M,9th,66,79
1,Huang High School,0,District,2917,1910635,Victor Smith,1,M,12th,94,61
2,Huang High School,0,District,2917,1910635,Kevin Rodriguez,2,M,12th,90,60
3,Huang High School,0,District,2917,1910635,Dr. Richard Scott,3,M,12th,67,58
4,Huang High School,0,District,2917,1910635,Bonnie Ray,4,F,9th,97,84


In [8]:
#BEGIN DISTRICT SUMMARY OF HW
#Total # of schools in the district = number of distinct values in the "School Name" column
count_unique_schools = complete_schools_data.loc[:, "School Name"].nunique()
count_unique_schools

15

In [9]:
#Pull out only the "School Name" and "Budget" columns as the starting point for the district's total budget
budget_columns = ["School Name", "Budget"]
district_budget_data = complete_schools_data.loc[:, budget_columns]
district_budget_data

Unnamed: 0,School Name,Budget
0,Huang High School,1910635
1,Huang High School,1910635
2,Huang High School,1910635
3,Huang High School,1910635
4,Huang High School,1910635
...,...,...
39165,Thomas High School,1043130
39166,Thomas High School,1043130
39167,Thomas High School,1043130
39168,Thomas High School,1043130


In [10]:
#Keep one row per school (the first one encountered) so each budget is counted once in the next step
total_unique_district_data = district_budget_data.drop_duplicates(subset=["School Name"])
total_unique_district_data

Unnamed: 0,School Name,Budget
0,Huang High School,1910635
2917,Figueroa High School,1884411
5866,Shelton High School,1056600
7627,Hernandez High School,3022020
12262,Griffin High School,917500
13730,Wilson High School,1319574
16013,Cabrera High School,1081356
17871,Bailey High School,3124928
22847,Holden High School,248087
23274,Pena High School,585858


In [11]:
#Total budget for the district: sum of the one-row-per-school "Budget" column
total_budget_data = total_unique_district_data.loc[:, "Budget"].sum()
total_budget_data

24649428

In [12]:
#Number of students in the district = number of distinct "Student ID" values
total_unique_students = complete_schools_data.loc[:, "Student ID"].nunique()
total_unique_students

39170

In [13]:
#Pull out "Student Name" and "Math Score" as the starting point for the average math score prompt
math_columns = ["Student Name", "Math Score"]
student_math_data = complete_schools_data.loc[:, math_columns]
student_math_data

Unnamed: 0,Student Name,Math Score
0,Paul Bradley,79
1,Victor Smith,61
2,Kevin Rodriguez,60
3,Dr. Richard Scott,58
4,Bonnie Ray,84
...,...,...
39165,Donna Howard,90
39166,Dawn Bell,70
39167,Rebecca Tanner,84
39168,Desiree Kidd,90


In [14]:
#District-wide average math score (mean over every student row)
avg_math_score = student_math_data.loc[:, "Math Score"].mean()
avg_math_score

78.98537145774827

In [15]:
#Pull out "Student Name" and "Reading Score" as the starting point for the average reading score prompt
reading_columns = ["Student Name", "Reading Score"]
student_reading_data = complete_schools_data.loc[:, reading_columns]
student_reading_data

Unnamed: 0,Student Name,Reading Score
0,Paul Bradley,66
1,Victor Smith,94
2,Kevin Rodriguez,90
3,Dr. Richard Scott,67
4,Bonnie Ray,97
...,...,...
39165,Donna Howard,99
39166,Dawn Bell,95
39167,Rebecca Tanner,73
39168,Desiree Kidd,99


In [16]:
#District-wide average reading score (mean over every student row)
avg_reading_score = student_reading_data.loc[:, "Reading Score"].mean()
avg_reading_score

81.87784018381414

In [17]:
#Keep only the students whose math score is 70 or higher (first step of the % passing math prompt)
scored_70_plus_in_math = complete_schools_data["Math Score"].ge(70)
passing_math_data = complete_schools_data.loc[scored_70_plus_in_math, :]
passing_math_data

Unnamed: 0,School Name,School ID,Type,Size,Budget,Student Name,Student ID,Gender,Grade,Reading Score,Math Score
0,Huang High School,0,District,2917,1910635,Paul Bradley,0,M,9th,66,79
4,Huang High School,0,District,2917,1910635,Bonnie Ray,4,F,9th,97,84
5,Huang High School,0,District,2917,1910635,Bryan Miranda,5,M,9th,94,94
6,Huang High School,0,District,2917,1910635,Sheena Carter,6,F,11th,82,80
8,Huang High School,0,District,2917,1910635,Michael Roth,8,M,10th,95,87
...,...,...,...,...,...,...,...,...,...,...,...
39165,Thomas High School,14,Charter,1635,1043130,Donna Howard,39165,F,12th,99,90
39166,Thomas High School,14,Charter,1635,1043130,Dawn Bell,39166,F,10th,95,70
39167,Thomas High School,14,Charter,1635,1043130,Rebecca Tanner,39167,F,9th,73,84
39168,Thomas High School,14,Charter,1635,1043130,Desiree Kidd,39168,F,10th,99,90


In [18]:
#Number of students with math scores >= 70 (row count of the filtered DataFrame)
passing_math = len(passing_math_data)
passing_math

29370

In [19]:
#Keep only the students whose reading score is 70 or higher (first step of the % passing reading prompt)
scored_70_plus_in_reading = complete_schools_data["Reading Score"].ge(70)
passing_reading_data = complete_schools_data.loc[scored_70_plus_in_reading, :]
passing_reading_data

Unnamed: 0,School Name,School ID,Type,Size,Budget,Student Name,Student ID,Gender,Grade,Reading Score,Math Score
1,Huang High School,0,District,2917,1910635,Victor Smith,1,M,12th,94,61
2,Huang High School,0,District,2917,1910635,Kevin Rodriguez,2,M,12th,90,60
4,Huang High School,0,District,2917,1910635,Bonnie Ray,4,F,9th,97,84
5,Huang High School,0,District,2917,1910635,Bryan Miranda,5,M,9th,94,94
6,Huang High School,0,District,2917,1910635,Sheena Carter,6,F,11th,82,80
...,...,...,...,...,...,...,...,...,...,...,...
39165,Thomas High School,14,Charter,1635,1043130,Donna Howard,39165,F,12th,99,90
39166,Thomas High School,14,Charter,1635,1043130,Dawn Bell,39166,F,10th,95,70
39167,Thomas High School,14,Charter,1635,1043130,Rebecca Tanner,39167,F,9th,73,84
39168,Thomas High School,14,Charter,1635,1043130,Desiree Kidd,39168,F,10th,99,90


In [20]:
#Number of students with reading scores >= 70 (row count of the filtered DataFrame)
passing_reading = len(passing_reading_data)
passing_reading

33610

In [21]:
#Percent of students with >= 70 math scores: passing count over total students, scaled to 0-100
percent_passing_math = 100 * (passing_math / total_unique_students)
percent_passing_math

74.9808526933878

In [22]:
#Percent of students with >= 70 reading scores: passing count over total students, scaled to 0-100
percent_passing_reading = 100 * (passing_reading / total_unique_students)
percent_passing_reading

85.80546336482001

In [23]:
#Keep only the students who scored 70 or higher in BOTH math and reading
passed_math = complete_schools_data["Math Score"].ge(70)
passed_reading = complete_schools_data["Reading Score"].ge(70)
overall_passing_data = complete_schools_data.loc[passed_math & passed_reading, :]
overall_passing_data

Unnamed: 0,School Name,School ID,Type,Size,Budget,Student Name,Student ID,Gender,Grade,Reading Score,Math Score
4,Huang High School,0,District,2917,1910635,Bonnie Ray,4,F,9th,97,84
5,Huang High School,0,District,2917,1910635,Bryan Miranda,5,M,9th,94,94
6,Huang High School,0,District,2917,1910635,Sheena Carter,6,F,11th,82,80
8,Huang High School,0,District,2917,1910635,Michael Roth,8,M,10th,95,87
9,Huang High School,0,District,2917,1910635,Matthew Greene,9,M,10th,96,84
...,...,...,...,...,...,...,...,...,...,...,...
39165,Thomas High School,14,Charter,1635,1043130,Donna Howard,39165,F,12th,99,90
39166,Thomas High School,14,Charter,1635,1043130,Dawn Bell,39166,F,10th,95,70
39167,Thomas High School,14,Charter,1635,1043130,Rebecca Tanner,39167,F,9th,73,84
39168,Thomas High School,14,Charter,1635,1043130,Desiree Kidd,39168,F,10th,99,90


In [24]:
#Number of students passing both subjects (row count of the filtered DataFrame)
overall_passing_students = len(overall_passing_data)
overall_passing_students

25528

In [25]:
#Percent of students passing both math and reading: count over total students, scaled to 0-100
percent_overall_passing = 100 * (overall_passing_students / total_unique_students)
percent_overall_passing

65.17232575950983

In [26]:
#Create a DataFrame to hold these results on the school district
#Every value is taken from the variables computed in the cells above instead of being
#retyped from their printed output, so the summary cannot drift from the data.
#Counts, averages and percentages are kept numeric at full precision; only the student
#total and the budget are formatted as text (thousands separators / currency).
school_district_dict = [{"Total Schools": int(count_unique_schools),
                         "Total Students": f"{int(total_unique_students):,}",
                         "Total Budget": f"${float(total_budget_data):,.2f}",
                         "Average Math Score": avg_math_score,
                         "Average Reading Score": avg_reading_score,
                         "% Passing Math": percent_passing_math,
                         "% Passing Reading": percent_passing_reading,
                         "% Overall Passing": percent_overall_passing}]

school_district_df = pd.DataFrame(school_district_dict)
school_district_df

Unnamed: 0,Total Schools,Total Students,Total Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
0,15,39170,"$24,649,428.00",78.985371,81.87784,74.980853,85.805463,65.172326


In [27]:
#BEGIN SCHOOL SPECIFIC SUMMARY OF HW
#Work on an independent (deep) copy of the merged DataFrame for this section of the HW
copy_complete_schools_data = complete_schools_data.copy(deep=True)
copy_complete_schools_data

Unnamed: 0,School Name,School ID,Type,Size,Budget,Student Name,Student ID,Gender,Grade,Reading Score,Math Score
0,Huang High School,0,District,2917,1910635,Paul Bradley,0,M,9th,66,79
1,Huang High School,0,District,2917,1910635,Victor Smith,1,M,12th,94,61
2,Huang High School,0,District,2917,1910635,Kevin Rodriguez,2,M,12th,90,60
3,Huang High School,0,District,2917,1910635,Dr. Richard Scott,3,M,12th,67,58
4,Huang High School,0,District,2917,1910635,Bonnie Ray,4,F,9th,97,84
...,...,...,...,...,...,...,...,...,...,...,...
39165,Thomas High School,14,Charter,1635,1043130,Donna Howard,39165,F,12th,99,90
39166,Thomas High School,14,Charter,1635,1043130,Dawn Bell,39166,F,10th,95,70
39167,Thomas High School,14,Charter,1635,1043130,Rebecca Tanner,39167,F,9th,73,84
39168,Thomas High School,14,Charter,1635,1043130,Desiree Kidd,39168,F,10th,99,90


In [28]:
#Order the rows by the "School Name" column (ascending)
complete_schools_data2 = copy_complete_schools_data.sort_values(by="School Name")
complete_schools_data2.head()

Unnamed: 0,School Name,School ID,Type,Size,Budget,Student Name,Student ID,Gender,Grade,Reading Score,Math Score
19584,Bailey High School,7,District,4976,3124928,Tammie Fox,19584,F,11th,82,92
21193,Bailey High School,7,District,4976,3124928,Jennifer Murray,21193,F,9th,88,89
21192,Bailey High School,7,District,4976,3124928,Lisa Pineda,21192,F,9th,86,67
21191,Bailey High School,7,District,4976,3124928,Cameron Miller,21191,M,11th,70,75
21190,Bailey High School,7,District,4976,3124928,Thomas Rasmussen,21190,M,12th,77,82


In [29]:
#Drop the "School ID" column; every other column is kept as is
updated_complete_schools_data = complete_schools_data2.drop(columns="School ID")
updated_complete_schools_data

Unnamed: 0,School Name,Type,Size,Budget,Student Name,Student ID,Gender,Grade,Reading Score,Math Score
19584,Bailey High School,District,4976,3124928,Tammie Fox,19584,F,11th,82,92
21193,Bailey High School,District,4976,3124928,Jennifer Murray,21193,F,9th,88,89
21192,Bailey High School,District,4976,3124928,Lisa Pineda,21192,F,9th,86,67
21191,Bailey High School,District,4976,3124928,Cameron Miller,21191,M,11th,70,75
21190,Bailey High School,District,4976,3124928,Thomas Rasmussen,21190,M,12th,77,82
...,...,...,...,...,...,...,...,...,...,...
24829,Wright High School,Charter,1800,1049400,John Lozano,24829,M,11th,84,77
24828,Wright High School,Charter,1800,1049400,Jonathan Thomas,24828,M,12th,95,80
24827,Wright High School,Charter,1800,1049400,Lori Ramirez,24827,F,10th,74,74
24797,Wright High School,Charter,1800,1049400,Scott Scott,24797,M,10th,90,83


In [30]:
#Number of student rows per school, largest school first
updated_complete_schools_data.loc[:, "School Name"].value_counts()

Bailey High School       4976
Johnson High School      4761
Hernandez High School    4635
Rodriguez High School    3999
Figueroa High School     2949
Huang High School        2917
Ford High School         2739
Wilson High School       2283
Cabrera High School      1858
Wright High School       1800
Shelton High School      1761
Thomas High School       1635
Griffin High School      1468
Pena High School          962
Holden High School        427
Name: School Name, dtype: int64

In [31]:
#Create a dictionary matching unique schools with their total student population
#The counts are computed from the data (rows per "School Name") rather than retyped from
#the printed output, so they stay correct if the input files change.
total_students_per_school_dict = updated_complete_schools_data["School Name"].value_counts().to_dict()
#List of (school, count) pairs ordered alphabetically by school name
sorted_total_students_per_school_dict = sorted(total_students_per_school_dict.items())
sorted_total_students_per_school_dict

[('Bailey High School', 4976),
 ('Cabrera High School', 1858),
 ('Figueroa High School', 2949),
 ('Ford High School', 2739),
 ('Griffin High School', 1468),
 ('Hernandez High School', 4635),
 ('Holden High School', 427),
 ('Huang High School', 2917),
 ('Johnson High School', 4761),
 ('Pena High School', 962),
 ('Rodriguez High School', 3999),
 ('Shelton High School', 1761),
 ('Thomas High School', 1635),
 ('Wilson High School', 2283),
 ('Wright High School', 1800)]

In [32]:
#Store just the student counts from "sorted_total_students_per_school_dict",
#i.e. one count per school in alphabetical school order
#Derived from that list instead of retyping the numbers by hand
sorted_students_per_school_lst = tuple(
    student_count for _school, student_count in sorted_total_students_per_school_dict
)
sorted_students_per_school_lst

(4976,
 1858,
 2949,
 2739,
 1468,
 4635,
 427,
 2917,
 4761,
 962,
 3999,
 1761,
 1635,
 2283,
 1800)

In [33]:
#Collapse to one row per school (the first row found for each "School Name")
unique_complete_schools_data = updated_complete_schools_data.drop_duplicates(subset=["School Name"])
unique_complete_schools_data

Unnamed: 0,School Name,Type,Size,Budget,Student Name,Student ID,Gender,Grade,Reading Score,Math Score
19584,Bailey High School,District,4976,3124928,Tammie Fox,19584,F,11th,82,92
16944,Cabrera High School,Charter,1858,1081356,William Duncan,16944,M,11th,78,68
3310,Figueroa High School,District,2949,1884411,Brenda Scott,3310,F,12th,76,72
35233,Ford High School,District,2739,1763916,Zoe Burton,35233,F,9th,91,55
13322,Griffin High School,Charter,1468,917500,Wendy Rogers,13322,F,9th,76,83
12013,Hernandez High School,District,4635,3022020,Joshua Bautista,12013,M,11th,90,79
22954,Holden High School,Charter,427,248087,Jessica Hanna,22954,F,10th,87,73
0,Huang High School,District,2917,1910635,Paul Bradley,0,M,9th,66,79
30720,Johnson High School,District,4761,3094650,Lisa Lyons,30720,F,9th,88,70
23957,Pena High School,Charter,962,585858,Gregory Ruiz,23957,M,11th,82,85


In [34]:
#Add a "Total Students" column
#Take an explicit copy first so the column assignment does not trigger pandas' SettingWithCopyWarning
unique_complete_schools_data = unique_complete_schools_data.copy()
#Look each school's count up by name (rows per "School Name" in the full student table)
#instead of assigning a list by position, which silently depends on the row order
students_per_school = updated_complete_schools_data["School Name"].value_counts()
unique_complete_schools_data["Total Students"] = unique_complete_schools_data["School Name"].map(students_per_school)
unique_complete_schools_data

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


Unnamed: 0,School Name,Type,Size,Budget,Student Name,Student ID,Gender,Grade,Reading Score,Math Score,Total Students
19584,Bailey High School,District,4976,3124928,Tammie Fox,19584,F,11th,82,92,4976
16944,Cabrera High School,Charter,1858,1081356,William Duncan,16944,M,11th,78,68,1858
3310,Figueroa High School,District,2949,1884411,Brenda Scott,3310,F,12th,76,72,2949
35233,Ford High School,District,2739,1763916,Zoe Burton,35233,F,9th,91,55,2739
13322,Griffin High School,Charter,1468,917500,Wendy Rogers,13322,F,9th,76,83,1468
12013,Hernandez High School,District,4635,3022020,Joshua Bautista,12013,M,11th,90,79,4635
22954,Holden High School,Charter,427,248087,Jessica Hanna,22954,F,10th,87,73,427
0,Huang High School,District,2917,1910635,Paul Bradley,0,M,9th,66,79,2917
30720,Johnson High School,District,4761,3094650,Lisa Lyons,30720,F,9th,88,70,4761
23957,Pena High School,Charter,962,585858,Gregory Ruiz,23957,M,11th,82,85,962


In [35]:
#Use "School Name" as the row index (the column itself is removed from the data columns)
indexed_complete_schools_data = unique_complete_schools_data.set_index("School Name")
indexed_complete_schools_data

Unnamed: 0_level_0,Type,Size,Budget,Student Name,Student ID,Gender,Grade,Reading Score,Math Score,Total Students
School Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Bailey High School,District,4976,3124928,Tammie Fox,19584,F,11th,82,92,4976
Cabrera High School,Charter,1858,1081356,William Duncan,16944,M,11th,78,68,1858
Figueroa High School,District,2949,1884411,Brenda Scott,3310,F,12th,76,72,2949
Ford High School,District,2739,1763916,Zoe Burton,35233,F,9th,91,55,2739
Griffin High School,Charter,1468,917500,Wendy Rogers,13322,F,9th,76,83,1468
Hernandez High School,District,4635,3022020,Joshua Bautista,12013,M,11th,90,79,4635
Holden High School,Charter,427,248087,Jessica Hanna,22954,F,10th,87,73,427
Huang High School,District,2917,1910635,Paul Bradley,0,M,9th,66,79,2917
Johnson High School,District,4761,3094650,Lisa Lyons,30720,F,9th,88,70,4761
Pena High School,Charter,962,585858,Gregory Ruiz,23957,M,11th,82,85,962


In [36]:
#Keep only "Type", "Total Students" and "Budget", in that order (so "Total Students" is the second column)
summary_columns = ["Type", "Total Students", "Budget"]
indexed_complete_schools_data_short = indexed_complete_schools_data[summary_columns]
indexed_complete_schools_data_short

Unnamed: 0_level_0,Type,Total Students,Budget
School Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Bailey High School,District,4976,3124928
Cabrera High School,Charter,1858,1081356
Figueroa High School,District,2949,1884411
Ford High School,District,2739,1763916
Griffin High School,Charter,1468,917500
Hernandez High School,District,4635,3022020
Holden High School,Charter,427,248087
Huang High School,District,2917,1910635
Johnson High School,District,4761,3094650
Pena High School,Charter,962,585858


In [37]:
#Rename "Type" to "School Type" and "Budget" to "Total School Budget"
summary_headers = {"Type": "School Type", "Budget": "Total School Budget"}
indexed_complete_schools_data_short = indexed_complete_schools_data_short.rename(columns=summary_headers)
indexed_complete_schools_data_short

Unnamed: 0_level_0,School Type,Total Students,Total School Budget
School Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Bailey High School,District,4976,3124928
Cabrera High School,Charter,1858,1081356
Figueroa High School,District,2949,1884411
Ford High School,District,2739,1763916
Griffin High School,Charter,1468,917500
Hernandez High School,District,4635,3022020
Holden High School,Charter,427,248087
Huang High School,District,2917,1910635
Johnson High School,District,4761,3094650
Pena High School,Charter,962,585858


In [38]:
#Add "Per Student Budget" column: each school's total budget divided by its total students
#Done as a single column-wise division instead of looping over the rows with iterrows()
indexed_complete_schools_data_short["Per Student Budget"] = (
    indexed_complete_schools_data_short["Total School Budget"]
    / indexed_complete_schools_data_short["Total Students"]
)
indexed_complete_schools_data_short

Unnamed: 0_level_0,School Type,Total Students,Total School Budget,Per Student Budget
School Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Bailey High School,District,4976,3124928,628.0
Cabrera High School,Charter,1858,1081356,582.0
Figueroa High School,District,2949,1884411,639.0
Ford High School,District,2739,1763916,644.0
Griffin High School,Charter,1468,917500,625.0
Hernandez High School,District,4635,3022020,652.0
Holden High School,Charter,427,248087,581.0
Huang High School,District,2917,1910635,655.0
Johnson High School,District,4761,3094650,650.0
Pena High School,Charter,962,585858,609.0


In [39]:
#Average math score of the students at each school
math_scores_by_school = updated_complete_schools_data.groupby("School Name")["Math Score"]
groupby_math = math_scores_by_school.mean()
groupby_math.head(15)

School Name
Bailey High School       77.048432
Cabrera High School      83.061895
Figueroa High School     76.711767
Ford High School         77.102592
Griffin High School      83.351499
Hernandez High School    77.289752
Holden High School       83.803279
Huang High School        76.629414
Johnson High School      77.072464
Pena High School         83.839917
Rodriguez High School    76.842711
Shelton High School      83.359455
Thomas High School       83.418349
Wilson High School       83.274201
Wright High School       83.682222
Name: Math Score, dtype: float64

In [40]:
#Average reading score of the students at each school
reading_scores_by_school = updated_complete_schools_data.groupby("School Name")["Reading Score"]
groupby_reading = reading_scores_by_school.mean()
groupby_reading.head(15)

School Name
Bailey High School       81.033963
Cabrera High School      83.975780
Figueroa High School     81.158020
Ford High School         80.746258
Griffin High School      83.816757
Hernandez High School    80.934412
Holden High School       83.814988
Huang High School        81.182722
Johnson High School      80.966394
Pena High School         84.044699
Rodriguez High School    80.744686
Shelton High School      83.725724
Thomas High School       83.848930
Wilson High School       83.989488
Wright High School       83.955000
Name: Reading Score, dtype: float64

In [41]:
#Per-school average math scores as a plain list and as a Series with a default 0..n-1 index
#The values come straight from "groupby_math" (full precision, same school order) instead of
#being retyped from its printed, rounded output
lst = groupby_math.tolist()

groupby_math_lst = lst

per_school_math_avg = pd.Series(groupby_math_lst)
per_school_math_avg.head(15)

0     77.048432
1     83.061895
2     76.711767
3     77.102592
4     83.351499
5     77.289752
6     83.803279
7     76.629414
8     77.072464
9     83.839917
10    76.842711
11    83.359455
12    83.418349
13    83.274201
14    83.682222
dtype: float64

In [42]:
#Per-school average reading scores as a plain list and as a Series with a default 0..n-1 index
#The values come straight from "groupby_reading" (full precision, same school order) instead of
#being retyped from its printed, rounded output
lst2 = groupby_reading.tolist()

groupby_reading_lst = lst2

per_school_reading_avg = pd.Series(groupby_reading_lst)
per_school_reading_avg.head(15)

0     81.033963
1     83.975780
2     81.158020
3     80.746258
4     83.816757
5     80.934412
6     83.814988
7     81.182722
8     80.966394
9     84.044699
10    80.744686
11    83.725724
12    83.848930
13    83.989488
14    83.955000
dtype: float64

In [43]:
#Add "Average Math Score" column
#"lst" is kept as the plain list of averages, now taken from "groupby_math" instead of retyped numbers
lst = groupby_math.tolist()

#Assign the Series itself: pandas matches values to rows by the "School Name" index,
#so each school gets its own average even if the row order ever changes
indexed_complete_schools_data_short["Average Math Score"] = groupby_math
indexed_complete_schools_data_short

Unnamed: 0_level_0,School Type,Total Students,Total School Budget,Per Student Budget,Average Math Score
School Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Bailey High School,District,4976,3124928,628.0,77.048432
Cabrera High School,Charter,1858,1081356,582.0,83.061895
Figueroa High School,District,2949,1884411,639.0,76.711767
Ford High School,District,2739,1763916,644.0,77.102592
Griffin High School,Charter,1468,917500,625.0,83.351499
Hernandez High School,District,4635,3022020,652.0,77.289752
Holden High School,Charter,427,248087,581.0,83.803279
Huang High School,District,2917,1910635,655.0,76.629414
Johnson High School,District,4761,3094650,650.0,77.072464
Pena High School,Charter,962,585858,609.0,83.839917


In [44]:
#Add "Average Reading Score" column
#"lst2" is kept as the plain list of averages, now taken from "groupby_reading" instead of retyped numbers
lst2 = groupby_reading.tolist()

#Assign the Series itself: pandas matches values to rows by the "School Name" index,
#so each school gets its own average even if the row order ever changes
indexed_complete_schools_data_short["Average Reading Score"] = groupby_reading
indexed_complete_schools_data_short

Unnamed: 0_level_0,School Type,Total Students,Total School Budget,Per Student Budget,Average Math Score,Average Reading Score
School Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Bailey High School,District,4976,3124928,628.0,77.048432,81.033963
Cabrera High School,Charter,1858,1081356,582.0,83.061895,83.97578
Figueroa High School,District,2949,1884411,639.0,76.711767,81.15802
Ford High School,District,2739,1763916,644.0,77.102592,80.746258
Griffin High School,Charter,1468,917500,625.0,83.351499,83.816757
Hernandez High School,District,4635,3022020,652.0,77.289752,80.934412
Holden High School,Charter,427,248087,581.0,83.803279,83.814988
Huang High School,District,2917,1910635,655.0,76.629414,81.182722
Johnson High School,District,4761,3094650,650.0,77.072464,80.966394
Pena High School,Charter,962,585858,609.0,83.839917,84.044699


In [46]:
#Rows of the school-sorted table where the math score is 70 or higher
has_passing_math_score = updated_complete_schools_data["Math Score"].ge(70)
math_passing_data_per_school = updated_complete_schools_data.loc[has_passing_math_score, :]
math_passing_data_per_school

Unnamed: 0,School Name,Type,Size,Budget,Student Name,Student ID,Gender,Grade,Reading Score,Math Score
19584,Bailey High School,District,4976,3124928,Tammie Fox,19584,F,11th,82,92
21193,Bailey High School,District,4976,3124928,Jennifer Murray,21193,F,9th,88,89
21191,Bailey High School,District,4976,3124928,Cameron Miller,21191,M,11th,70,75
21190,Bailey High School,District,4976,3124928,Thomas Rasmussen,21190,M,12th,77,82
21189,Bailey High School,District,4976,3124928,Jennifer Miller,21189,F,11th,95,83
...,...,...,...,...,...,...,...,...,...,...
24829,Wright High School,Charter,1800,1049400,John Lozano,24829,M,11th,84,77
24828,Wright High School,Charter,1800,1049400,Jonathan Thomas,24828,M,12th,95,80
24827,Wright High School,Charter,1800,1049400,Lori Ramirez,24827,F,10th,74,74
24797,Wright High School,Charter,1800,1049400,Scott Scott,24797,M,10th,90,83


In [50]:
#Number of students with a math score >= 70 at each school, largest count first
math_passing_data_per_school.loc[:, "School Name"].value_counts()

Bailey High School       3318
Johnson High School      3145
Hernandez High School    3094
Rodriguez High School    2654
Wilson High School       2143
Figueroa High School     1946
Huang High School        1916
Ford High School         1871
Cabrera High School      1749
Wright High School       1680
Shelton High School      1653
Thomas High School       1525
Griffin High School      1371
Pena High School          910
Holden High School        395
Name: School Name, dtype: int64

In [51]:
#Rows of the school-sorted table where the reading score is 70 or higher
has_passing_reading_score = updated_complete_schools_data["Reading Score"].ge(70)
reading_passing_data_per_school = updated_complete_schools_data.loc[has_passing_reading_score, :]
reading_passing_data_per_school

Unnamed: 0,School Name,Type,Size,Budget,Student Name,Student ID,Gender,Grade,Reading Score,Math Score
19584,Bailey High School,District,4976,3124928,Tammie Fox,19584,F,11th,82,92
21193,Bailey High School,District,4976,3124928,Jennifer Murray,21193,F,9th,88,89
21192,Bailey High School,District,4976,3124928,Lisa Pineda,21192,F,9th,86,67
21191,Bailey High School,District,4976,3124928,Cameron Miller,21191,M,11th,70,75
21190,Bailey High School,District,4976,3124928,Thomas Rasmussen,21190,M,12th,77,82
...,...,...,...,...,...,...,...,...,...,...
24829,Wright High School,Charter,1800,1049400,John Lozano,24829,M,11th,84,77
24828,Wright High School,Charter,1800,1049400,Jonathan Thomas,24828,M,12th,95,80
24827,Wright High School,Charter,1800,1049400,Lori Ramirez,24827,F,10th,74,74
24797,Wright High School,Charter,1800,1049400,Scott Scott,24797,M,10th,90,83


In [52]:
#Number of students with a reading score >= 70 at each school, largest count first
reading_passing_data_per_school.loc[:, "School Name"].value_counts()

Bailey High School       4077
Johnson High School      3867
Hernandez High School    3748
Rodriguez High School    3208
Figueroa High School     2381
Huang High School        2372
Wilson High School       2204
Ford High School         2172
Cabrera High School      1803
Wright High School       1739
Shelton High School      1688
Thomas High School       1591
Griffin High School      1426
Pena High School          923
Holden High School        411
Name: School Name, dtype: int64

In [None]:
#District-wide percent of students with >= 70 math scores
#NOTE(review): this is the same formula as "percent_passing_math" above, not a per-school figure
overall_percent_passing_math = 100 * (passing_math / total_unique_students)
overall_percent_passing_math