District Summary

In [1]:
# Dependencies
import pandas as pd

In [2]:
# Relative locations of the two CSV inputs (schools first, then students)
school_file, student_file = (
    "Resources/schools_complete.csv",
    "Resources/students_complete.csv",
)

In [3]:
# Load each CSV into its own DataFrame with pandas' default parsing options
school_file_df = pd.read_csv(filepath_or_buffer=school_file)
student_file_df = pd.read_csv(filepath_or_buffer=student_file)

In [4]:
# Show the school table: one row per school, with its type, size and budget.
# A bare last expression is rendered by the notebook; no print() is needed.
school_file_df

Unnamed: 0,School ID,school_name,type,size,budget
0,0,Huang High School,District,2917,1910635
1,1,Figueroa High School,District,2949,1884411
2,2,Shelton High School,Charter,1761,1056600
3,3,Hernandez High School,District,4635,3022020
4,4,Griffin High School,Charter,1468,917500
5,5,Wilson High School,Charter,2283,1319574
6,6,Cabrera High School,Charter,1858,1081356
7,7,Bailey High School,District,4976,3124928
8,8,Holden High School,Charter,427,248087
9,9,Pena High School,Charter,962,585858


In [5]:
# District-wide budget: add up the per-school "budget" column.
# Despite the `_df` suffix, the result is a single number, not a DataFrame.
budget_total_df = school_file_df.loc[:, "budget"].sum()
budget_total_df

24649428

In [6]:
# Show the student table: one row per student, with grade, school and both test scores.
# A bare last expression is rendered by the notebook; no print() is needed.
student_file_df

Unnamed: 0,Student ID,student_name,gender,grade,school_name,reading_score,math_score
0,0,Paul Bradley,M,9th,Huang High School,66,79
1,1,Victor Smith,M,12th,Huang High School,94,61
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60
3,3,Dr. Richard Scott,M,12th,Huang High School,67,58
4,4,Bonnie Ray,F,9th,Huang High School,97,84
...,...,...,...,...,...,...,...
39165,39165,Donna Howard,F,12th,Thomas High School,99,90
39166,39166,Dawn Bell,F,10th,Thomas High School,95,70
39167,39167,Rebecca Tanner,F,9th,Thomas High School,73,84
39168,39168,Desiree Kidd,F,10th,Thomas High School,99,90


In [7]:
# Merge school data and student data.
# Left join: every student row is kept and the matching school's columns
# (School ID, type, size, budget) are attached by school name.
# The join key is given once; the original listed "school_name" twice.
# validate="many_to_one" makes pandas raise a MergeError if a school name
# appears more than once in the school table, which would otherwise silently
# duplicate student rows and skew every count and average computed below.
school_data_complete = pd.merge(
    student_file_df,
    school_file_df,
    how="left",
    on="school_name",
    validate="many_to_one",
)
school_data_complete

Unnamed: 0,Student ID,student_name,gender,grade,school_name,reading_score,math_score,School ID,type,size,budget
0,0,Paul Bradley,M,9th,Huang High School,66,79,0,District,2917,1910635
1,1,Victor Smith,M,12th,Huang High School,94,61,0,District,2917,1910635
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60,0,District,2917,1910635
3,3,Dr. Richard Scott,M,12th,Huang High School,67,58,0,District,2917,1910635
4,4,Bonnie Ray,F,9th,Huang High School,97,84,0,District,2917,1910635
...,...,...,...,...,...,...,...,...,...,...,...
39165,39165,Donna Howard,F,12th,Thomas High School,99,90,14,Charter,1635,1043130
39166,39166,Dawn Bell,F,10th,Thomas High School,95,70,14,Charter,1635,1043130
39167,39167,Rebecca Tanner,F,9th,Thomas High School,73,84,14,Charter,1635,1043130
39168,39168,Desiree Kidd,F,10th,Thomas High School,99,90,14,Charter,1635,1043130


In [8]:
# Give the lower-case source columns reader-friendly titles.
# Columns not listed in the mapping keep their existing labels.
school_data_renamed_df = school_data_complete.rename(
    columns={
        "student_name": "Student Name",
        "gender": "Gender",
        "grade": "Grade",
        "school_name": "School Name",
        "reading_score": "Reading Score",
        "math_score": "Math Score",
        "type": "Type of School",
        "size": "School Population",
        "budget": "Budget",
    }
)
school_data_renamed_df

Unnamed: 0,Student ID,Student Name,Gender,Grade,School Name,Reading Score,Math Score,School ID,Type of School,School Population,Budget
0,0,Paul Bradley,M,9th,Huang High School,66,79,0,District,2917,1910635
1,1,Victor Smith,M,12th,Huang High School,94,61,0,District,2917,1910635
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60,0,District,2917,1910635
3,3,Dr. Richard Scott,M,12th,Huang High School,67,58,0,District,2917,1910635
4,4,Bonnie Ray,F,9th,Huang High School,97,84,0,District,2917,1910635
...,...,...,...,...,...,...,...,...,...,...,...
39165,39165,Donna Howard,F,12th,Thomas High School,99,90,14,Charter,1635,1043130
39166,39166,Dawn Bell,F,10th,Thomas High School,95,70,14,Charter,1635,1043130
39167,39167,Rebecca Tanner,F,9th,Thomas High School,73,84,14,Charter,1635,1043130
39168,39168,Desiree Kidd,F,10th,Thomas High School,99,90,14,Charter,1635,1043130


In [43]:
# Check column types: confirm the score, ID, population and budget columns
# are numeric before aggregating them.
school_data_renamed_df.dtypes

Student ID            int64
Student Name         object
Gender               object
Grade                object
School Name          object
Reading Score         int64
Math Score            int64
School ID             int64
Type of School       object
School Population     int64
Budget                int64
dtype: object

In [9]:
# District-wide mean reading score across all student rows (a scalar, despite the `_df` suffix)
reading_average_df = school_data_renamed_df.loc[:, "Reading Score"].mean()
reading_average_df

81.87784018381414

In [10]:
# District-wide mean math score across all student rows (a scalar, despite the `_df` suffix)
math_average_df = school_data_renamed_df.loc[:, "Math Score"].mean()
math_average_df

78.98537145774827

In [11]:
# Number of distinct schools, counted by unique "School ID" values
# (dropna=True is the pandas default, spelled out here)
school_count_unique_df = school_data_renamed_df["School ID"].nunique(dropna=True)
school_count_unique_df

15

In [12]:
# Number of distinct students, counted by unique "Student ID" values
# (dropna=True is the pandas default, spelled out here)
student_count_unique_df = school_data_renamed_df["Student ID"].nunique(dropna=True)
student_count_unique_df

39170

In [13]:
# Rows for students who passed math, i.e. a math score of 70 or higher
passing_math_students_df = school_data_renamed_df[
    school_data_renamed_df["Math Score"].ge(70)
]
passing_math_students_df

Unnamed: 0,Student ID,Student Name,Gender,Grade,School Name,Reading Score,Math Score,School ID,Type of School,School Population,Budget
0,0,Paul Bradley,M,9th,Huang High School,66,79,0,District,2917,1910635
4,4,Bonnie Ray,F,9th,Huang High School,97,84,0,District,2917,1910635
5,5,Bryan Miranda,M,9th,Huang High School,94,94,0,District,2917,1910635
6,6,Sheena Carter,F,11th,Huang High School,82,80,0,District,2917,1910635
8,8,Michael Roth,M,10th,Huang High School,95,87,0,District,2917,1910635
...,...,...,...,...,...,...,...,...,...,...,...
39165,39165,Donna Howard,F,12th,Thomas High School,99,90,14,Charter,1635,1043130
39166,39166,Dawn Bell,F,10th,Thomas High School,95,70,14,Charter,1635,1043130
39167,39167,Rebecca Tanner,F,9th,Thomas High School,73,84,14,Charter,1635,1043130
39168,39168,Desiree Kidd,F,10th,Thomas High School,99,90,14,Charter,1635,1043130


In [14]:
# Count of distinct students in the passing-math subset (a scalar, despite the `_df` suffix)
passing_math_unique_df = passing_math_students_df.loc[:, "Student ID"].nunique()
passing_math_unique_df

29370

In [15]:
# % of students with passing math score:
# distinct students scoring 70 or higher in math, divided by all distinct
# students, scaled to a 0-100 percentage. The result is a plain float,
# despite the `_df` suffix.
percent_passing_math_df = (passing_math_unique_df/student_count_unique_df) * 100
percent_passing_math_df

74.9808526933878

In [16]:
# Rows for students who passed reading, i.e. a reading score of 70 or higher.
# NOTE: this name holds a DataFrame here; the next cell reassigns it to the
# scalar count of passing students.
passing_reading_unique_df = school_data_renamed_df[
    school_data_renamed_df["Reading Score"].ge(70)
]
passing_reading_unique_df

Unnamed: 0,Student ID,Student Name,Gender,Grade,School Name,Reading Score,Math Score,School ID,Type of School,School Population,Budget
1,1,Victor Smith,M,12th,Huang High School,94,61,0,District,2917,1910635
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60,0,District,2917,1910635
4,4,Bonnie Ray,F,9th,Huang High School,97,84,0,District,2917,1910635
5,5,Bryan Miranda,M,9th,Huang High School,94,94,0,District,2917,1910635
6,6,Sheena Carter,F,11th,Huang High School,82,80,0,District,2917,1910635
...,...,...,...,...,...,...,...,...,...,...,...
39165,39165,Donna Howard,F,12th,Thomas High School,99,90,14,Charter,1635,1043130
39166,39166,Dawn Bell,F,10th,Thomas High School,95,70,14,Charter,1635,1043130
39167,39167,Rebecca Tanner,F,9th,Thomas High School,73,84,14,Charter,1635,1043130
39168,39168,Desiree Kidd,F,10th,Thomas High School,99,90,14,Charter,1635,1043130


In [17]:
# Number of students with passing reading scores (reading score of 70 or higher).
# The count is derived directly from the merged frame instead of from the
# current value of `passing_reading_unique_df`. The original overwrote that
# name (a DataFrame) with an int computed from itself, so running the cell a
# second time failed when indexing the int. This version gives the same
# number and can be re-run safely.
passing_reading_unique_df = school_data_renamed_df.loc[
    school_data_renamed_df["Reading Score"] >= 70, "Student ID"
].nunique()
passing_reading_unique_df

33610

In [18]:
# % of students with passing reading score:
# distinct students scoring 70 or higher in reading, divided by all distinct
# students, scaled to a 0-100 percentage. The result is a plain float,
# despite the `_df` suffix.
percent_passing_reading_df = (passing_reading_unique_df/student_count_unique_df) * 100
percent_passing_reading_df

85.80546336482001

In [19]:
# District Summary: a one-row table collecting the headline figures computed above.
# Each scalar is wrapped in a single-element list so pandas builds exactly one row.
overall_summary_df = pd.DataFrame(
    {
        label: [value]
        for label, value in (
            ("Total Schools", school_count_unique_df),
            ("Total Students", student_count_unique_df),
            ("Total Budget", budget_total_df),
            ("Average Math Score", math_average_df),
            ("Average Reading Score", reading_average_df),
            ("% Passing Math", percent_passing_math_df),
            ("% Passing Reading", percent_passing_reading_df),
        )
    }
)
overall_summary_df

Unnamed: 0,Total Schools,Total Students,Total Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading
0,15,39170,24649428,78.985371,81.87784,74.980853,85.805463


In [20]:
# Distinct school names in order of first appearance.
# The result is an array of names, not a DataFrame, despite the `_df` suffix.
schools_df = school_data_renamed_df.loc[:, "School Name"].unique()
schools_df

array(['Huang High School', 'Figueroa High School', 'Shelton High School',
       'Hernandez High School', 'Griffin High School',
       'Wilson High School', 'Cabrera High School', 'Bailey High School',
       'Holden High School', 'Pena High School', 'Wright High School',
       'Rodriguez High School', 'Johnson High School', 'Ford High School',
       'Thomas High School'], dtype=object)

In [29]:
# Re-index the merged frame by school so every row is labelled with its school name;
# preview only the first five rows.
school_data_df = school_data_renamed_df.set_index(keys="School Name")
school_data_df.head(n=5)

Unnamed: 0_level_0,Student ID,Student Name,Gender,Grade,Reading Score,Math Score,School ID,Type of School,School Population,Budget
School Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Huang High School,0,Paul Bradley,M,9th,66,79,0,District,2917,1910635
Huang High School,1,Victor Smith,M,12th,94,61,0,District,2917,1910635
Huang High School,2,Kevin Rodriguez,M,12th,90,60,0,District,2917,1910635
Huang High School,3,Dr. Richard Scott,M,12th,67,58,0,District,2917,1910635
Huang High School,4,Bonnie Ray,F,9th,97,84,0,District,2917,1910635


In [41]:
# Put schools in alpha order.
# "School Name" is the index label of school_data_df (it was moved there by
# set_index), not a regular column; sort_values is given that label as its key.
# Returns a new frame; school_data_df itself is left unsorted.
school_sort_df = school_data_df.sort_values("School Name")
school_sort_df

Unnamed: 0_level_0,Student ID,Student Name,Gender,Grade,Reading Score,Math Score,School ID,Type of School,School Population,Budget
School Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Bailey High School,19584,Tammie Fox,F,11th,82,92,7,District,4976,3124928
Bailey High School,21193,Jennifer Murray,F,9th,88,89,7,District,4976,3124928
Bailey High School,21192,Lisa Pineda,F,9th,86,67,7,District,4976,3124928
Bailey High School,21191,Cameron Miller,M,11th,70,75,7,District,4976,3124928
Bailey High School,21190,Thomas Rasmussen,M,12th,77,82,7,District,4976,3124928
...,...,...,...,...,...,...,...,...,...,...
Wright High School,24829,John Lozano,M,11th,84,77,10,Charter,1800,1049400
Wright High School,24828,Jonathan Thomas,M,12th,95,80,10,Charter,1800,1049400
Wright High School,24827,Lori Ramirez,F,10th,74,74,10,Charter,1800,1049400
Wright High School,24797,Scott Scott,M,10th,90,83,10,Charter,1800,1049400


In [50]:
# Inspect the school-type column of the sorted frame: one value per student row,
# labelled by school name.
school_sort_df["Type of School"]

School Name
Bailey High School    District
Bailey High School    District
Bailey High School    District
Bailey High School    District
Bailey High School    District
                        ...   
Wright High School     Charter
Wright High School     Charter
Wright High School     Charter
Wright High School     Charter
Wright High School     Charter
Name: Type of School, Length: 39170, dtype: object

In [None]:
# Empty scaffold for the per-school summary: the columns are declared but no
# rows are filled in yet.
# NOTE(review): the first column is labelled with a single space; presumably
# it is meant to hold the school name. Confirm before populating.
school_summary_df = pd.DataFrame(
    {
        column: []
        for column in (
            " ",
            "School Type",
            "Total Students",
            "Total School Budget",
            "Per Student Budget",
            "Average Math Score",
            "Average Reading Score",
            "% Passing Math",
            "% Passing Reading",
            "% Overall Passing",
        )
    }
)
school_summary_df