In [1]:
# Dependencies and Setup
import pandas as pd
from pathlib import Path
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

In [2]:
# File to Load (Remember to Change These)
school_data_to_load = Path("Resources/schools_complete.csv")
student_data_to_load = Path("Resources/students_complete.csv")

In [3]:
# Read School and Student Data File and store into Pandas DataFrames
school_data = pd.read_csv(school_data_to_load, header=0)
student_data = pd.read_csv(student_data_to_load, header=0)
school_data.head()

Unnamed: 0,School ID,school_name,type,size,budget
0,0,Huang High School,District,2917,1910635
1,1,Figueroa High School,District,2949,1884411
2,2,Shelton High School,Charter,1761,1056600
3,3,Hernandez High School,District,4635,3022020
4,4,Griffin High School,Charter,1468,917500


In [4]:
# Combine the data into a single dataset.
school_data_complete = pd.merge(student_data, school_data, how="left", on="school_name")
school_data_complete.head()

Unnamed: 0,Student ID,student_name,gender,grade,school_name,reading_score,math_score,School ID,type,size,budget
0,0,Paul Bradley,M,9th,Huang High School,66,79,0,District,2917,1910635
1,1,Victor Smith,M,12th,Huang High School,94,61,0,District,2917,1910635
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60,0,District,2917,1910635
3,3,Dr. Richard Scott,M,12th,Huang High School,67,58,0,District,2917,1910635
4,4,Bonnie Ray,F,9th,Huang High School,97,84,0,District,2917,1910635


In [5]:
# Part 1: District Summary Calculations
# Calculate the total number of unique schools
school_count = school_data_complete["school_name"].nunique()
school_count

15

In [6]:
# Calculate the total number of students
student_count = school_data_complete["Student ID"].nunique()
student_count

39170

In [7]:
# Calculate the total budget
total_budget = school_data["budget"].sum()
print (f"${total_budget:,.2f}")

$24,649,428.00


In [8]:
# Average math score
avg_math = school_data_complete["math_score"].mean()
print (f"{avg_math:.2f}")

78.99


In [9]:
# Average reading score
avg_reading = school_data_complete["reading_score"].mean()
print (f"{avg_reading:.2f}")

81.88


In [10]:
# Percentage of students passing math
passing_math = school_data_complete["math_score"] >= 60
passing_math_percent = (passing_math.value_counts()/student_count)*100

passing_math_true = passing_math_percent[True]
passing_math_true

92.4457492979321

In [11]:
# Percentage of students passing reading
passing_reading = school_data_complete["reading_score"] >= 60
passing_reading_percent = (passing_reading.value_counts()/student_count)*100

passing_reading_true = passing_reading_percent[True]
passing_reading_true

100.0

In [12]:
# Percentage of students passing BOTH math and reading 
passing_both = school_data_complete[(school_data_complete["math_score"] >= 60) & 
                                    (school_data_complete["reading_score"] >= 60)]

percentate_passing_both = (len(passing_both)/len(school_data_complete))*100
percentate_passing_both

92.4457492979321

In [13]:
# Create a data frame that summarizes the school district values calculated above
# First, create a dictionary of lists
district_data = {"Total Schools": [school_count],
                 "Total Students": [student_count],
                 "Total Budget": [total_budget],
                 "Average Math Score": [avg_math],
                 "Average Reading Score": [avg_reading],
                 "Percent Passing Math": [passing_math_true],
                 "Percent Passing Reading": [passing_reading_true],
                 "Passing Grade Overall": [percentate_passing_both]}
# Then, call the dictionary as a data frame
district_data_df = pd.DataFrame(district_data)
district_data_df

Unnamed: 0,Total Schools,Total Students,Total Budget,Average Math Score,Average Reading Score,Percent Passing Math,Percent Passing Reading,Passing Grade Overall
0,15,39170,24649428,78.985371,81.87784,92.445749,100.0,92.445749


In [14]:
# Part 2: School Summary Calculations

# Create a variable to hold the School Name
school_name = school_data_complete["school_name"].unique().tolist()
school_name


['Huang High School',
 'Figueroa High School',
 'Shelton High School',
 'Hernandez High School',
 'Griffin High School',
 'Wilson High School',
 'Cabrera High School',
 'Bailey High School',
 'Holden High School',
 'Pena High School',
 'Wright High School',
 'Rodriguez High School',
 'Johnson High School',
 'Ford High School',
 'Thomas High School']

In [15]:
# Make a list of school types
school_type = school_data_complete["type"].unique().tolist()
student_id = school_data_complete["Student ID"]
school_type


['District', 'Charter']

In [16]:
# Calculate the number of students per school
# Group the data in the combined sheet by school name and add it to a new variable school_groupby
school_groupby = school_data_complete.groupby(["school_name"])

#Calculates the total number of students per school and add that to a new variable school_counts
school_counts = school_groupby["school_name"].size().tolist()

# Make a library of the school_counts with the school name
student_count_summary = {"School Name": school_name,
                         "Total Students": school_counts}

# Print that library with the column school_name as an index 
student_count_summary_df = pd.DataFrame(student_count_summary).set_index("School Name")
student_count_summary_df


Unnamed: 0_level_0,Total Students
School Name,Unnamed: 1_level_1
Huang High School,4976
Figueroa High School,1858
Shelton High School,2949
Hernandez High School,2739
Griffin High School,1468
Wilson High School,4635
Cabrera High School,427
Bailey High School,2917
Holden High School,4761
Pena High School,962


In [17]:
# Caluculate the budget per school
# Use the variable created above to extract the budget column. Use the .mean function to prevent the column from adding together. Print to a list. 
school_budget = school_groupby["budget"].mean().tolist()

# Make a dictonary of the school_name and the school_budget
school_budget_all = {"School Name": school_name,
                     "School Budget": school_budget}

# create a dataframe with the school name as the index
school_budget_all_df = pd.DataFrame(school_budget_all).set_index("School Name")
school_budget_all_df

Unnamed: 0_level_0,School Budget
School Name,Unnamed: 1_level_1
Huang High School,3124928.0
Figueroa High School,1081356.0
Shelton High School,1884411.0
Hernandez High School,1763916.0
Griffin High School,917500.0
Wilson High School,3022020.0
Cabrera High School,248087.0
Bailey High School,1910635.0
Holden High School,3094650.0
Pena High School,585858.0


In [18]:
# Caluclate the budget per student
# Use the variable created above to extract the budget column. Use the .mean function to prevent the column from adding together. Print to a list. 
#school_budget = school_groupby["budget"].mean().tolist()

# Make a library of the school_name and the school_budget
#school_budget_all = {"School Name": school_name,
                    #"School Budget": school_budget}

# Calulcate the per student budget by dividing the school budget by the number of students
student_budget = school_budget_all_df["School Budget"]/student_count_summary_df["Total Students"]
student_budget_all = {"School Name": school_name,
                     "Per Student Budget": student_budget}
student_budget_all_df = pd.DataFrame(student_budget_all).set_index("School Name")
student_budget_all_df

Unnamed: 0_level_0,Per Student Budget
School Name,Unnamed: 1_level_1
Huang High School,628.0
Figueroa High School,582.0
Shelton High School,639.0
Hernandez High School,644.0
Griffin High School,625.0
Wilson High School,652.0
Cabrera High School,581.0
Bailey High School,655.0
Holden High School,650.0
Pena High School,609.0


In [33]:
 
# Calculate the average math score for each school
# Sum all the math scores for each school, then divide by the total number of students at that school

school_math_scores_df = school_data_complete[["school_name", "math_score", "size"]] 
#school_math_scores_df.head()

school_math_group = school_math_scores_df.groupby("school_name")["math_score"].sum()
#school_math_group

school_math_group_df = pd.DataFrame(school_math_group)    
#school_math_group_df

#math_score_avg = school_math_group_df["school_math_group"].astype(int)/school_math_scores_df["size"].astype(int)
#math_score_avg

avg_math_score = (school_math_group/(len(school_math_group)))
#avg_math_score

avg_math_score_df = pd.DataFrame(avg_math_score)
avg_math_score_df
                                                     

Unnamed: 0_level_0,math_score
school_name,Unnamed: 1_level_1
Bailey High School,25559.533333
Cabrera High School,10288.6
Figueroa High School,15081.533333
Ford High School,14078.933333
Griffin High School,8157.333333
Hernandez High School,23882.533333
Holden High School,2385.6
Huang High School,14901.866667
Johnson High School,24462.8
Pena High School,5376.933333


In [19]:
# Make a list of columns in the data set for easy reference
columns = school_data_complete.columns.tolist()
columns

['Student ID',
 'student_name',
 'gender',
 'grade',
 'school_name',
 'reading_score',
 'math_score',
 'School ID',
 'type',
 'size',
 'budget']

In [38]:
#Calculate the average reading score per school

school_reading_scores_df = school_data_complete[["school_name", "reading_score"]] 
#school_reading_scores_df.head()

school_reading_group = school_reading_scores_df.groupby("school_name")["reading_score"].sum()
#school_reading_group

school_reading_group_df = pd.DataFrame(school_reading_group)    
#school_reading_group_df

avg_reading_score = (school_reading_group/(len(school_reading_group)))
#avg_reading_score

avg_reading_score_df = pd.DataFrame(avg_reading_score)
avg_reading_score_df

Unnamed: 0_level_0,reading_score
school_name,Unnamed: 1_level_1
Bailey High School,26881.666667
Cabrera High School,10401.8
Figueroa High School,15955.666667
Ford High School,14744.266667
Griffin High School,8202.866667
Hernandez High School,25008.733333
Holden High School,2385.933333
Huang High School,15787.333333
Johnson High School,25698.733333
Pena High School,5390.066667


In [None]:
# Include the following: %passing math, %passing reading, %passing overall.