In [1]:
import pandas as pd
from pathlib import Path

In [2]:
# Locate the two input CSVs relative to the notebook's working directory.
school_data_csv = Path("Resources/schools_complete.csv")
student_data_csv = Path("Resources/students_complete.csv")

# Load the school-level and student-level tables into pandas DataFrames.
school_data = pd.read_csv(school_data_csv)
student_data = pd.read_csv(student_data_csv)

# Attach each student's school attributes to the student row.
# - how="left" keeps every student row, matched or not.
# - The join key is listed once (it was previously repeated).
# - validate="many_to_one" makes pandas raise MergeError if a school name
#   occurs more than once in school_data; such a duplicate would silently
#   multiply student rows and skew every count and average computed later.
school_data_complete = pd.merge(
    student_data,
    school_data,
    how="left",
    on="school_name",
    validate="many_to_one",
)
school_data_complete.head()


Unnamed: 0,Student ID,student_name,gender,grade,school_name,reading_score,math_score,School ID,type,size,budget
0,0,Paul Bradley,M,9th,Huang High School,66,79,0,District,2917,1910635
1,1,Victor Smith,M,12th,Huang High School,94,61,0,District,2917,1910635
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60,0,District,2917,1910635
3,3,Dr. Richard Scott,M,12th,Huang High School,67,58,0,District,2917,1910635
4,4,Bonnie Ray,F,9th,Huang High School,97,84,0,District,2917,1910635


In [3]:
# Number of distinct school names present in the merged student/school table.
num_unique_schools = school_data_complete.loc[:, "school_name"].nunique()
num_unique_schools

15

In [4]:
# Total students = number of rows in the merged table.
total_students = school_data_complete.shape[0]
total_students


39170

In [5]:
# District budget: add up the "budget" column of the school-level table.
total_budget = school_data.loc[:, "budget"].sum()
total_budget

24649428

In [6]:
# District-wide mean of the "math_score" column across all student rows.
average_math_score = school_data_complete.loc[:, "math_score"].mean()
average_math_score

78.98537145774827

In [7]:
# District-wide mean of the "reading_score" column across all student rows.
average_reading_score = school_data_complete.loc[:, "reading_score"].mean()
average_reading_score

81.87784018381414

In [8]:
# Percentage of students who passed math (math score greater than or equal to 70).
# Reference figure for this dataset: 29370 / 39170 * 100 = 74.98%.

# Boolean mask: True where the student's math score is at least 70.
passing_math = student_data["math_score"].ge(70)

# The mean of a boolean Series is the fraction of True values; scale to percent.
percent_passing_math = passing_math.mean() * 100
percent_passing_math

74.9808526933878

In [9]:
# Percentage of students who passed reading (reading score greater than or equal to 70).
# Reference figure for this dataset: 33610 / 39170 * 100 = 85.81%.

# Boolean mask: True where the student's reading score is at least 70.
passing_reading = student_data["reading_score"].ge(70)

# The mean of a boolean Series is the fraction of True values; scale to percent.
percent_passing_reading = passing_reading.mean() * 100
percent_passing_reading

85.80546336482001

In [10]:
# Add a boolean "Passing Both" column to the merged table: True for students
# whose math score and reading score are both 70 or higher.
school_data_complete["Passing Both"] = (
    school_data_complete["math_score"].ge(70)
    & school_data_complete["reading_score"].ge(70)
)

# Fraction of True values in the new column, expressed as a percentage.
passing_both_percent = school_data_complete["Passing Both"].mean() * 100

# Show the overall passing percentage as the cell output.
passing_both_percent


65.17232575950983

In [11]:
# District Summary: a one-row table of the district-level metrics computed
# above, indexed by the number of schools.
district_summary = pd.DataFrame(
    [
        {
            "Total Schools": num_unique_schools,
            "Total Students": total_students,
            "Total Budget": total_budget,
            "Average Math Score": average_math_score,
            "Average Reading Score": average_reading_score,
            "% Passing Math": percent_passing_math,
            "% Passing Reading": percent_passing_reading,
            "% Overall Passing Rate": passing_both_percent,
        }
    ]
).set_index("Total Schools")

# Presentation formatting: thousands separators for the student count and a
# dollar amount with two decimals for the budget. Both columns become strings.
district_summary = district_summary.assign(
    **{
        "Total Students": district_summary["Total Students"].map("{:,}".format),
        "Total Budget": district_summary["Total Budget"].map("${:,.2f}".format),
    }
)

# Show the summary table as the cell output.
district_summary


Unnamed: 0_level_0,Total Students,Total Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing Rate
Total Schools,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
15,39170,"$24,649,428.00",78.985371,81.87784,74.980853,85.805463,65.172326


In [12]:
# School type per school: index the school-level table by school name and keep
# only the "type" column (a Series keyed by school_name).
school_types = school_data.set_index("school_name")["type"]

# Wrap the Series in a single-column DataFrame for display.
df_school_types = school_types.to_frame()

# Show the table as the cell output.
df_school_types

Unnamed: 0_level_0,type
school_name,Unnamed: 1_level_1
Huang High School,District
Figueroa High School,District
Shelton High School,Charter
Hernandez High School,District
Griffin High School,Charter
Wilson High School,Charter
Cabrera High School,Charter
Bailey High School,District
Holden High School,Charter
Pena High School,Charter


In [13]:
# Students per school: index the school-level table by school name and keep
# only the "size" column (a Series keyed by school_name).
# NOTE(review): this rebinds `total_students`, which an earlier cell set to the
# district-wide row count, to a per-school Series. Re-running the district
# summary cell after this one would pick up the Series instead of the scalar.
total_students = school_data.set_index("school_name")["size"]

# Wrap the Series in a single-column DataFrame for display.
df_total_students = total_students.to_frame()

# Show the table as the cell output.
df_total_students

Unnamed: 0_level_0,size
school_name,Unnamed: 1_level_1
Huang High School,2917
Figueroa High School,2949
Shelton High School,1761
Hernandez High School,4635
Griffin High School,1468
Wilson High School,2283
Cabrera High School,1858
Bailey High School,4976
Holden High School,427
Pena High School,962


In [14]:
# District-wide total budget and per-student (per capita) spending.

# Sum each school's budget exactly once. The merged table repeats a school's
# budget on every one of its student rows, so reduce it to one row per school
# before summing. Summing the *unique budget values* instead would drop a
# school whenever two schools happen to share the same budget amount.
total_budget = (
    school_data_complete
    .drop_duplicates(subset="school_name")["budget"]
    .sum()
)

# Number of student rows with a non-null "Student ID".
total_students = school_data_complete["Student ID"].count()

# Budget dollars per student across the whole district.
per_capita_spending = total_budget / total_students

print(f"Total Budget: ${total_budget:,.2f}")
print(f"Total Students: {total_students:,}")
print(f"Per Capita Spending: ${per_capita_spending:,.2f}")

Total Budget: $24,649,428.00
Total Students: 39,170
Per Capita Spending: $629.29


In [15]:
# Per-school average math and reading scores.

# Select the two score columns *before* aggregating so that only they are
# averaged. Calling .mean() on the whole grouped table also tries to average
# the text columns (student_name, gender, grade, type), which emits a warning
# on older pandas releases and raises TypeError on pandas 2.x.
avg_scores = (
    school_data_complete
    .groupby("school_name")[["math_score", "reading_score"]]
    .mean()
)

avg_scores

  avg_scores = school_data_complete.groupby(["school_name"]).mean()[["math_score", "reading_score"]]


Unnamed: 0_level_0,math_score,reading_score
school_name,Unnamed: 1_level_1,Unnamed: 2_level_1
Bailey High School,77.048432,81.033963
Cabrera High School,83.061895,83.97578
Figueroa High School,76.711767,81.15802
Ford High School,77.102592,80.746258
Griffin High School,83.351499,83.816757
Hernandez High School,77.289752,80.934412
Holden High School,83.803279,83.814988
Huang High School,76.629414,81.182722
Johnson High School,77.072464,80.966394
Pena High School,83.839917,84.044699


In [37]:
# Boolean mask: True where the student's math score is at least 70.
passing_math = student_data["math_score"].ge(70)

# Group the mask by school, reduce each school to a single True/False, then
# count the True entries.
# NOTE(review): this yields the number of schools with at least one passing
# student, not a per-school count or rate of passing students. Confirm that
# this is the intended metric.
schools_passing_math = (
    passing_math
    .groupby(student_data["school_name"])
    .any()
    .sum()
)
schools_passing_math

15

In [39]:
# Boolean mask: True where the student's reading score is at least 70.
passing_reading = student_data["reading_score"].ge(70)

# Group the mask by school, reduce each school to a single True/False, then
# count the True entries.
# NOTE(review): this yields the number of schools with at least one passing
# student, not a per-school count or rate of passing students. Confirm that
# this is the intended metric.
schools_passing_reading = (
    passing_reading
    .groupby(student_data["school_name"])
    .any()
    .sum()
)
schools_passing_reading

15

In [47]:
# Rows of the merged table for students who scored 70 or higher in both
# reading and math; all columns are kept.
passing_math_and_reading = school_data_complete.loc[
    school_data_complete["reading_score"].ge(70)
    & school_data_complete["math_score"].ge(70)
]
passing_math_and_reading

Unnamed: 0,Student ID,student_name,gender,grade,school_name,reading_score,math_score,School ID,type,size,budget,Passing Both,passing_math,passing_reading
4,4,Bonnie Ray,F,9th,Huang High School,97,84,0,District,2917,1910635,True,True,True
5,5,Bryan Miranda,M,9th,Huang High School,94,94,0,District,2917,1910635,True,True,True
6,6,Sheena Carter,F,11th,Huang High School,82,80,0,District,2917,1910635,True,True,True
8,8,Michael Roth,M,10th,Huang High School,95,87,0,District,2917,1910635,True,True,True
9,9,Matthew Greene,M,10th,Huang High School,96,84,0,District,2917,1910635,True,True,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
39165,39165,Donna Howard,F,12th,Thomas High School,99,90,14,Charter,1635,1043130,True,True,True
39166,39166,Dawn Bell,F,10th,Thomas High School,95,70,14,Charter,1635,1043130,True,True,True
39167,39167,Rebecca Tanner,F,9th,Thomas High School,73,84,14,Charter,1635,1043130,True,True,True
39168,39168,Desiree Kidd,F,10th,Thomas High School,99,90,14,Charter,1635,1043130,True,True,True


In [51]:
# Rebuild the subset of students who scored 70 or higher in both reading and
# math, then report how many rows it contains.
passing_math_and_reading = school_data_complete.loc[
    school_data_complete["reading_score"].ge(70)
    & school_data_complete["math_score"].ge(70)
]
count_passing = passing_math_and_reading.shape[0]
print(count_passing)

25528
