In [1]:
import pandas as pd
from pathlib import Path

In [2]:
#load File
school_data_to_load = Path("schools_complete.csv")
student_data_to_load = Path("students_complete.csv")

In [3]:
#Read File
school_data_df = pd.read_csv(school_data_to_load)
student_data_df = pd.read_csv(student_data_to_load)

In [4]:
#Review read school file
school_data_df.head()

Unnamed: 0,School ID,school_name,type,size,budget
0,0,Huang High School,District,2917,1910635
1,1,Figueroa High School,District,2949,1884411
2,2,Shelton High School,Charter,1761,1056600
3,3,Hernandez High School,District,4635,3022020
4,4,Griffin High School,Charter,1468,917500


In [5]:
#Review student read file
student_data_df.head()

Unnamed: 0,Student ID,student_name,gender,grade,school_name,reading_score,math_score
0,0,Paul Bradley,M,9th,Huang High School,66,79
1,1,Victor Smith,M,12th,Huang High School,94,61
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60
3,3,Dr. Richard Scott,M,12th,Huang High School,67,58
4,4,Bonnie Ray,F,9th,Huang High School,97,84


In [6]:
# Combine the data into a single dataset  
school_data_complete_df = pd.merge(student_data_df, school_data_df, how="left", on=["school_name", "school_name"])


In [7]:
#School and Student data combined
school_data_complete_df.head()

Unnamed: 0,Student ID,student_name,gender,grade,school_name,reading_score,math_score,School ID,type,size,budget
0,0,Paul Bradley,M,9th,Huang High School,66,79,0,District,2917,1910635
1,1,Victor Smith,M,12th,Huang High School,94,61,0,District,2917,1910635
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60,0,District,2917,1910635
3,3,Dr. Richard Scott,M,12th,Huang High School,67,58,0,District,2917,1910635
4,4,Bonnie Ray,F,9th,Huang High School,97,84,0,District,2917,1910635


In [8]:
#Unique School name count
unique_school_name_df = school_data_complete_df["school_name"].nunique()
unique_school_name_df


15

In [9]:
#Convert school count to integer
schools = pd.DataFrame(data=['15'], index=[0],columns=['Total Schools'])
schools['Total Schools'] = schools['Total Schools'].astype(int)
schools

Unnamed: 0,Total Schools
0,15


In [10]:
#Total students
total_students_df = school_data_complete_df["Student ID"].nunique()
total_students_df

39170

In [11]:
#Convert total student count to integer
students = pd.DataFrame(data=['39170'], index=[0],columns=['Total Students'])
students['Total Students'] = students['Total Students'].astype(int)
students

Unnamed: 0,Total Students
0,39170


In [12]:
#Total Budget
total_budget_df = school_data_df["budget"].sum()
total_budget_df

24649428

In [13]:
#Convert total budget to integer
budget = pd.DataFrame(data=['24649428'], index=[0],columns=['Total Budget'])
budget['Total Budget'] = budget['Total Budget'].astype(int)
budget

Unnamed: 0,Total Budget
0,24649428


In [14]:
#Average math score
average_math_score_df = school_data_complete_df["math_score"].mean()
average_math_score_df

78.98537145774827

In [15]:
#Convert average math score to integer
average_math_score = pd.DataFrame(data=['78.985732'], index=[0],columns=['Average Math Score'])
average_math_score

Unnamed: 0,Average Math Score
0,78.985732


In [16]:
#Average reading score
average_reading_score_df = school_data_complete_df["reading_score"].mean()
average_reading_score_df

81.87784018381414

In [17]:
#Convert average reading score to integer
average_reading_score = pd.DataFrame(data=['81.87784'], index=[0],columns=['Average Reading Score'])
average_reading_score                                                       

Unnamed: 0,Average Reading Score
0,81.87784


In [18]:
#Students passing math % and change to integer
passing_math_count_df = school_data_complete_df[(school_data_complete_df["math_score"] >=70)].count()["student_name"]
passing_math_percent_df = passing_math_count_df / (students) * 100
percent_passing_math = pd.DataFrame(data=['74.980853'], index=[0],columns=['% Passing Math'])
percent_passing_math

Unnamed: 0,% Passing Math
0,74.980853


In [19]:
#Students passing reading % and change to integer
passing_read_count_df = school_data_complete_df[(school_data_complete_df["reading_score"] >=70)].count()["student_name"]
passing_read_percent_df = passing_read_count_df / (students) * 100
percent_passing_reading = pd.DataFrame(data=['85.805463'], index=[0],columns=['% Passing Reading'])
percent_passing_reading

Unnamed: 0,% Passing Reading
0,85.805463


In [20]:
#% of students that passed math and reading and change to integer
passing_math_reading_count = school_data_complete_df[
    (school_data_complete_df["math_score"] >= 70) & (school_data_complete_df["reading_score"] >= 70)
].count()["student_name"]
overall_passing_rate = passing_math_reading_count / (students) * 100
overall_passing_rate = pd.DataFrame(data=['65.172326'], index=[0],columns=['Overall Passing Rate'])
overall_passing_rate

Unnamed: 0,Overall Passing Rate
0,65.172326


In [21]:
#Create district summary
district_summary_info = schools, students, budget, average_math_score, average_reading_score, percent_passing_math, percent_passing_reading
district_summary_info


(   Total Schools
 0             15,
    Total Students
 0           39170,
    Total Budget
 0      24649428,
   Average Math Score
 0          78.985732,
   Average Reading Score
 0              81.87784,
   % Passing Math
 0      74.980853,
   % Passing Reading
 0         85.805463)

In [22]:
school_types = school_data_complete_df["type"].unique()
school_types

array(['District', 'Charter'], dtype=object)

In [23]:
#Total students per school
school_count_df = school_data_complete_df["school_name"]
school_count_df 

0         Huang High School
1         Huang High School
2         Huang High School
3         Huang High School
4         Huang High School
                ...        
39165    Thomas High School
39166    Thomas High School
39167    Thomas High School
39168    Thomas High School
39169    Thomas High School
Name: school_name, Length: 39170, dtype: object

In [25]:
per_school_count = school_count_df.Dataframe
per_school_count = per_school_count.astype(int)
per_school_count
per_school_student_count = pd.DataFrame(data=['74.980853'], index=[0],columns=['% Passing Math'])
percent_passing_math

AttributeError: 'Series' object has no attribute 'Dataframe'

In [None]:
#School enrollees
per_school_enrollees = school_data_complete_df["size"].(["school_name"]).size()
per_school_enrollees


In [None]:
#Each School Budget
per_school_budget = school_data_complete_df["budget"].unique()
per_school_budget

In [None]:
#Each School Math & reading score
per_school_math_score

In [None]:
# Calculate the number of students per school with reading scores of 70 or higher


In [None]:
# Use the provided code to calculate the number of students per school that passed both math and reading with scores of 70 or higher
students_passing_math_and_reading = school_data_complete_df[
    (school_data_complete_df["reading_score"] >= 70) & (school_data_complete_df["math_score"] >= 70)]
students_passing_math_and_reading
school_students_passing_math_and_reading_df = students_passing_math_and_reading.groupby(["school_name"]).size()
school_students_passing_math_and_reading_df