In [2]:
import csv
import os
import pandas as pd

student_fp = "../Resources/students_complete.csv"
school_fp = "../Resources/schools_complete.csv"

student_df = pd.read_csv(student_fp)
school_df = pd.read_csv(school_fp)

In [3]:
student_df.head()

Unnamed: 0,Student ID,student_name,gender,grade,school_name,reading_score,math_score
0,0,Paul Bradley,M,9th,Huang High School,66,79
1,1,Victor Smith,M,12th,Huang High School,94,61
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60
3,3,Dr. Richard Scott,M,12th,Huang High School,67,58
4,4,Bonnie Ray,F,9th,Huang High School,97,84


In [4]:
school_df.head()

Unnamed: 0,School ID,school_name,type,size,budget
0,0,Huang High School,District,2917,1910635
1,1,Figueroa High School,District,2949,1884411
2,2,Shelton High School,Charter,1761,1056600
3,3,Hernandez High School,District,4635,3022020
4,4,Griffin High School,Charter,1468,917500


In [5]:
# Finding the total number of students and schools

num_schools = school_df["school_name"].nunique()
num_students = student_df["student_name"].nunique()

print(num_schools)
print(num_students)

15
32715


In [6]:
# Finding the total budget for all of the schools

total_budget = school_df['budget'].sum()
print(total_budget)

24649428


In [7]:
# Finding the average math score for all of the students

avg_math_score = round(student_df["math_score"].mean(), 2)
print(str(avg_math_score) + "%")

78.99%


In [10]:
# Making a df to look at the students passing math - score over 65%

passing_math_count = student_df[student_df["math_score"] > 65]

In [11]:
# Percent of students passing math - score over 65%

passing_math_total=passing_math_count['student_name'].nunique()
percent_passing_math = round(passing_math_total/num_students*100, 2)
print(str(percent_passing_math) + "%")

85.17%


In [12]:
# Making a df to look at the students passing reading - score over 65%

passing_reading_df = student_df.loc[student_df["reading_score"] > 65, ["student_name", "gender", "grade", 'school_name', "reading_score"]]

In [13]:
# Calculating the percentage of students passing reading - score greater than 65%

passing_reading_total = passing_reading_df['student_name'].nunique()
percent_passing_reading = round(passing_reading_total/num_students*100, 2)
print(str(percent_passing_reading) + "%")

94.94%


In [14]:
# Making a df to look at the students that are passing math and reading - scores greater than 65%

overall_passing_df = student_df.loc[(student_df["reading_score"] > 65) & (student_df["math_score"] > 65), ["student_name", "gender", "grade", 'school_name', "reading_score", "math_score"]]

In [15]:
# Caculating the number of students that are passing math and reading - scores greater than 65%

overall_passing_total = overall_passing_df["student_name"].nunique()
percent_overall_passing = round(overall_passing_total/num_students*100, 2)
print(str(percent_overall_passing) + "%")

81.23%


In [16]:
# Making a DF with a summary of district statitics

district_df = pd.DataFrame({
    "Total Unique Schools": [num_schools],
    "Total Students": [num_students],
    "Total Budget": [total_budget],
    "Average Math Score": [avg_math_score],
    "% Passing Math": [percent_passing_math],
    "% Passing Reading": [percent_passing_reading],
    "% Passing Overall": [percent_overall_passing]
})
district_df

Unnamed: 0,Total Unique Schools,Total Students,Total Budget,Average Math Score,% Passing Math,% Passing Reading,% Passing Overall
0,15,32715,24649428,78.99,85.17,94.94,81.23


In [17]:
school_df.value_counts()

School ID  school_name            type      size  budget 
0          Huang High School      District  2917  1910635    1
1          Figueroa High School   District  2949  1884411    1
2          Shelton High School    Charter   1761  1056600    1
3          Hernandez High School  District  4635  3022020    1
4          Griffin High School    Charter   1468  917500     1
5          Wilson High School     Charter   2283  1319574    1
6          Cabrera High School    Charter   1858  1081356    1
7          Bailey High School     District  4976  3124928    1
8          Holden High School     Charter   427   248087     1
9          Pena High School       Charter   962   585858     1
10         Wright High School     Charter   1800  1049400    1
11         Rodriguez High School  District  3999  2547363    1
12         Johnson High School    District  4761  3094650    1
13         Ford High School       District  2739  1763916    1
14         Thomas High School     Charter   1635  1043130   

In [22]:
by_school_df = school_df.copy()

by_school_df.set_index('school_name', inplace=True)

by_school_df["Budget Per Student"] = by_school_df["budget"]/by_school_df["size"]



Unnamed: 0_level_0,School ID,type,size,budget,Budget Per Student
school_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Huang High School,0,District,2917,1910635,655.0
Figueroa High School,1,District,2949,1884411,639.0
Shelton High School,2,Charter,1761,1056600,600.0
Hernandez High School,3,District,4635,3022020,652.0
Griffin High School,4,Charter,1468,917500,625.0
Wilson High School,5,Charter,2283,1319574,578.0
Cabrera High School,6,Charter,1858,1081356,582.0
Bailey High School,7,District,4976,3124928,628.0
Holden High School,8,Charter,427,248087,581.0
Pena High School,9,Charter,962,585858,609.0
