### School Analysis
helping the school board and mayor make strategic decisions regarding future school budgets and priorities.

* analyze the district-wide standardized test results
* aggregate the data to showcase obvious trends in school performance
* written description of at least two observable trends based on the data.

In [1]:
# Dependencies and Setup
import pandas as pd

# File to Load 
school_data_to_load = "PyCitySchools_example/Resources/schools_complete.csv"
student_data_to_load = "PyCitySchools_example/Resources/students_complete.csv"

# Read School and Student Data File and store into Pandas DataFrames
school_data = pd.read_csv(school_data_to_load)
student_data = pd.read_csv(student_data_to_load)

# Combine the data into a single dataset.  
school_data_complete = pd.merge(student_data, school_data, how="left", on=["school_name", "school_name"])
#school_data_complete.rename(columns={'school_name':"School Name","budget":"School Budget",'size':"Student Count", "student_name":"Student Name"},inplace = True)


school_data_complete.head(-10)

Unnamed: 0,Student ID,student_name,gender,grade,school_name,reading_score,math_score,School ID,type,size,budget
0,0,Paul Bradley,M,9th,Huang High School,66,79,0,District,2917,1910635
1,1,Victor Smith,M,12th,Huang High School,94,61,0,District,2917,1910635
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60,0,District,2917,1910635
3,3,Dr. Richard Scott,M,12th,Huang High School,67,58,0,District,2917,1910635
4,4,Bonnie Ray,F,9th,Huang High School,97,84,0,District,2917,1910635
...,...,...,...,...,...,...,...,...,...,...,...
39155,39155,John Brooks,M,10th,Thomas High School,92,98,14,Charter,1635,1043130
39156,39156,Stephanie Contreras,F,11th,Thomas High School,79,95,14,Charter,1635,1043130
39157,39157,Kristen Gonzalez,F,9th,Thomas High School,79,94,14,Charter,1635,1043130
39158,39158,Kari Holloway,F,10th,Thomas High School,87,90,14,Charter,1635,1043130


In [2]:
school_data_complete[["reading_score", "math_score"]].describe()
school_data_complete.info()
#school_data_complete.columns

<class 'pandas.core.frame.DataFrame'>
Int64Index: 39170 entries, 0 to 39169
Data columns (total 11 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   Student ID     39170 non-null  int64 
 1   student_name   39170 non-null  object
 2   gender         39170 non-null  object
 3   grade          39170 non-null  object
 4   school_name    39170 non-null  object
 5   reading_score  39170 non-null  int64 
 6   math_score     39170 non-null  int64 
 7   School ID      39170 non-null  int64 
 8   type           39170 non-null  object
 9   size           39170 non-null  int64 
 10  budget         39170 non-null  int64 
dtypes: int64(6), object(5)
memory usage: 3.6+ MB


* Total number of unique schools
* Total students
* Total budget
* Average math score
* Average reading score
* % passing math (the percentage of students who passed math)
* % passing reading (the percentage of students who passed reading)
* % overall passing (the percentage of students who passed math AND reading)

In [8]:
# Total number of unique schools
count_records = school_data_complete['school_name'].count()
#print(f'Amount of Records: {count_records}')

count_unique = len(school_data_complete['school_name'].unique())
#print(f'Number of Unique Schools: {len(count_unique)}')

# Total students
total_students = len(school_data_complete['student_name'].unique())
#total_students = school_data_complete['student_name'].value_counts()


# Calculate the total number of students
student_count = school_data_complete.value_counts()

print(student_count)

#print(f'Number of Unique Students: {len(total_students)}')

# Total school budget
#total_budget_group = school_data_complete.groupby(['School Name',"School Budget"]).sum()
#print(total_budget_group)

school_budget_report = school_data_complete[['school_name', "budget",'size']].drop_duplicates().reset_index(drop=True)
total_budget = school_budget_report["budget"].sum()
#print(f'Total Budget of Schools: {total_budget}')


#passing_math_reading_count = school_data_complete[(school_data_complete["math_score"] >= 70) & (school_data_complete["reading_score"] >= 70)].count()["student_name"]
avg_math = round(school_data_complete['math_score'].mean(),5)
avg_reading = round(school_data_complete['reading_score'].mean(),5)

passed_math = school_data_complete[(school_data_complete["math_score"] >= 70)].count()["student_name"]
passed_math = passed_math / float(total_students) * 100

passed_reading = school_data_complete[(school_data_complete["reading_score"] >= 70)].count()["student_name"]
passed_reading = round(passed_reading / float(total_students) * 100,5)

overall_passed = school_data_complete[(school_data_complete["math_score"] >= 70) & (school_data_complete["reading_score"] >= 70)].count()["student_name"]
overall_passed = round(overall_passed / float(total_students) * 100,5)
#passed_reading = round(passed_reading / float(total_students) * 100,5)


details = {
    'Number of Schools' : round(count_unique,),
    'Total Students': ("{:,}".format(total_students)),
    'Total Budget': ("${:,.2f}".format(total_budget)),
    'Average Math score': avg_math,
    'Average Reading score': avg_reading,
    '% Passed Math': passed_math,
    '% Passed Reading': passed_reading,
    '% Overall Passed': overall_passed}


high_level_district_snapshot = pd.DataFrame(list(details.items()))
#high_level_district_snapshot["Total Students"] = high_level_district_snapshot[total_students].map("{:,}".format)
#high_level_district_snapshot["Total Budget"] = high_level_district_snapshot["Total Budget"].map("${:,.2f}".format)

high_level_district_snapshot.head(15)


Student ID  student_name              gender  grade  school_name            reading_score  math_score  School ID  type      size  budget 
0           Paul Bradley              M       9th    Huang High School      66             79          0          District  2917  1910635    1
26116       Nicole Bennett            F       11th   Rodriguez High School  69             57          11         District  3999  2547363    1
26109       Nathan Taylor             M       12th   Rodriguez High School  64             64          11         District  3999  2547363    1
26110       Brandon Sloan             M       12th   Rodriguez High School  91             98          11         District  3999  2547363    1
26111       Angela Thompson DDS       F       11th   Rodriguez High School  82             84          11         District  3999  2547363    1
                                                                                                                                            ..
1305

TypeError: cannot convert the series to <class 'float'>

* School name
* School type
* Total students
* Total school budget
* Per student budget
* Average math score
* Average reading score
* % passing math (the percentage of students who passed math)
* % passing reading (the percentage of students who passed reading)
* % overall passing (the percentage of students who passed math AND reading)

In [None]:
# Total school budget
total_budget_group = school_data_complete.groupby(['School Name',"School Budget"]).sum()
#print(total_budget_group)

school_budget_report = school_data_complete[['School Name', 'type','Student Count',"School Budget",]].drop_duplicates().reset_index(drop=True)
#school_budget_report.head(15)

#Per student budget
#school_budget_report.groupby("school")

school_budget_report["Budget_Per_Student"] = school_budget_report["School Budget"] / school_budget_report['Student Count']
#school_budget_report["Average_Math"] =
school_budget_report.head(15)


In [None]:
#Average math score
school_budget_report["Average Math Score"] = school_data_complete.loc[school_data_complete['School Name'] == "Huang High School", school_data_complete["math_score"]].mean()
school_budget_report.head()

#Average reading score

In [None]:
# passing math (the percentage of students who passed math)

# passing reading (the percentage of students who passed reading)

# overall passing (the percentage of students who passed math AND reading)

In [None]:
#Sort the schools by % Overall Passing in descending order and display the top 5 rows. Save the results in a DataFrame called "top_schools".

In [None]:
#Sort the schools by % Overall Passing in ascending order and display the top 5 rows.Save the results in a DataFrame called "bottom_schools".

In [None]:
#Perform the necessary calculations to create a DataFrame that lists the average math score for students of each grade level (9th, 10th, 11th, 12th) at each school.

In [None]:
#Create a DataFrame that lists the average reading score for students of each grade level (9th, 10th, 11th, 12th) at each school.

In [None]:
# Create a table that breaks down school performance based on average spending ranges (per student).
# Use pd.cut on the "Total Students" column of the per_school_summary DataFrame.

Create a DataFrame called size_summary that breaks down school performance based on school size (small, medium, or large).

pending_bins = [0, 585, 630, 645, 680]
labels = ["<$585", "$585-630", "$630-645", "$645-680"]