# School Data Analysis

- A high-level snapshot of the district's key metrics, presented in a table format
- An overview of the key metrics for each school, presented in a table format
- Tables presenting each of the following metrics:
  - Top 5 and bottom 5 performing schools, based on the overall passing rate
  - The average math score received by students in each grade level at each school
  - The average reading score received by students in each grade level at each school
  - School performance based on the budget per student
  - School performance based on the school size
  - School performance based on the type of school

In [42]:
# Add the dependencies (standard library first, then third-party).
import os

import pandas as pd


# Files to load
school_data_to_load = os.path.join("Resources", "schools_complete.csv")
student_data_to_load = os.path.join("Resources", "students_complete.csv")

# Read the school and student data files and store each in a Pandas DataFrame.
school_data_df = pd.read_csv(school_data_to_load)
student_data_df = pd.read_csv(student_data_to_load)

# Put the student names in a list.
# NOTE: this snapshot is taken before the cleaning loop below, so it holds the
# names exactly as they appear in the CSV.
student_names = student_data_df["student_name"].tolist()

# Add each prefix and suffix to remove to a list.
prefixes_suffixes = ["Dr. ", "Mr. ", "Ms. ", "Mrs. ", "Miss ", " MD", " DDS", " DVM", " PhD"]

# Iterate through the "prefixes_suffixes" list and replace each occurrence with
# the empty string "".
# regex=False forces a literal match: before pandas 2.0 the default was
# regex=True, under which the "." in "Dr. ", "Mr. ", ... matches ANY character.
for word in prefixes_suffixes:
    student_data_df["student_name"] = student_data_df["student_name"].str.replace(
        word, "", regex=False
    )



In [86]:
# Combine the student and school data into a single dataset, joined on the
# school name (pd.merge defaults to an inner join).
school_data_complete_df = pd.merge(student_data_df, school_data_df, on="school_name")

# Get the total number of students.
student_count = school_data_complete_df["Student ID"].count()

# Calculate the total number of schools.
school_count = school_data_df["school_name"].count()

# Calculate the total budget.
total_budget = school_data_df["budget"].sum()

# Calculate average reading and average math scores.
average_reading_score = school_data_complete_df["reading_score"].mean()
average_math_score = school_data_complete_df["math_score"].mean()

# Boolean masks, computed once and reused below: a score of 70 or higher passes.
passed_reading = school_data_complete_df["reading_score"] >= 70
passed_math = school_data_complete_df["math_score"] >= 70

# Calculate the number of students passing reading.
passing_reading_count = school_data_complete_df.loc[passed_reading, "Student ID"].count()

# Calculate the number of students passing math.
passing_math_count = school_data_complete_df.loc[passed_math, "Student ID"].count()

# Calculate the percentage of students who passed math.
passing_math_percentage = passing_math_count / student_count * 100

# Calculate the percentage of students who passed reading.
passing_reading_percentage = passing_reading_count / student_count * 100

# Calculate the number of students who passed both math and reading.
overall_passing_math_reading_count = school_data_complete_df.loc[
    passed_math & passed_reading, "Student ID"
].count()

# Calculate the overall passing percentage.
overall_passing_percentage = overall_passing_math_reading_count / student_count * 100

# Build the one-row District Summary DataFrame from the computed metrics.
district_summary_df = pd.DataFrame(
    [
        {
            "Total Schools": school_count,
            "Total Students": student_count,
            "Total Budget": total_budget,
            "Average Math Score": average_math_score,
            "Average Reading Score": average_reading_score,
            "% Passing Math": passing_math_percentage,
            "% Passing Reading": passing_reading_percentage,
            "% Overall Passing": overall_passing_percentage,
        }
    ]
)

# Format the data in the District Summary DataFrame.
# Each listed column is converted to display strings using its format spec;
# "Total Schools" is intentionally left as a number.
summary_formats = {
    "Total Students": "{:,}",
    "Total Budget": "${:,.2f}",
    "Average Math Score": "{:.1f}",
    "Average Reading Score": "{:.1f}",
    "% Passing Math": "{:.0f}",
    "% Passing Reading": "{:.0f}",
    "% Overall Passing": "{:.0f}",
}
for column, fmt in summary_formats.items():
    district_summary_df[column] = district_summary_df[column].map(fmt.format)

# Last expression of the cell: displays the formatted summary in a notebook.
district_summary_df

Unnamed: 0,Total Schools,Total Students,Total Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
0,15,39170,"$24,649,428.00",79.0,81.9,75,86,65
