In [10]:
# Dependencies
import pandas as pd
import os

# Write results to individual excel files
write_to_excel = input("Write results to Excel files y/n -->")
use_school_student_data = input("Use school student data y/n -->")

# ----------------------------Begin District Summary ----------------------------------

# load in School Data 
csv_path = os.path.join('raw_data', 'schools_complete.csv')
schools = pd.read_csv(csv_path)

# load in Student Data
csv_path = os.path.join('raw_data', 'students_complete.csv')
students = pd.read_csv(csv_path)

# Combine into inner joined Dataframe over school name

school_student_data = pd.merge(schools, students, how='inner', left_on="name", right_on="school",
         left_index=False, right_index=False, sort=False,
         suffixes=('_x', '_y'), copy=True, indicator=False,
         validate='1:m')
school_student_data = pd.merge(schools, students, how='inner', left_on="name", right_on="school")
school_student_data = pd.merge(schools, students, how='inner', left_on="name", right_on="school",
         left_index=False, right_index=True, sort=False,
         suffixes=('_x', '_y'), copy=True, indicator=False,
         validate='1:m')

unique_schools = pd.Series(school_student_data["School ID"]).unique()
total_schools = len(unique_schools)

unique_students = pd.Series(school_student_data["Student ID"]).unique()
total_students = len(unique_students)

# Join Students and Schools (should be many to 1)   

student_school_data = pd.merge(students, schools, how='inner', left_on="school", right_on="name",
         left_index=True, right_index=True, sort=False,
         suffixes=('_x', '_y'), copy=True, indicator=False,
         validate='m:1')
#student_school_data = pd.merge(students, schools, how='inner', left_on="school", right_on="name")

# Sum Budget
if use_school_student_data == 'n':
    total_budget = student_school_data["budget"].sum()
else:
    total_budget = school_student_data["budget"].sum()

# Average Math Score
#average_math_scores = students["math_score"].mean()
average_math_scores = school_student_data["math_score"].mean()

# Average Reading Score
#average_reading_scores = students["reading_score"].mean()
average_reading_scores = school_student_data["reading_score"].mean()

# % Passing Math
#passing_math = students.loc[students["math_score"] > 69].count()["Student ID"
passing_math = school_student_data.loc[school_student_data["math_score"] > 69].count()["Student ID"]
percent_passing_math = (passing_math/total_students) * 100

# % Passing Reading 
#passing_reading = students.loc[students["reading_score"] > 69].count()["Student ID"]
passing_reading = school_student_data.loc[school_student_data["reading_score"] > 69].count()["Student ID"]
percent_passing_reading = (passing_reading/total_students) * 100

# Overall Passing Rate: average of passing math and passing reading pcts
overall_passing_rate = (percent_passing_math + percent_passing_reading) / 2

# Prepare to display district summary 
district_summary = pd.DataFrame({"Total Schools": [total_schools],
                                 "Total Students": [total_students],
                                 "Total Budget": [total_budget],
                                 "Average Math Score": [average_math_scores],
                                 "Average Reading Score": [average_reading_scores],
                                 "% Passing Math":[percent_passing_math],
                                 "% Passing Reading":[percent_passing_reading],
                                 "% Overall Passing Rate": [overall_passing_rate],
                                   })
# Format budget as currency
district_summary["Total Budget"] = district_summary["Total Budget"].map("${0:,.2f}".format)

# Revise column order
col_list = ['Total Schools', 'Total Students', 'Total Budget', 'Average Math Score',
            'Average Reading Score','% Passing Math', '% Passing Reading','% Overall Passing Rate']

district_summary = district_summary[col_list]

if write_to_excel == 'y':
    writer = pd.ExcelWriter('district_summary.xlsx')
    district_summary.to_excel(writer,'Sheet1')

district_summary


Write results to Excel files y/n -->n
Use school student data y/n -->n


Unnamed: 0,Total Schools,Total Students,Total Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing Rate
0,15,15,"$24,649,428.00",75.2,84.333333,66.666667,80.0,73.333333


In [None]:
# For School Summary Begin with joined school to student dataframe 
school_student_data= school_student_pd.rename(index=str, columns={"name_x": "school name", "name_y": "student name"})

# Insert Reading Score Passing Count Column: 1 = True and  0 = False (for % calculation later)
school_student_data= school_student_pd.assign(rsp_count = (school_student_pd['reading_score']>69) - 0)

# Insert Math Score Passing Count Column: 1 = True and  0 = False (for % calculation later)
school_student_data= school_student_pd.assign(msp_count = (school_student_pd['math_score']>69) - 0)
school_student_data= school_student_pd.assign(combined_name = school_student_pd['school name'] + ":" + 
                                             school_student_pd['type'])
# Insert 

# Get School Counts of Students
school_counts = school_student_pd["school name"].value_counts()
school_counts.head(14)

# Establish groupby dataframe based on school name
grouped_by_school_name = school_student_pd.groupby(['school name'])

# School ID by School
school_id = grouped_by_school_name["School ID"].mean()

# Budget by School
total_school_budget = grouped_by_school_name["budget"].mean()

# Average Math Score by School
per_student_math_score = grouped_by_school_name["math_score"].mean()

# Average Reading Score by School
per_student_reading_score = grouped_by_school_name["reading_score"].mean()

# Passing Math Scores by School
passing_math_count = grouped_by_school_name["msp_count"].sum()

# Passing Reading Scores by School
passing_reading_count = grouped_by_school_name["rsp_count"].sum()

# Create New Summary Data Frame
schools_summary_table = pd.DataFrame({"Number of Students":school_counts,
                                      "Budget":total_school_budget,
                                     "Math Score":per_student_math_score,
                                     "Reading Score":per_student_reading_score,
                                     "Passing Math Count":passing_math_count,
                                     "Passing Reading Count":passing_reading_count,
                                     "School_ID":school_id})

# Insert Budget Spent Per Student column into Summary
schools_summary_table["Per Student Budget"] = schools_summary_table["Budget"] / schools_summary_table["Number of Students"]

# Insert % Passed Reading to Summary column into Summary
schools_summary_table["% Passing Math"] = (schools_summary_table["Passing Math Count"] / schools_summary_table["Number of Students"]) * 100

# Insert % Passed Reading to Summary column into Summary
schools_summary_table["% Passing Reading"] = (schools_summary_table["Passing Reading Count"] / schools_summary_table["Number of Students"]) * 100
schools_summary_table.head()

# Insert # Overall Passing Rate column into Summary
schools_summary_table["% Overall Passing Rate"] = (schools_summary_table["% Passing Reading"] + schools_summary_table["% Passing Math"]) / 2

# Join with Schools df to pickup School Type and School Size  
schools_summary_table = pd.merge(schools_summary_table, schools, how='inner', left_on="School_ID", right_on="School ID",
                         left_index=False, right_index=False, sort=False,
                         suffixes=('_x', '_y'), copy=True, indicator=False,
                         validate='1:m')

schools_summary_table["type#"]  = ((schools_summary_table['type'] == 'District') + 1)

# Rename column names to more descri)ptive names
ss_table = schools_summary_table.rename(columns={
                                                'name': 'School Name',
                                                'type':'School Type',
                                                'Number of Students':'Total Students',
                                                'budget':'Total School Budget',
                                                'Math Score':'Average Math Score',
                                                'Reading Score':'Average Reading Score'
                                                })
# Select columns needed for the report
ss_table = ss_table[['School Name','School Type','Total Students','Total School Budget','Per Student Budget',
                      'Average Math Score','Average Reading Score','% Passing Math','% Passing Reading',
                    '% Overall Passing Rate']]

ss_table.set_index('School Name', inplace=True)

ss_table.index.name=''

ss_table["Total School Budget"] = ss_table["Total School Budget"].map("${0:,.2f}".format)
ss_table["Per Student Budget"] = ss_table["Per Student Budget"].map("${0:,.2f}".format)

if write_to_excel == 'y':
    writer = pd.ExcelWriter('school_summary.xlsx')
    ss_table.to_excel(writer,'Sheet1')
    
# Show Final School Summary
ss_table


In [None]:
# Begin Top Performing Schools (By Passing Rate)
top_performing = ss_table.sort_values(['% Overall Passing Rate'],ascending=False)

if write_to_excel == 'y':
    writer = pd.ExcelWriter('top_performing.xlsx')
    top_performing.head(5).to_excel(writer,'Sheet1')
    
top_performing.head(5)

In [None]:
bottom_performing = ss_table.sort_values(['% Overall Passing Rate'],ascending=True)

if write_to_excel == 'y':
    writer = pd.ExcelWriter('bottom_performing.xlsx')
    bottom_performing.head(5).to_excel(writer,'Sheet1')
    
bottom_performing.head(5)

In [None]:
# Math Scores by Grade 
# Select each grade and calculate the average for the math scores grouped by school
grade_9_scores = students[students['grade'] == '9th'].groupby('school')['math_score'].mean()
grade_10_scores = students[students['grade'] == '10th'].groupby('school')['math_score'].mean()
grade_11_scores = students[students['grade'] == '11th'].groupby('school')['math_score'].mean()
grade_12_scores = students[students['grade'] == '12th'].groupby('school')['math_score'].mean()

# Create New Summary Data Frame
math_scores_by_grade = pd.DataFrame({
                                "9th":grade_9_scores,
                                "10th":grade_10_scores,
                                "11th":grade_11_scores,
                                "12th":grade_12_scores
                                })
# Reorder Columns in df since we want 9th to be first column
math_scores_by_grade = math_scores_by_grade[['9th', '10th', '11th', '12th']]

math_scores_by_grade.index.name=''

if write_to_excel == 'y':
    writer = pd.ExcelWriter('math_scores_by_grade.xlsx')
    math_scores_by_grade.to_excel(writer,'Sheet1')

math_scores_by_grade


In [None]:
# Reading Scores by Grade 
# Select each grade and calculate the average for the math scores grouped by school
grade_9_scores = students[students['grade'] == '9th'].groupby('school')['reading_score'].mean()
grade_10_scores = students[students['grade'] == '10th'].groupby('school')['reading_score'].mean()
grade_11_scores = students[students['grade'] == '11th'].groupby('school')['reading_score'].mean()
grade_12_scores = students[students['grade'] == '12th'].groupby('school')['reading_score'].mean()

# Create New Summary Data Frame
reading_scores_by_grade = pd.DataFrame({
                                "9th":grade_9_scores,
                                "10th":grade_10_scores,
                                "11th":grade_11_scores,
                                "12th":grade_12_scores
                                })
# Reorder Columns in df since we want 9th to be first column
reading_scores_by_grade = reading_scores_by_grade[['9th', '10th', '11th', '12th']]

reading_scores_by_grade.index.name=''

if write_to_excel == 'y':
    writer = pd.ExcelWriter('reading_scores_by_grade.xlsx')
    reading_scores_by_grade.to_excel(writer,'Sheet1')

reading_scores_by_grade

In [None]:
# Begin Spending Range Per Student Calculations

sp_range = schools_summary_table.rename(columns={'Math Score':'Average Math Score',
                                                  'Reading Score':'Average Reading Score'})
bins = [0, 580, 620, 640, 700] 
# Set up labels to compute when bins is updated
group_labels = ['<' + str(bins[1]-1), 
                str(bins[1]) + "-" + str(bins[2]-1), 
                str(bins[2]) + "-" + str(bins[3]-1),
                str(bins[3]) + "-" + str(bins[4])]
               
# Add new column with range information based values in Per Student Budget
sp_range['Spending Ranges (Per Student)'] = pd.cut(sp_range['Per Student Budget'],
                                                     bins, labels=group_labels)
# Get needed columns 
sp_range = sp_range[['Spending Ranges (Per Student)', 
                         'Average Math Score',
                         'Average Reading Score',
                         '% Passing Math',
                         '% Passing Reading',
                         '% Overall Passing Rate'
                        ]]
# Roll up to the Grouped By Object (Spending Ranges)
sp_range = sp_range.groupby("Spending Ranges (Per Student)")

if write_to_excel == 'y':
    writer = pd.ExcelWriter('spending_range_per_student.xlsx')
    sp_range.max().to_excel(writer,'Sheet1')

# Show Final Spending Range Summary
sp_range.max()


In [None]:
# Begin Size Range Calculations

sz_range = schools_summary_table.rename(columns={'Math Score':'Average Math Score',
                                                  'Reading Score':'Average Reading Score'
                                                  })
bins = [0, 1000, 2000, 5000] 
# Set up labels to compute when bins is updated
group_labels = ['Small(<' + str(bins[1]-1) + ')', 
                'Medium (' + str(bins[1]) + "-" + str(bins[2]-1) + ')', 
                'Large (' + str(bins[2]) + "-" + str(bins[3]) + ')']

# Add new column with range information based values in Per Student Budget
sz_range['School Size'] = pd.cut(sz_range['size'], bins, labels=group_labels)

# Get needed columns 
sz_range = sz_range[['School Size', 
                         'Average Math Score',
                         'Average Reading Score',
                         '% Passing Math',
                         '% Passing Reading',
                         '% Overall Passing Rate'
                        ]]
# Roll up to the Grouped By Object (School Size)
sz_range = sz_range.groupby('School Size')

if write_to_excel == 'y':
    writer = pd.ExcelWriter('spending_range_per_size.xlsx')
    sz_range.max().to_excel(writer,'Sheet1')
    
# Show Final Spending Range Summary
sz_range.max()


In [None]:
# Begin School Type Range Calculations

st_range = schools_summary_table.rename(columns={'Math Score':'Average Math Score',
                                                  'Reading Score':'Average Reading Score'
                                                  })
bins = [0, 1, 2] 
# Set up labels to compute when bins is updated
group_labels = ['Charter', 'District']

# Add new column with range information based values in Per Student Budget
st_range['School Type'] = pd.cut(st_range['type#'], bins, labels=group_labels)

# Get needed columns 
st_range = st_range[['School Type', 
                     'Average Math Score',
                     'Average Reading Score',
                     '% Passing Math',
                     '% Passing Reading',
                     '% Overall Passing Rate'
                    ]]
# Roll up to the Grouped By Object (School Type)
st_range = st_range.groupby('School Type')

if write_to_excel == 'y':
    writer = pd.ExcelWriter('school_type_range.xlsx')
    st_range.max().to_excel(writer,'Sheet1')
# Show Final Spending Range Summary
st_range.max()
