In [1]:
# Dependencies and Setup
import pandas as pd

# File to Load (Remember to Change These)
school_data_to_load = "Resources/schools_complete.csv"
student_data_to_load = "Resources/students_complete.csv"

# Read School and Student Data File and store into Pandas DataFrames
school_data = pd.read_csv(school_data_to_load)
student_data = pd.read_csv(student_data_to_load)

# Combine the data into a single dataset.
school_data_complete = pd.merge(student_data, school_data, how="left", on=["school_name", "school_name"])

In [2]:
overall_data_df = pd.DataFrame(school_data_complete)
#school_data_df = pd.DataFrame(school_data)
#student_data_df = pd.DataFrame(student_data)

<h1> District Summary </h1>

In [3]:
# Calculate total number of schools and students
total_schools = overall_data_df['school_name'].nunique()
total_students = overall_data_df['student_name'].count()

In [4]:
# Calculate budget total
total_budget = sum(overall_data_df['budget'].unique())


In [5]:
# Format to currency
total_budget = "${0:,.2f}".format(total_budget)
print(total_budget)

$24,649,428.00


In [6]:
# Calc average math and reading score
avg_math = round(overall_data_df['math_score'].mean(), 2)
avg_read = round(overall_data_df['reading_score'].mean(), 2)

In [7]:
# Calc average passing score for math and reading (looking for values at or higher than 70)
math_pass = round(((overall_data_df['math_score'] >= 70).mean())*100, 2)
read_pass = round(((overall_data_df['reading_score'] >= 70).mean())*100, 2)

In [8]:
# Combinning both math and reading data into one column to also calucate passing rate
overall_pass = overall_data_df[(overall_data_df['math_score'] >= 70) & (overall_data_df['reading_score'] >= 70)]

In [9]:
overall_passV2 = round((overall_pass.student_name.count()/total_students)*100, 2)

In [10]:
# Gather raw data to create new data frame
summary_raw_data = {
    'Total School': [total_schools],
    'Total Students': [total_students],
    'Total Budget': [total_budget],
    'Average Math Score': [avg_math],
    'Average Reading Score': [avg_read],
    '% Passing Math': [math_pass],
    '% Passing Reading': [read_pass],
    '% Overall Passing': [overall_passV2]
}


In [11]:
District_Summary_df = pd.DataFrame(summary_raw_data)
District_Summary_df

Unnamed: 0,Total School,Total Students,Total Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
0,15,39170,"$24,649,428.00",78.99,81.88,74.98,85.81,65.17


<h1> School Summary </h>

In [12]:
# Making a copy of OG df to add extra columns (dont want to mess with the OG data)
overall_data_plus_avg_df = overall_data_df
overall_data_plus_avg_df.head()

Unnamed: 0,Student ID,student_name,gender,grade,school_name,reading_score,math_score,School ID,type,size,budget
0,0,Paul Bradley,M,9th,Huang High School,66,79,0,District,2917,1910635
1,1,Victor Smith,M,12th,Huang High School,94,61,0,District,2917,1910635
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60,0,District,2917,1910635
3,3,Dr. Richard Scott,M,12th,Huang High School,67,58,0,District,2917,1910635
4,4,Bonnie Ray,F,9th,Huang High School,97,84,0,District,2917,1910635


In [13]:
# Creating new colums with passing scores for math, reading, and both math & reading
overall_data_plus_avg_df['%_passing_math'] = (overall_data_plus_avg_df.math_score >= 70)
overall_data_plus_avg_df['%_passing_reading'] = (overall_data_plus_avg_df.reading_score >= 70)
overall_data_plus_avg_df['%_Overall_Passing'] = ((overall_data_plus_avg_df.math_score >= 70) & (overall_data_plus_avg_df.reading_score >= 70))
overall_data_plus_avg_df.head()

Unnamed: 0,Student ID,student_name,gender,grade,school_name,reading_score,math_score,School ID,type,size,budget,%_passing_math,%_passing_reading,%_Overall_Passing
0,0,Paul Bradley,M,9th,Huang High School,66,79,0,District,2917,1910635,True,False,False
1,1,Victor Smith,M,12th,Huang High School,94,61,0,District,2917,1910635,False,True,False
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60,0,District,2917,1910635,False,True,False
3,3,Dr. Richard Scott,M,12th,Huang High School,67,58,0,District,2917,1910635,False,False,False
4,4,Bonnie Ray,F,9th,Huang High School,97,84,0,District,2917,1910635,True,True,True


In [14]:
# Average for math and reading
# Note:Total budget and total students will not be the average because their data input was consistant (i.e. the avg of a repeated value, is the same value)
school_df = overall_data_plus_avg_df.groupby(['school_name', 'type']).mean().reset_index()
school_df.head()

Unnamed: 0,school_name,type,Student ID,reading_score,math_score,School ID,size,budget,%_passing_math,%_passing_reading,%_Overall_Passing
0,Bailey High School,District,20358.5,81.033963,77.048432,7.0,4976.0,3124928.0,0.666801,0.819333,0.546423
1,Cabrera High School,Charter,16941.5,83.97578,83.061895,6.0,1858.0,1081356.0,0.941335,0.970398,0.913348
2,Figueroa High School,District,4391.0,81.15802,76.711767,1.0,2949.0,1884411.0,0.659885,0.807392,0.532045
3,Ford High School,District,36165.0,80.746258,77.102592,13.0,2739.0,1763916.0,0.683096,0.79299,0.542899
4,Griffin High School,Charter,12995.5,83.816757,83.351499,4.0,1468.0,917500.0,0.933924,0.97139,0.905995


In [15]:
# identifying columns to select the ones of interest
list(school_df.columns)

['school_name',
 'type',
 'Student ID',
 'reading_score',
 'math_score',
 'School ID',
 'size',
 'budget',
 '%_passing_math',
 '%_passing_reading',
 '%_Overall_Passing']

In [16]:
# Selecting columns of interest
school_df = school_df.loc[:,['school_name', 'type', 'size', 'budget', 'math_score', 'reading_score', '%_passing_math', '%_passing_reading', '%_Overall_Passing']]
school_df.head()

Unnamed: 0,school_name,type,size,budget,math_score,reading_score,%_passing_math,%_passing_reading,%_Overall_Passing
0,Bailey High School,District,4976.0,3124928.0,77.048432,81.033963,0.666801,0.819333,0.546423
1,Cabrera High School,Charter,1858.0,1081356.0,83.061895,83.97578,0.941335,0.970398,0.913348
2,Figueroa High School,District,2949.0,1884411.0,76.711767,81.15802,0.659885,0.807392,0.532045
3,Ford High School,District,2739.0,1763916.0,77.102592,80.746258,0.683096,0.79299,0.542899
4,Griffin High School,Charter,1468.0,917500.0,83.351499,83.816757,0.933924,0.97139,0.905995


In [17]:
# Create per student budget column and insert as the 5th column
school_df.insert(4, "per_student_budget", (school_df['budget']/school_df['size']), True)

In [18]:
# Check df
school_df.head()

Unnamed: 0,school_name,type,size,budget,per_student_budget,math_score,reading_score,%_passing_math,%_passing_reading,%_Overall_Passing
0,Bailey High School,District,4976.0,3124928.0,628.0,77.048432,81.033963,0.666801,0.819333,0.546423
1,Cabrera High School,Charter,1858.0,1081356.0,582.0,83.061895,83.97578,0.941335,0.970398,0.913348
2,Figueroa High School,District,2949.0,1884411.0,639.0,76.711767,81.15802,0.659885,0.807392,0.532045
3,Ford High School,District,2739.0,1763916.0,644.0,77.102592,80.746258,0.683096,0.79299,0.542899
4,Griffin High School,Charter,1468.0,917500.0,625.0,83.351499,83.816757,0.933924,0.97139,0.905995


In [19]:
# Format columns with for currency and percent

# create function to convert columns to currentcy 
def format_curr(x):
    return "${0:,.2f}".format(x)

# apply function on two columns
school_df['budget'] = school_df['budget'].apply(format_curr)
school_df['per_student_budget'] = school_df['per_student_budget'].apply(format_curr)

# Multiply by 100 and round to two sigfigs for % columns
school_df['%_passing_math'] = round(school_df['%_passing_math'] * 100, 2)
school_df['%_passing_reading'] = round(school_df['%_passing_reading'] * 100, 2)
school_df['%_Overall_Passing'] = round(school_df['%_Overall_Passing'] * 100, 2)

# Rouond to two sigfigs for math and reading scores
school_df['math_score'] = round(school_df['math_score'], 2)
school_df['reading_score'] = round(school_df['reading_score'] , 2)
school_df.head()


Unnamed: 0,school_name,type,size,budget,per_student_budget,math_score,reading_score,%_passing_math,%_passing_reading,%_Overall_Passing
0,Bailey High School,District,4976.0,"$3,124,928.00",$628.00,77.05,81.03,66.68,81.93,54.64
1,Cabrera High School,Charter,1858.0,"$1,081,356.00",$582.00,83.06,83.98,94.13,97.04,91.33
2,Figueroa High School,District,2949.0,"$1,884,411.00",$639.00,76.71,81.16,65.99,80.74,53.2
3,Ford High School,District,2739.0,"$1,763,916.00",$644.00,77.1,80.75,68.31,79.3,54.29
4,Griffin High School,Charter,1468.0,"$917,500.00",$625.00,83.35,83.82,93.39,97.14,90.6


In [20]:
# Rename Columns
school_df= school_df.rename(columns = {"school_name":"School Name", "type":"School Type", "size":"Total Students","budget":"Total School Budget","per_student_budget":"Per Student Budget","math_score":"Average Math Score", "reading_score":"Average Reading score", "%_passing_math":"% Passing Math", "%_passing_reading":"% Passing Reading", "%_Overall_Passing":"% Overall Passing"})
school_df

Unnamed: 0,School Name,School Type,Total Students,Total School Budget,Per Student Budget,Average Math Score,Average Reading score,% Passing Math,% Passing Reading,% Overall Passing
0,Bailey High School,District,4976.0,"$3,124,928.00",$628.00,77.05,81.03,66.68,81.93,54.64
1,Cabrera High School,Charter,1858.0,"$1,081,356.00",$582.00,83.06,83.98,94.13,97.04,91.33
2,Figueroa High School,District,2949.0,"$1,884,411.00",$639.00,76.71,81.16,65.99,80.74,53.2
3,Ford High School,District,2739.0,"$1,763,916.00",$644.00,77.1,80.75,68.31,79.3,54.29
4,Griffin High School,Charter,1468.0,"$917,500.00",$625.00,83.35,83.82,93.39,97.14,90.6
5,Hernandez High School,District,4635.0,"$3,022,020.00",$652.00,77.29,80.93,66.75,80.86,53.53
6,Holden High School,Charter,427.0,"$248,087.00",$581.00,83.8,83.81,92.51,96.25,89.23
7,Huang High School,District,2917.0,"$1,910,635.00",$655.00,76.63,81.18,65.68,81.32,53.51
8,Johnson High School,District,4761.0,"$3,094,650.00",$650.00,77.07,80.97,66.06,81.22,53.54
9,Pena High School,Charter,962.0,"$585,858.00",$609.00,83.84,84.04,94.59,95.95,90.54


<h1> Top Performing Schools (By % Overall Passing) </h1>

In [21]:
# Sort overall passin scores in descending order
desc_school_df = school_df.sort_values(by=['% Overall Passing'], ascending = False)

In [22]:
# slice the top 5 and reset the index
desc_school_df = desc_school_df.iloc[ 0:5, :].reset_index(drop = True)
desc_school_df

Unnamed: 0,School Name,School Type,Total Students,Total School Budget,Per Student Budget,Average Math Score,Average Reading score,% Passing Math,% Passing Reading,% Overall Passing
0,Cabrera High School,Charter,1858.0,"$1,081,356.00",$582.00,83.06,83.98,94.13,97.04,91.33
1,Thomas High School,Charter,1635.0,"$1,043,130.00",$638.00,83.42,83.85,93.27,97.31,90.95
2,Griffin High School,Charter,1468.0,"$917,500.00",$625.00,83.35,83.82,93.39,97.14,90.6
3,Wilson High School,Charter,2283.0,"$1,319,574.00",$578.00,83.27,83.99,93.87,96.54,90.58
4,Pena High School,Charter,962.0,"$585,858.00",$609.00,83.84,84.04,94.59,95.95,90.54


<h1> Bottom Performoing School (By % Overall Passing) </h1>

In [23]:
# Sort overall passin scores in asceding order
asc_school_df = school_df.sort_values(by=['% Overall Passing'], ascending = True)

In [24]:
# Slice the top 5 and reset the index
desc_school_df = asc_school_df.iloc[ 0:5, :].reset_index(drop = True)
desc_school_df

Unnamed: 0,School Name,School Type,Total Students,Total School Budget,Per Student Budget,Average Math Score,Average Reading score,% Passing Math,% Passing Reading,% Overall Passing
0,Rodriguez High School,District,3999.0,"$2,547,363.00",$637.00,76.84,80.74,66.37,80.22,52.99
1,Figueroa High School,District,2949.0,"$1,884,411.00",$639.00,76.71,81.16,65.99,80.74,53.2
2,Huang High School,District,2917.0,"$1,910,635.00",$655.00,76.63,81.18,65.68,81.32,53.51
3,Hernandez High School,District,4635.0,"$3,022,020.00",$652.00,77.29,80.93,66.75,80.86,53.53
4,Johnson High School,District,4761.0,"$3,094,650.00",$650.00,77.07,80.97,66.06,81.22,53.54


<h1> Math Scores by Grade </h1>

In [25]:
overall_data_df.head()

Unnamed: 0,Student ID,student_name,gender,grade,school_name,reading_score,math_score,School ID,type,size,budget,%_passing_math,%_passing_reading,%_Overall_Passing
0,0,Paul Bradley,M,9th,Huang High School,66,79,0,District,2917,1910635,True,False,False
1,1,Victor Smith,M,12th,Huang High School,94,61,0,District,2917,1910635,False,True,False
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60,0,District,2917,1910635,False,True,False
3,3,Dr. Richard Scott,M,12th,Huang High School,67,58,0,District,2917,1910635,False,False,False
4,4,Bonnie Ray,F,9th,Huang High School,97,84,0,District,2917,1910635,True,True,True


In [26]:
# Slice new data frame
math_by_grade_df = overall_data_df.loc[:, ['school_name', 'grade', 'math_score']]

In [27]:
math_by_grade_df.head()

Unnamed: 0,school_name,grade,math_score
0,Huang High School,9th,79
1,Huang High School,12th,61
2,Huang High School,12th,60
3,Huang High School,12th,58
4,Huang High School,9th,84


In [28]:
# Group data by school name and grade and reset index
math_by_grade_df = math_by_grade_df.groupby(['school_name', 'grade']).math_score.mean().reset_index()
math_by_grade_df.head()

Unnamed: 0,school_name,grade,math_score
0,Bailey High School,10th,76.996772
1,Bailey High School,11th,77.515588
2,Bailey High School,12th,76.492218
3,Bailey High School,9th,77.083676
4,Cabrera High School,10th,83.154506


In [29]:
# Pivot df making each grade a column with respected masth scores
math_by_grade_df = math_by_grade_df.pivot(index='school_name', columns='grade', values='math_score').reset_index()
math_by_grade_df.head()

grade,school_name,10th,11th,12th,9th
0,Bailey High School,76.996772,77.515588,76.492218,77.083676
1,Cabrera High School,83.154506,82.76556,83.277487,83.094697
2,Figueroa High School,76.539974,76.884344,77.151369,76.403037
3,Ford High School,77.672316,76.918058,76.179963,77.361345
4,Griffin High School,84.229064,83.842105,83.356164,82.04401


In [30]:
# Rename and Reorganize df
math_by_grade_df = math_by_grade_df.rename(columns = {"school_name":"School Name"})
math_by_grade_df = math_by_grade_df[['School Name', '9th', '10th', '11th', '12th']]
math_by_grade_df.head()

grade,School Name,9th,10th,11th,12th
0,Bailey High School,77.083676,76.996772,77.515588,76.492218
1,Cabrera High School,83.094697,83.154506,82.76556,83.277487
2,Figueroa High School,76.403037,76.539974,76.884344,77.151369
3,Ford High School,77.361345,77.672316,76.918058,76.179963
4,Griffin High School,82.04401,84.229064,83.842105,83.356164


<h1>Reading Scores by Grade </h1>

In [31]:
overall_data_df.head()

Unnamed: 0,Student ID,student_name,gender,grade,school_name,reading_score,math_score,School ID,type,size,budget,%_passing_math,%_passing_reading,%_Overall_Passing
0,0,Paul Bradley,M,9th,Huang High School,66,79,0,District,2917,1910635,True,False,False
1,1,Victor Smith,M,12th,Huang High School,94,61,0,District,2917,1910635,False,True,False
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60,0,District,2917,1910635,False,True,False
3,3,Dr. Richard Scott,M,12th,Huang High School,67,58,0,District,2917,1910635,False,False,False
4,4,Bonnie Ray,F,9th,Huang High School,97,84,0,District,2917,1910635,True,True,True


In [32]:
# Slice new data frame
read_by_grade_df = overall_data_df.loc[:, ['school_name', 'grade', 'reading_score']]
read_by_grade_df.head()

Unnamed: 0,school_name,grade,reading_score
0,Huang High School,9th,66
1,Huang High School,12th,94
2,Huang High School,12th,90
3,Huang High School,12th,67
4,Huang High School,9th,97


In [33]:
# Groupby
read_by_grade_df = read_by_grade_df.groupby(['school_name', 'grade']).reading_score.mean().reset_index()
read_by_grade_df.head()

Unnamed: 0,school_name,grade,reading_score
0,Bailey High School,10th,80.907183
1,Bailey High School,11th,80.945643
2,Bailey High School,12th,80.912451
3,Bailey High School,9th,81.303155
4,Cabrera High School,10th,84.253219


In [34]:
# Pivot table making grade level columns
read_by_grade_df = read_by_grade_df.pivot(index = 'school_name', columns='grade', values='reading_score').reset_index()
read_by_grade_df = read_by_grade_df.rename(columns = {"school_name":"School Name"})
read_by_grade_df = read_by_grade_df[['School Name', '9th', '10th', '11th', '12th']]
read_by_grade_df.head()

grade,School Name,9th,10th,11th,12th
0,Bailey High School,81.303155,80.907183,80.945643,80.912451
1,Cabrera High School,83.676136,84.253219,83.788382,84.287958
2,Figueroa High School,81.198598,81.408912,80.640339,81.384863
3,Ford High School,80.632653,81.262712,80.403642,80.662338
4,Griffin High School,83.369193,83.706897,84.288089,84.013699


<h1>Scores by School Spending </h1>

In [35]:
# Create a df copy of overall data to manipulate
spending_df = overall_data_df.reset_index()

#create a df copy for "Scores by School Type"
stype_df = overall_data_df.reset_index()

In [36]:
# Getting specific columns
spending_df = spending_df.loc[:,['size', 'budget', 'math_score', 'reading_score', "%_passing_math", '%_passing_reading', '%_Overall_Passing']]
 
# make a copy of df for "Scores by School Size"
school_size_df = spending_df

In [37]:
# inserting additional column 
spending_df.insert(4, "per_student_budget", (spending_df['budget']/spending_df['size']), True)
spending_df.head()

Unnamed: 0,size,budget,math_score,reading_score,per_student_budget,%_passing_math,%_passing_reading,%_Overall_Passing
0,2917,1910635,79,66,655.0,True,False,False
1,2917,1910635,61,94,655.0,False,True,False
2,2917,1910635,60,90,655.0,False,True,False
3,2917,1910635,58,67,655.0,False,False,False
4,2917,1910635,84,97,655.0,True,True,True


In [38]:
# Triming df
spending_df = spending_df.loc[:, ['math_score', 'reading_score', 'per_student_budget','%_passing_math', '%_passing_reading', '%_Overall_Passing']]
spending_df.head()

Unnamed: 0,math_score,reading_score,per_student_budget,%_passing_math,%_passing_reading,%_Overall_Passing
0,79,66,655.0,True,False,False
1,61,94,655.0,False,True,False
2,60,90,655.0,False,True,False
3,58,67,655.0,False,False,False
4,84,97,655.0,True,True,True


In [39]:
# Create bins and group name
bins = [0, 584.9, 629.9, 644.9, 679.9]
spending_rage = ['< $585', '$585 - 630', '$630 - 645', '$645 - 680']

# Bin df by group name
spending_df['Spending_Range'] = pd.cut(spending_df['per_student_budget'], bins= bins, labels = spending_rage)
spending_df.head()

Unnamed: 0,math_score,reading_score,per_student_budget,%_passing_math,%_passing_reading,%_Overall_Passing,Spending_Range
0,79,66,655.0,True,False,False,$645 - 680
1,61,94,655.0,False,True,False,$645 - 680
2,60,90,655.0,False,True,False,$645 - 680
3,58,67,655.0,False,False,False,$645 - 680
4,84,97,655.0,True,True,True,$645 - 680


In [40]:
spending_df = spending_df.groupby(['Spending_Range']).mean().reset_index()
spending_df.head()

Unnamed: 0,Spending_Range,math_score,reading_score,per_student_budget,%_passing_math,%_passing_reading,%_Overall_Passing
0,< $585,83.363065,83.964039,580.781564,0.937029,0.966866,0.906407
1,$585 - 630,79.982873,82.312643,620.146831,0.791099,0.885131,0.709392
2,$630 - 645,77.821056,81.301007,639.358771,0.706236,0.826002,0.588412
3,$645 - 680,77.049297,81.005604,651.937383,0.662308,0.811094,0.535288


In [41]:
# Formatting
spending_df = spending_df.loc[:, ['Spending_Range','math_score', 'reading_score','%_passing_math', '%_passing_reading', '%_Overall_Passing']]

In [42]:
spending_df['math_score'] = round(spending_df['math_score'], 2)
spending_df['reading_score'] = round(spending_df['reading_score'], 2)
spending_df['%_passing_math'] = round(spending_df['%_passing_math'] * 100, 2)
spending_df['%_passing_reading'] = round(spending_df['%_passing_reading'] * 100, 2)
spending_df['%_Overall_Passing'] = round(spending_df['%_Overall_Passing'] * 100, 2)

In [43]:
# Rename columns
spending_df = spending_df.rename(columns = {'Spending_Range':'Spending Range', 'math_score':'Average Math Score', 'reading_score':'Average Reading Score', '%_passing_math':'% Passing Math', '%_passing_reading':'% Passing Reading', '%_Overall_Passing': '% Overall Passing'})
spending_df.head()

Unnamed: 0,Spending Range,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
0,< $585,83.36,83.96,93.7,96.69,90.64
1,$585 - 630,79.98,82.31,79.11,88.51,70.94
2,$630 - 645,77.82,81.3,70.62,82.6,58.84
3,$645 - 680,77.05,81.01,66.23,81.11,53.53


<h1> Scores by School Size </h1>

In [44]:
school_size_df.head()

Unnamed: 0,size,budget,math_score,reading_score,per_student_budget,%_passing_math,%_passing_reading,%_Overall_Passing
0,2917,1910635,79,66,655.0,True,False,False
1,2917,1910635,61,94,655.0,False,True,False
2,2917,1910635,60,90,655.0,False,True,False
3,2917,1910635,58,67,655.0,False,False,False
4,2917,1910635,84,97,655.0,True,True,True


In [45]:
# Create bins and group name
size_bins = [0, 999, 1999, 4999]
school_rage = ['Small (<1000)', 'Medium (1000 - 2000)', 'Large (2000 - 5000)']

# Bin df by group name
school_size_df['School Size'] = pd.cut(school_size_df['size'], bins= size_bins, labels = school_rage)
school_size_df.head()

Unnamed: 0,size,budget,math_score,reading_score,per_student_budget,%_passing_math,%_passing_reading,%_Overall_Passing,School Size
0,2917,1910635,79,66,655.0,True,False,False,Large (2000 - 5000)
1,2917,1910635,61,94,655.0,False,True,False,Large (2000 - 5000)
2,2917,1910635,60,90,655.0,False,True,False,Large (2000 - 5000)
3,2917,1910635,58,67,655.0,False,False,False,Large (2000 - 5000)
4,2917,1910635,84,97,655.0,True,True,True,Large (2000 - 5000)


In [46]:
# Group by size
school_size_df = school_size_df.groupby(['School Size']).mean().reset_index()
school_size_df.head()

Unnamed: 0,School Size,size,budget,math_score,reading_score,per_student_budget,%_passing_math,%_passing_reading,%_Overall_Passing
0,Small (<1000),797.532757,482022.0,83.828654,83.974082,600.392369,0.939525,0.960403,0.901368
1,Medium (1000 - 2000),1715.739732,1033931.0,83.372682,83.867989,604.081906,0.936165,0.967731,0.906243
2,Large (2000 - 5000),3924.352268,2510395.0,77.477597,81.198674,638.008715,0.686524,0.821252,0.56574


In [47]:
# Formatting
school_size_df = school_size_df.loc[:, ['School Size','math_score', 'reading_score','%_passing_math', '%_passing_reading', '%_Overall_Passing']]

In [48]:
school_size_df['math_score'] = round(school_size_df['math_score'], 2)
school_size_df['reading_score'] = round(school_size_df['reading_score'], 2)
school_size_df['%_passing_math'] = round(school_size_df['%_passing_math'] * 100, 2)
school_size_df['%_passing_reading'] = round(school_size_df['%_passing_reading'] * 100, 2)
school_size_df['%_Overall_Passing'] = round(school_size_df['%_Overall_Passing'] * 100, 2)

In [49]:
# Rename columns
school_size_df = school_size_df.rename(columns = {'Spending_Range':'Spending Range', 'math_score':'Average Math Score', 'reading_score':'Average Reading Score', '%_passing_math':'% Passing Math', '%_passing_reading':'% Passing Reading', '%_Overall_Passing': '% Overall Passing'})
school_size_df.head()

Unnamed: 0,School Size,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
0,Small (<1000),83.83,83.97,93.95,96.04,90.14
1,Medium (1000 - 2000),83.37,83.87,93.62,96.77,90.62
2,Large (2000 - 5000),77.48,81.2,68.65,82.13,56.57


<h1> Scores by School Type </h1>

In [50]:
# Getting specific columns
stype_df = stype_df.loc[:,['type', 'math_score', 'reading_score', "%_passing_math", '%_passing_reading', '%_Overall_Passing']]
stype_df.head()

Unnamed: 0,type,math_score,reading_score,%_passing_math,%_passing_reading,%_Overall_Passing
0,District,79,66,True,False,False
1,District,61,94,False,True,False
2,District,60,90,False,True,False
3,District,58,67,False,False,False
4,District,84,97,True,True,True


In [53]:
stype_df = stype_df.groupby(['type']).mean().reset_index()
stype_df.head()

Unnamed: 0,type,math_score,reading_score,%_passing_math,%_passing_reading,%_Overall_Passing
0,Charter,83.406183,83.902821,0.937018,0.966459,0.905609
1,District,76.987026,80.962485,0.665184,0.809052,0.536959


In [55]:
# Formatting Percents
stype_df['math_score'] = round(stype_df['math_score'], 2)
stype_df['reading_score'] = round(stype_df['reading_score'], 2)
stype_df['%_passing_math'] = round(stype_df['%_passing_math'] * 100, 2)
stype_df['%_passing_reading'] = round(stype_df['%_passing_reading'] * 100, 2)
stype_df['%_Overall_Passing'] = round(stype_df['%_Overall_Passing'] * 100, 2)

In [57]:
# Rename Columns
stype_df = stype_df.rename(columns = {'type':'School Type', 'math_score':'Average Math Score', 'reading_score':'Average Reading Score', '%_passing_math':'% Passing Math', '%_passing_reading':'% Passing Reading', '%_Overall_Passing': '% Overall Passing'})
stype_df

Unnamed: 0,School Type,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
0,Charter,83.41,83.9,93.7,96.65,90.56
1,District,76.99,80.96,66.52,80.91,53.7
