In [14]:
# Import Dependencies
import pandas as pd

In [15]:
#create data frames from csv
schools_df = pd.read_csv('Resources/schools_complete.csv')
students_df = pd.read_csv('Resources/students_complete.csv')

In [16]:
# Merge data frames  
schools_full_df = pd.merge(students_df, schools_df, how="left", on=["school_name", "school_name"])
schools_full_df.head()

Unnamed: 0,Student ID,student_name,gender,grade,school_name,reading_score,math_score,School ID,type,size,budget
0,0,Paul Bradley,M,9th,Huang High School,66,79,0,District,2917,1910635
1,1,Victor Smith,M,12th,Huang High School,94,61,0,District,2917,1910635
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60,0,District,2917,1910635
3,3,Dr. Richard Scott,M,12th,Huang High School,67,58,0,District,2917,1910635
4,4,Bonnie Ray,F,9th,Huang High School,97,84,0,District,2917,1910635


In [17]:
## District Summary
#Calculate the total number of schools
num_schools = len(schools_df)

# Calculate the total number of students
num_students = len(students_df)

# Calculate the total budget
total_budget = schools_df['budget'].sum()

# Calculate the average math score 
math_score_ave = students_df['math_score'].mean()

# Calculate the average reading score
read_score_ave = students_df['reading_score'].mean()

# Calculate the percentage of students with a passing math score (70 or greater)
math_pass_df = students_df.loc[students_df['math_score'] >= 70, :]
per_pass_math = len(math_pass_df)
per_pass_math = per_pass_math / num_students * 100

# Calculate the percentage of students with a passing reading score (70 or greater)
per_pass_read = len(students_df.loc[students_df['reading_score'] >= 70, :])
per_pass_read = per_pass_read / num_students * 100

# Calculate the percentage of students who passed math **and** reading (% Overall Passing)
per_pass = len(math_pass_df.loc[math_pass_df['reading_score'] >= 70, :])
per_pass = per_pass / num_students * 100

# Create a dataframe to hold the above results
district_df = pd.DataFrame({'Total schools' : [num_schools],
             'Total students' : num_students,
             'Total budget' : total_budget,
             'Average math score' : math_score_ave,
             'Average reading score' : read_score_ave,
             '% Passing math' : per_pass_math,
             '% Passing reading' : per_pass_read,
             '% Passing both' : per_pass})

#Proper formatting
district_df['Total schools'] = district_df['Total schools'].map('{:,}'.format)
district_df['Total students'] = district_df['Total students'].map('{:,}'.format)
district_df['Total budget'] = district_df['Total budget'].map('${:,.2f}'.format)
district_df['Average math score'] = district_df['Average math score'].map('{:,.2f}'.format)
district_df['Average reading score'] = district_df['Average reading score'].map('{:,.2f}'.format)
district_df['% Passing math'] = district_df['% Passing math'].map('{:,.2f}%'.format)
district_df['% Passing reading'] = district_df['% Passing reading'].map('{:,.2f}%'.format)
district_df['% Passing both'] = district_df['% Passing both'].map('{:,.2f}%'.format)

district_df

Unnamed: 0,Total schools,Total students,Total budget,Average math score,Average reading score,% Passing math,% Passing reading,% Passing both
0,15,39170,"$24,649,428.00",78.99,81.88,74.98%,85.81%,65.17%


In [18]:
## School Summary

indexes = schools_df['school_name'].values.tolist()
#School Name

#School Type
school_type = pd.Series(schools_df['type'].values.tolist(),index=indexes)

#Total Students
total_students = students_df['school_name'].value_counts()

#Total School Budget
total_school_budget = pd.Series(schools_df['budget'].values.tolist(),index=indexes)

summary_school = pd.DataFrame({'Total students' : total_students,
                              'Total budget' : total_school_budget,
                              'Type of school' : school_type})

#Per Student Budget
summary_school['Per Student Budget'] = summary_school['Total budget'] / summary_school['Total students']

#Average Math Score
average_math = schools_full_df.groupby('school_name')['math_score'].mean()
summary_school['Average Math Score'] = average_math

#Average Reading Score
average_read = schools_full_df.groupby('school_name')['reading_score'].mean()
summary_school['Average Reading Score'] = average_read

#% Passing Math (The percentage of students that passed math.)
math_per = schools_full_df.loc[schools_full_df['math_score']>= 70, :]
math_per.groupby('school_name')['math_score'].count()
summary_school['% Passing math'] = math_per.groupby('school_name')['math_score'].count() / summary_school['Total students']

#% Passing Reading (The percentage of students that passed reading.)
red_per = schools_full_df.loc[schools_full_df['reading_score']>= 70, :]
red_per = red_per.groupby('school_name')['reading_score'].count()
summary_school['% Passing reading'] = red_per / summary_school['Total students']

#% Overall Passing (The percentage of students that passed math **and** reading.)
math_per = schools_full_df.loc[schools_full_df['math_score']>= 70, :]
red_per = math_per.loc[math_per['reading_score']>= 70, :]
red_per = red_per.groupby('school_name')['reading_score'].count()
summary_school['% Passing both'] = red_per / summary_school['Total students']

#Create a dataframe to hold the above results

summary_school


#summary_school['% Passing Math'] = schools_full_df.groupby('school_name')[]


Unnamed: 0,Total students,Total budget,Type of school,Per Student Budget,Average Math Score,Average Reading Score,% Passing math,% Passing reading,% Passing both
Bailey High School,4976,3124928,District,628.0,77.048432,81.033963,0.666801,0.819333,0.546423
Cabrera High School,1858,1081356,Charter,582.0,83.061895,83.97578,0.941335,0.970398,0.913348
Figueroa High School,2949,1884411,District,639.0,76.711767,81.15802,0.659885,0.807392,0.532045
Ford High School,2739,1763916,District,644.0,77.102592,80.746258,0.683096,0.79299,0.542899
Griffin High School,1468,917500,Charter,625.0,83.351499,83.816757,0.933924,0.97139,0.905995
Hernandez High School,4635,3022020,District,652.0,77.289752,80.934412,0.66753,0.80863,0.535275
Holden High School,427,248087,Charter,581.0,83.803279,83.814988,0.925059,0.962529,0.892272
Huang High School,2917,1910635,District,655.0,76.629414,81.182722,0.656839,0.813164,0.535139
Johnson High School,4761,3094650,District,650.0,77.072464,80.966394,0.660576,0.812224,0.535392
Pena High School,962,585858,Charter,609.0,83.839917,84.044699,0.945946,0.959459,0.905405


In [19]:
top_performance = summary_school.sort_values('% Passing both',ascending=False)
top_performance



Unnamed: 0,Total students,Total budget,Type of school,Per Student Budget,Average Math Score,Average Reading Score,% Passing math,% Passing reading,% Passing both
Cabrera High School,1858,1081356,Charter,582.0,83.061895,83.97578,0.941335,0.970398,0.913348
Thomas High School,1635,1043130,Charter,638.0,83.418349,83.84893,0.932722,0.973089,0.90948
Griffin High School,1468,917500,Charter,625.0,83.351499,83.816757,0.933924,0.97139,0.905995
Wilson High School,2283,1319574,Charter,578.0,83.274201,83.989488,0.938677,0.965396,0.905826
Pena High School,962,585858,Charter,609.0,83.839917,84.044699,0.945946,0.959459,0.905405
Wright High School,1800,1049400,Charter,583.0,83.682222,83.955,0.933333,0.966111,0.903333
Shelton High School,1761,1056600,Charter,600.0,83.359455,83.725724,0.938671,0.958546,0.898921
Holden High School,427,248087,Charter,581.0,83.803279,83.814988,0.925059,0.962529,0.892272
Bailey High School,4976,3124928,District,628.0,77.048432,81.033963,0.666801,0.819333,0.546423
Ford High School,2739,1763916,District,644.0,77.102592,80.746258,0.683096,0.79299,0.542899


In [20]:
bottom_performance = summary_school.sort_values('% Passing both')
bottom_performance


Unnamed: 0,Total students,Total budget,Type of school,Per Student Budget,Average Math Score,Average Reading Score,% Passing math,% Passing reading,% Passing both
Rodriguez High School,3999,2547363,District,637.0,76.842711,80.744686,0.663666,0.802201,0.529882
Figueroa High School,2949,1884411,District,639.0,76.711767,81.15802,0.659885,0.807392,0.532045
Huang High School,2917,1910635,District,655.0,76.629414,81.182722,0.656839,0.813164,0.535139
Hernandez High School,4635,3022020,District,652.0,77.289752,80.934412,0.66753,0.80863,0.535275
Johnson High School,4761,3094650,District,650.0,77.072464,80.966394,0.660576,0.812224,0.535392
Ford High School,2739,1763916,District,644.0,77.102592,80.746258,0.683096,0.79299,0.542899
Bailey High School,4976,3124928,District,628.0,77.048432,81.033963,0.666801,0.819333,0.546423
Holden High School,427,248087,Charter,581.0,83.803279,83.814988,0.925059,0.962529,0.892272
Shelton High School,1761,1056600,Charter,600.0,83.359455,83.725724,0.938671,0.958546,0.898921
Wright High School,1800,1049400,Charter,583.0,83.682222,83.955,0.933333,0.966111,0.903333


In [21]:
#Create a table that lists the average Reading Score for students of each grade level (9th, 10th, 11th, 12th) at each school.

#Create a pandas series for each grade. Hint: use a conditional statement.
grading = schools_full_df.groupby('grade').mean()
grading

new_index = schools_full_df.set_index('school_name')
new_index

grading = new_index.groupby(['school_name','grade'])['math_score','reading_score'].mean()
grading

#Group each series by school
  
#Combine the series into a dataframe
  
#Optional: give the displayed data cleaner formatting

  grading = new_index.groupby(['school_name','grade'])['math_score','reading_score'].mean()


Unnamed: 0_level_0,Unnamed: 1_level_0,math_score,reading_score
school_name,grade,Unnamed: 2_level_1,Unnamed: 3_level_1
Bailey High School,10th,76.996772,80.907183
Bailey High School,11th,77.515588,80.945643
Bailey High School,12th,76.492218,80.912451
Bailey High School,9th,77.083676,81.303155
Cabrera High School,10th,83.154506,84.253219
Cabrera High School,11th,82.76556,83.788382
Cabrera High School,12th,83.277487,84.287958
Cabrera High School,9th,83.094697,83.676136
Figueroa High School,10th,76.539974,81.408912
Figueroa High School,11th,76.884344,80.640339


In [22]:
#Create a table that lists the average Reading Score for students of each grade level (9th, 10th, 11th, 12th) at each school.

#Create a pandas series for each grade. Hint: use a conditional statement.
grades = list(students_df['grade'].unique())
dt = []
for grade in grades:
    v = students_df.loc[students_df['grade'] == grade]
    v2 = pd.Series(v['math_score'].values.tolist())
    v3 = pd.Series(v['school_name'].values.tolist())
    dt.append(pd.DataFrame({'School' : v3,
                     grade : v2}))  
#Group each series by school
for i in range(len(dt)):
    dt[i] = dt[i].groupby('School').mean()   
    
#Combine the series into a dataframe
merge_df = dt[0]
for i in range(1,len(dt)):
    merge_df = pd.merge(merge_df,dt[i],on='School')   

merge_df
#Optional: give the displayed data cleaner formatting






Unnamed: 0_level_0,9th,12th,11th,10th
School,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Bailey High School,77.083676,76.492218,77.515588,76.996772
Cabrera High School,83.094697,83.277487,82.76556,83.154506
Figueroa High School,76.403037,77.151369,76.884344,76.539974
Ford High School,77.361345,76.179963,76.918058,77.672316
Griffin High School,82.04401,83.356164,83.842105,84.229064
Hernandez High School,77.438495,77.186567,77.136029,77.337408
Holden High School,83.787402,82.855422,85.0,83.429825
Huang High School,77.027251,77.225641,76.446602,75.908735
Johnson High School,77.187857,76.863248,77.491653,76.691117
Pena High School,83.625455,84.121547,84.328125,83.372


In [23]:
#Create a table that lists the average Reading Score for students of each grade level (9th, 10th, 11th, 12th) at each school.

#Create a pandas series for each grade. Hint: use a conditional statement.
grades = list(students_df['grade'].unique())
dt = []
for grade in grades:
    v = students_df.loc[students_df['grade'] == grade]
    v2 = pd.Series(v['reading_score'].values.tolist())
    v3 = pd.Series(v['school_name'].values.tolist())
    dt.append(pd.DataFrame({'School' : v3,
                     grade : v2}))  
#Group each series by school
for i in range(len(dt)):
    dt[i] = dt[i].groupby('School').mean()   
    
#Combine the series into a dataframe
merge_df = dt[0]
for i in range(1,len(dt)):
    merge_df = pd.merge(merge_df,dt[i],on='School')   

merge_df
#Optional: give the displayed data cleaner formatting


Unnamed: 0_level_0,9th,12th,11th,10th
School,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Bailey High School,81.303155,80.912451,80.945643,80.907183
Cabrera High School,83.676136,84.287958,83.788382,84.253219
Figueroa High School,81.198598,81.384863,80.640339,81.408912
Ford High School,80.632653,80.662338,80.403642,81.262712
Griffin High School,83.369193,84.013699,84.288089,83.706897
Hernandez High School,80.86686,80.857143,81.39614,80.660147
Holden High School,83.677165,84.698795,83.815534,83.324561
Huang High School,81.290284,80.305983,81.417476,81.512386
Johnson High School,81.260714,81.227564,80.616027,80.773431
Pena High School,83.807273,84.59116,84.335938,83.612


In [24]:
#Create a table that breaks down school performances based on average Spending Ranges (Per Student). Use 4 reasonable bins to group school spending. Include in the table each of the following:

bins = [0, 585, 630, 645, 680]

# Create the names for the five bins
group_names = ['<$585', '$585-630','$630-645','$645-680']
summary_school_bins = summary_school
summary_school_bins['Spending Ranges Per Student'] = pd.cut(summary_school_bins['Per Student Budget'], bins,
                                                            labels=group_names,include_lowest=True)
summary_school_bins = summary_school_bins[['Average Math Score','Average Reading Score','% Passing math','% Passing reading',
                                          '% Passing both','Spending Ranges Per Student']]
summary_school_bins


indexes = summary_school_bins['Spending Ranges Per Student'].values.tolist()

pd.DataFrame({'Spending Ranges Per Student':indexes,
             'Average Math Score':summary_school_bins['Average Math Score'].values.tolist(),
             'Average Reading Score':summary_school_bins['Average Reading Score'].values.tolist(),
             '% Passing Math':summary_school_bins['% Passing math'].values.tolist(),
             '% Passing Reading':summary_school_bins['% Passing reading'].values.tolist(),
             '% Overall Passing':summary_school_bins['% Passing both'].values.tolist()}).set_index(
    'Spending Ranges Per Student').groupby('Spending Ranges Per Student').mean()



Unnamed: 0_level_0,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
Spending Ranges Per Student,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
$585-630,81.899826,83.155286,0.871335,0.927182,0.814186
$630-645,78.518855,81.624473,0.734842,0.843918,0.628577
$645-680,76.99721,81.027843,0.661648,0.81134,0.535269
<$585,83.455399,83.933814,0.934601,0.966109,0.903695


In [25]:
bins2 = [0, 1000, 2000, 5000]

# Create the names for the five bins
group_names2 = ['Small (<1000)', 'Medium (1000-2000)','Large(2000-5000)']

summary_school_bins2 = pd.merge(summary_school,schools_df,how='left',left_index=True,right_on='school_name')
summary_school_bins2 = summary_school_bins2[['school_name','Total students','Total budget','Type of school',
                                             'Average Math Score','Average Reading Score',
                                         '% Passing math','% Passing reading','% Passing both','size']]

summary_school_bins2['School Size'] = pd.cut(summary_school_bins2['size'], bins2,
                                                            labels=group_names2,include_lowest=True)
summary_school_bins2 = summary_school_bins2[['Average Math Score','Average Reading Score','% Passing math','% Passing reading',
                                          '% Passing both','size','School Size']]



indexes = summary_school_bins2['School Size'].values.tolist()

pd.DataFrame({'School Size':indexes,
             'Average Math Score':summary_school_bins2['Average Math Score'].values.tolist(),
             'Average Reading Score':summary_school_bins2['Average Reading Score'].values.tolist(),
             '% Passing Math':summary_school_bins2['% Passing math'].values.tolist(),
             '% Passing Reading':summary_school_bins2['% Passing reading'].values.tolist(),
             '% Overall Passing':summary_school_bins2['% Passing both'].values.tolist()}).set_index(
    'School Size').groupby('School Size').mean()




Unnamed: 0_level_0,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
School Size,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Large(2000-5000),77.746417,81.344493,0.699634,0.827666,0.58286
Medium (1000-2000),83.374684,83.864438,0.935997,0.967907,0.906215
Small (<1000),83.821598,83.929843,0.935502,0.960994,0.898839


In [26]:
summary_school.groupby('Type of school')[['Average Math Score',
                                          'Average Reading Score','% Passing math',
                                          '% Passing reading','% Passing both']].mean()

Unnamed: 0_level_0,Average Math Score,Average Reading Score,% Passing math,% Passing reading,% Passing both
Type of school,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Charter,83.473852,83.896421,0.936208,0.965865,0.904322
District,76.956733,80.966636,0.665485,0.807991,0.536722
