# Observable Trends

###### 1)
###### 2)
###### 3)

In [1]:
# Import dependencies
import pandas as pd

In [2]:
# Set csv paths
schools_path = 'raw_data/schools_complete.csv'
students_path = 'raw_data/students_complete.csv'

In [3]:
# Read schools csv
schools_df = pd.read_csv(schools_path)
schools_df.head()

Unnamed: 0,School ID,name,type,size,budget
0,0,Huang High School,District,2917,1910635
1,1,Figueroa High School,District,2949,1884411
2,2,Shelton High School,Charter,1761,1056600
3,3,Hernandez High School,District,4635,3022020
4,4,Griffin High School,Charter,1468,917500


In [4]:
# Read students csv
students_df = pd.read_csv(students_path)
students_df.head()

Unnamed: 0,Student ID,name,gender,grade,school,reading_score,math_score
0,0,Paul Bradley,M,9th,Huang High School,66,79
1,1,Victor Smith,M,12th,Huang High School,94,61
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60
3,3,Dr. Richard Scott,M,12th,Huang High School,67,58
4,4,Bonnie Ray,F,9th,Huang High School,97,84


# District Summary

In [5]:
# Grab total schools
total_schools = schools_df['name'].count()

# Grab total students
total_students = schools_df['size'].sum()

# Grab total budget
total_budget = schools_df['budget'].sum()

# Grab average math score
avg_math = students_df['math_score'].mean()

# Grab average reading score
avg_read = students_df['reading_score'].mean()

# Calculate % passing math
pass_math = (students_df['math_score'] >= 70).sum()/total_students*100

# Calculate % passing reading
pass_read = (students_df['reading_score'] >= 70).sum()/total_students*100

# Calculate overall passing grade
overall_pass = (pass_math + pass_read)/2

In [6]:
# Create District Summary dataframe
district_summary = pd.DataFrame({'Total Schools': [total_schools], 
                                 'Total Students': [total_students], 
                                 'Total Budget': [f'${total_budget:,.2f}'], 
                                 'Average Math Score': [avg_math], 
                                 'Average Reading Score': [avg_read], 
                                 '% Passing Math': [pass_math], 
                                 '% Passing Reading': [pass_read], 
                                 '% Overall Passing Rate': [overall_pass]})

# Reorder columns
district_summary = district_summary[['Total Schools', 'Total Students', 'Total Budget', 'Average Math Score', 
                                     'Average Reading Score', '% Passing Math', '% Passing Reading', '% Overall Passing Rate']]

district_summary

Unnamed: 0,Total Schools,Total Students,Total Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing Rate
0,15,39170,"$24,649,428.00",78.985371,81.87784,74.980853,85.805463,80.393158


# School Summary

In [7]:
# Use students dataframe to groupby school
grouped_shools = students_df.groupby(['school'])
grouped_shools

<pandas.core.groupby.DataFrameGroupBy object at 0x00000276B9CA6F28>

In [30]:
# Grab average math and reading scores for each school
avg_math_scores = grouped_shools['math_score'].mean()
avg_read_scores = grouped_shools['reading_score'].mean()

# Create dataframe from averages
student_scores = pd.DataFrame({'Average Math Score': avg_math_scores, 
                                'Average Reading Score': avg_read_scores})

# Reset index for merge
student_scores = student_scores.reset_index(drop=True)

# Drop index column
#student_scores = student_scores.drop(['index'], axis=1)
student_scores.head()

Unnamed: 0,Average Math Score,Average Reading Score
0,77.048432,81.033963
1,83.061895,83.97578
2,76.711767,81.15802
3,77.102592,80.746258
4,83.351499,83.816757


In [None]:
# Subset original students df to count passing students for each school
only_pass_math = students_df.loc[students_df['math_score'] >= 70,:]
only_pass_read = students_df.loc[students_df['reading_score'] >= 70,:]

# Group by school
grouped_only_pass_math = only_pass_math.groupby(['school'])
grouped_only_pass_read = only_pass_read.groupby(['school'])

# Grab student counts who passed for each school
pass_math = grouped_only_pass_math['math_score'].count()
pass_read = grouped_only_pass_read['reading_score'].count()

In [None]:
# Insert group by series into dataframe
pass_summary = pd.DataFrame({'Passing Math Counts': pass_math, 'Passing Reading Counts': pass_read})

# Reset index
pass_summary = pass_summary.reset_index()
pass_summary.head()

In [None]:
# Merge student_scores df and pass_summary on school
merge_stu_pass = pd.merge(student_scores, pass_summary, on='school')
merge_stu_pass.head()

In [None]:
# Rename columns and set df to schools summary
schools_summary = schools_df.rename(columns={'name': 'school', 'type': 'School Type', 'size': 'Total Students', 
                                             'budget': 'Total School Budget'})
# Drop School ID
schools_summary = schools_summary.drop(['School ID'], axis=1)

# Add budget per student column
schools_summary['Per Student Budget'] = schools_summary['Total School Budget']/schools_summary['Total Students']
schools_summary.head()

In [None]:
# Merge schools_summary and students_summary on school
combined_stu_sch = pd.merge(schools_summary, merge_stu_pass, on='school')

# Add % passing math and reading
combined_stu_sch['% Passing Math'] = combined_stu_sch['Passing Math Counts']/combined_stu_sch['Total Students']*100
combined_stu_sch['% Passing Reading'] = combined_stu_sch['Passing Reading Counts']/combined_stu_sch['Total Students']*100

# Add Overall Passing Rate Column
combined_stu_sch['% Overall Passing Rate'] = (combined_stu_sch['% Passing Math'] + combined_stu_sch['% Passing Reading'])/2

# Drop passing math and reading counts columns
combined_stu_sch = combined_stu_sch.drop(['Passing Math Counts', 'Passing Reading Counts'], axis=1)

# Map to format budget columns
combined_stu_sch['Total School Budget'] = combined_stu_sch['Total School Budget'].map('${:,.2f}'.format)
combined_stu_sch['Per Student Budget'] = combined_stu_sch['Per Student Budget'].map('${:,.2f}'.format)

# Rename school column
combined_stu_sch = combined_stu_sch.rename(columns={'school': 'School Name'})

# Sort alphabetically
combined_stu_sch = combined_stu_sch.sort_values(by=['School Name'])

# Index school for visibility
combined_stu_sch = combined_stu_sch.set_index(['School Name'])
combined_stu_sch

# Top Performing Schools (By Passing Rate)

In [None]:
# Sort
top_schools = combined_stu_sch.sort_values(by=['% Overall Passing Rate'], ascending=False)
top_schools.head()

# Bottom Performing Schools (By Passing Rate)

In [None]:
# Sort
top_schools = combined_stu_sch.sort_values(by=['% Overall Passing Rate'])
top_schools.head()

# Math Scores by Grade

In [None]:
students_df.head()

In [None]:
# Group by grade
grouped_sch_grade = students_df.groupby(['school', 'grade'])
grade_math_score = grouped_sch_grade['math_score'].mean()

# Reset index
grade_math_score = grade_math_score.reset_index()
grade_math_score

# Create dataframe
# grade_math_score = pd.DataFrame(grade_math_score)
# grade_math_score
# test = test.stack(level=['9th', '10th', '11th', '12th'])