In [3]:
import pandas as pd

#Load csv files
school_csv = "../Resources/schools_complete.csv"
student_csv = "../Resources/students_complete.csv"

#Read School and Student csv files
school_read = pd.read_csv(school_csv)
student_read = pd.read_csv(student_csv)

#Combine data files into single dataset and dataframe
school_data_df = pd.merge(student_read, school_read, how= 'left', on=['school_name', 'school_name'])
school_data_df.head()

Unnamed: 0,Student ID,student_name,gender,grade,school_name,reading_score,math_score,School ID,type,size,budget
0,0,Paul Bradley,M,9th,Huang High School,66,79,0,District,2917,1910635
1,1,Victor Smith,M,12th,Huang High School,94,61,0,District,2917,1910635
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60,0,District,2917,1910635
3,3,Dr. Richard Scott,M,12th,Huang High School,67,58,0,District,2917,1910635
4,4,Bonnie Ray,F,9th,Huang High School,97,84,0,District,2917,1910635


District Summary

In [4]:
school_count = school_data_df['school_name'].nunique()
school_count


15

In [5]:
student_count = school_data_df['student_name'].value_counts()
student_total = student_count.sum()
student_total

39170

In [6]:
total_budget = school_data_df['budget'].drop_duplicates()
total = total_budget.sum()
total


24649428

In [7]:
average_math_score = school_data_df['math_score'].mean()
average_math_score

78.98537145774827

In [8]:
average_reading_score = school_data_df['reading_score'].mean()
average_reading_score

81.87784018381414

In [9]:
passing_math_count = school_data_df[(school_data_df['math_score'] >= 70)].count()['student_name']
mathPass_percent = passing_math_count / float(student_total) * 100
mathPass_percent

74.9808526933878

In [10]:
passing_reading_count = school_data_df[(school_data_df['reading_score'] >= 70)].count()['student_name']
readPass_percent = passing_reading_count / float(student_total) * 100
readPass_percent

85.80546336482001

In [11]:
overall_pass_count = school_data_df[(school_data_df['math_score'] >= 70) & 
                                    (school_data_df['reading_score']>= 70)].count()['student_name']
overall_passing = overall_pass_count / float(student_total) * 100
overall_passing

65.17232575950983

In [12]:
district_summary = [{'Total Schools': school_count, 'Total Students': student_total, 'Total Budget': total,
                     'Average Math Score': average_math_score, 'Average Reading Score': average_reading_score,
                     '% Passing Math': mathPass_percent, '% Passing Reading': readPass_percent, '% Overall Passing': overall_passing}]

district_summary = pd.DataFrame(district_summary)

district_summary["Total Students"] = district_summary["Total Students"].map("{:,}".format)
district_summary["Total Budget"] = district_summary["Total Budget"].map("${:,.2f}".format)

district_summary

Unnamed: 0,Total Schools,Total Students,Total Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
0,15,39170,"$24,649,428.00",78.985371,81.87784,74.980853,85.805463,65.172326


School Summary

In [84]:
schoolType = school_data_df.set_index(['school_name'])['type']
print(schoolType)

school_name
Huang High School     District
Huang High School     District
Huang High School     District
Huang High School     District
Huang High School     District
                        ...   
Thomas High School     Charter
Thomas High School     Charter
Thomas High School     Charter
Thomas High School     Charter
Thomas High School     Charter
Name: type, Length: 39170, dtype: object


In [83]:
student_per_school = school_data_df['school_name'].value_counts()
print(student_per_school)


Bailey High School       4976
Johnson High School      4761
Hernandez High School    4635
Rodriguez High School    3999
Figueroa High School     2949
Huang High School        2917
Ford High School         2739
Wilson High School       2283
Cabrera High School      1858
Wright High School       1800
Shelton High School      1761
Thomas High School       1635
Griffin High School      1468
Pena High School          962
Holden High School        427
Name: school_name, dtype: int64


In [32]:
budget_per_school = school_data_df.groupby(['school_name']).mean()['budget']

capita_per_school = budget_per_school / student_per_school

budget_per_school.head()


school_name
Bailey High School      3124928.0
Cabrera High School     1081356.0
Figueroa High School    1884411.0
Ford High School        1763916.0
Griffin High School      917500.0
Name: budget, dtype: float64

In [85]:
math_per_school = school_data_df.groupby(['school_name']).mean()['math_score']
reading_per_school = school_data_df.groupby(['school_name']).mean()['reading_score']

print(math_per_school)

school_name
Bailey High School       77.048432
Cabrera High School      83.061895
Figueroa High School     76.711767
Ford High School         77.102592
Griffin High School      83.351499
Hernandez High School    77.289752
Holden High School       83.803279
Huang High School        76.629414
Johnson High School      77.072464
Pena High School         83.839917
Rodriguez High School    76.842711
Shelton High School      83.359455
Thomas High School       83.418349
Wilson High School       83.274201
Wright High School       83.682222
Name: math_score, dtype: float64


In [86]:
math_pass = school_data_df[(school_data_df['math_score'] >= 70)]
read_pass = school_data_df[(school_data_df['reading_score'] >= 70)]
math_read_pass = school_data_df[(school_data_df['reading_score'] >= 70) &
                                (school_data_df['math_score'] >= 70)]

print(math_read_pass)

       Student ID     student_name gender grade         school_name  \
4               4       Bonnie Ray      F   9th   Huang High School   
5               5    Bryan Miranda      M   9th   Huang High School   
6               6    Sheena Carter      F  11th   Huang High School   
8               8     Michael Roth      M  10th   Huang High School   
9               9   Matthew Greene      M  10th   Huang High School   
...           ...              ...    ...   ...                 ...   
39165       39165     Donna Howard      F  12th  Thomas High School   
39166       39166        Dawn Bell      F  10th  Thomas High School   
39167       39167   Rebecca Tanner      F   9th  Thomas High School   
39168       39168     Desiree Kidd      F  10th  Thomas High School   
39169       39169  Carolyn Jackson      F  11th  Thomas High School   

       reading_score  math_score  School ID      type  size   budget  
4                 97          84          0  District  2917  1910635  
5    

In [76]:
pass_math_per_school = math_pass.groupby(['school_name']).count()['student_name']
pass_math_perc_school = pass_math_per_school / student_per_school * 100

pass_read_per_school = read_pass.groupby(['school_name']).count()['student_name']
pass_read_perc_school = pass_read_per_school / student_per_school * 100

math_read_pass = math_read_pass.groupby(['school_name']).count()['student_name']
overall_pass_perc_school = math_read_pass / student_per_school * 100

overall_pass_perc_school.head(20)

Bailey High School       54.642283
Cabrera High School      91.334769
Figueroa High School     53.204476
Ford High School         54.289887
Griffin High School      90.599455
Hernandez High School    53.527508
Holden High School       89.227166
Huang High School        53.513884
Johnson High School      53.539172
Pena High School         90.540541
Rodriguez High School    52.988247
Shelton High School      89.892107
Thomas High School       90.948012
Wilson High School       90.582567
Wright High School       90.333333
dtype: float64

In [81]:
school_summary = [{'School Type': schoolType, 
                   'Total Students': student_per_school,
                   'Total School Budget': budget_per_school,
                   'Per Student Budget': capita_per_school,
                   'Average Math Score': math_per_school,
                   'Average Reading Score': reading_per_school,
                   '% Passing Math': pass_math_perc_school,
                   '% Passing Reading': pass_read_perc_school,
                   '% Overall Passing': overall_pass_perc_school}]

school_summary = pd.DataFrame(school_summary)

#school_summary['Total School Budget'] = school_summary['Total School Budget'].map('${:,.2f}'.format)
#school_summary['Per Student Budget'] = school_summary['Per Student Budget'].map('${:,.2f}'.format)

print(school_summary)

                                         School Type  \
0  school_name
Huang High School     District
Hua...   

                                      Total Students  \
0  Bailey High School       4976
Johnson High Sch...   

                                 Total School Budget  \
0  school_name
Bailey High School       3124928.0...   

                                  Per Student Budget  \
0  Bailey High School       628.0
Cabrera High Sc...   

                                  Average Math Score  \
0  school_name
Bailey High School       77.048432...   

                               Average Reading Score  \
0  school_name
Bailey High School       81.033963...   

                                      % Passing Math  \
0  Bailey High School       66.680064
Cabrera Hig...   

                                   % Passing Reading  \
0  Bailey High School       81.933280
Cabrera Hig...   

                                   % Overall Passing  
0  Bailey High School       54.642283
Cab