In [2]:
#Dependencies
import pandas as pd
import os

In [3]:
# Load the two source CSV files from the local "Resources" folder.

# Build both paths with os.path.join so the separator matches the host OS.
schoolspath, studentspath = (
    os.path.join("Resources", csv_name)
    for csv_name in ("schools_complete.csv", "students_complete.csv")
)

# One DataFrame per file, parsed with pandas' default CSV settings.
schools, students = pd.read_csv(schoolspath), pd.read_csv(studentspath)


In [4]:
# Explore the schools DataFrame: the bare expression below is rendered as the cell's output.

schools

Unnamed: 0,School ID,school_name,type,size,budget
0,0,Huang High School,Government,2917,1910635
1,1,Figueroa High School,Government,2949,1884411
2,2,Shelton High School,Independent,1761,1056600
3,3,Hernandez High School,Government,4635,3022020
4,4,Griffin High School,Independent,1468,917500
5,5,Wilson High School,Independent,2283,1319574
6,6,Cabrera High School,Independent,1858,1081356
7,7,Bailey High School,Government,4976,3124928
8,8,Holden High School,Independent,427,248087
9,9,Pena High School,Independent,962,585858


In [5]:
# Explore the students DataFrame: preview only the first rows rather than the whole table.

students.head()

Unnamed: 0,Student ID,student_name,gender,year,school_name,reading_score,maths_score
0,0,Paul Bradley,M,9,Huang High School,96,94
1,1,Victor Smith,M,12,Huang High School,90,43
2,2,Kevin Rodriguez,M,12,Huang High School,41,76
3,3,Richard Scott,M,12,Huang High School,89,86
4,4,Bonnie Ray,F,9,Huang High School,87,69


In [6]:
# Merge the student and school tables on school name.

# Every student row should pick up exactly one school row. validate="many_to_one"
# makes pandas raise a MergeError if a school name appears more than once in
# `schools`, which would otherwise silently duplicate student rows.
df = pd.merge(students, schools, on="school_name", validate="many_to_one")

# The default inner join drops students whose school is missing from `schools`;
# fail loudly here rather than let the totals computed later be undercounted.
assert len(df) == len(students), "some students have no matching school record"

df



Unnamed: 0,Student ID,student_name,gender,year,school_name,reading_score,maths_score,School ID,type,size,budget
0,0,Paul Bradley,M,9,Huang High School,96,94,0,Government,2917,1910635
1,1,Victor Smith,M,12,Huang High School,90,43,0,Government,2917,1910635
2,2,Kevin Rodriguez,M,12,Huang High School,41,76,0,Government,2917,1910635
3,3,Richard Scott,M,12,Huang High School,89,86,0,Government,2917,1910635
4,4,Bonnie Ray,F,9,Huang High School,87,69,0,Government,2917,1910635
...,...,...,...,...,...,...,...,...,...,...,...
39165,39165,Donna Howard,F,12,Thomas High School,51,48,14,Independent,1635,1043130
39166,39166,Dawn Bell,F,10,Thomas High School,81,89,14,Independent,1635,1043130
39167,39167,Rebecca Tanner,F,9,Thomas High School,99,99,14,Independent,1635,1043130
39168,39168,Desiree Kidd,F,10,Thomas High School,72,77,14,Independent,1635,1043130


## Local Government Area (LGA) Summary

### Calculations:

In [11]:
# Area-wide headline figures, all derived from the merged student/school frame `df`.

#   Total number of unique schools
total_schools = df["school_name"].nunique()

#   Total students
total_students = df["Student ID"].nunique()

#   Total Budget
# Each school's budget is repeated on every one of its student rows, so keep a
# single row per school (the first one) before summing. This replaces a loop
# that re-filtered the whole frame once per school.
# The result matches schools["budget"].sum() computed on the pre-merge table.

# School names in order of first appearance; later cells reuse this list.
school_names = list(df["school_name"].unique())

total_budget = df.drop_duplicates(subset="school_name")["budget"].sum()

#   Average Maths Score
average_maths = df["maths_score"].mean()

#   Average Reading Score
average_reading = df["reading_score"].mean()

# A score of 50 or more counts as a pass. Build each boolean mask once;
# summing a mask counts its True values without materialising a filtered copy.
passed_maths = df["maths_score"] >= 50
passed_reading = df["reading_score"] >= 50

#   % Passing Maths
percent_passing_maths = passed_maths.sum() / len(df) * 100

#   % Passing Reading
percent_passing_reading = passed_reading.sum() / len(df) * 100

#   % Overall Passing (a pass in both maths and reading)
overall_passing = (passed_maths & passed_reading).sum() / len(df) * 100





### DataFrame Creation:

In [60]:
#   Create a one-row DataFrame of all area-level values from a list holding a single dictionary.
area_summary = pd.DataFrame([{"Total Schools":total_schools,
    "Total Students":total_students,
    "Total Budget":total_budget,
    "Average Maths Score":average_maths,
    "Average Reading Score":average_reading,
    "% Passing Maths":percent_passing_maths,
    "% Passing Reading":percent_passing_reading,
    "% Overall Passing":overall_passing}])

#   Format Total Budget as currency text, e.g. "$24,649,428.00".
#   After this the column holds strings, not numbers.
area_summary["Total Budget"] = area_summary["Total Budget"].map("${:,.2f}".format)

#   Round scores and percentages to two decimal places.
#   Columns are picked by name rather than by position so the rounding keeps
#   targeting the right columns if the dictionary above is ever reordered.
rounded_columns = ["Average Maths Score",
    "Average Reading Score",
    "% Passing Maths",
    "% Passing Reading",
    "% Overall Passing"]
area_summary[rounded_columns] = area_summary[rounded_columns].round(2)

    #Note: The Starter Code does not round these scores,
    #but I also heeded the optional task regarding cleaner formatting


#Output
area_summary

Unnamed: 0,Total Schools,Total Students,Total Budget,Average Maths Score,Average Reading Score,% Passing Maths,% Passing Reading,% Overall Passing
0,15,39170,"$24,649,428.00",70.34,69.98,86.08,84.43,72.81


## School Summary

### Calculations:

In [68]:

#   Per-school metrics, computed in a single grouped pass over the merged frame `df`.
#
#   School names reuse the list collected earlier: school_names.
#   Every list built below is aligned, element for element, with that list.
#
#   This replaces a loop that re-filtered the whole frame once per school,
#   shadowed the builtin `type`, and looked up list positions with
#   school_names.index(school) on every iteration.

# A score of 50 or more counts as a pass.
pass_mark = 50

# Flag each student row once so that pass rates become simple group means.
graded = df.assign(
    passed_maths=df["maths_score"] >= pass_mark,
    passed_reading=df["reading_score"] >= pass_mark,
)
graded["passed_both"] = graded["passed_maths"] & graded["passed_reading"]

school_stats = (
    graded.groupby("school_name", sort=False)
    .agg(
        # Type and budget are repeated on every student row of a school,
        # so the first value in each group is representative.
        school_type=("type", "first"),
        student_count=("Student ID", "count"),
        school_budget=("budget", "first"),
        maths_average=("maths_score", "mean"),
        reading_average=("reading_score", "mean"),
        # The mean of a boolean column is the fraction of True values.
        passing_maths=("passed_maths", "mean"),
        passing_reading=("passed_reading", "mean"),
        passing_both=("passed_both", "mean"),
    )
    # Guarantee row order matches school_names, whatever order groupby produced.
    .reindex(school_names)
)

#   School Type
school_types = school_stats["school_type"].tolist()

#   Total Students
school_total_students = school_stats["student_count"].tolist()

#   Total School Budget
school_budgets = school_stats["school_budget"].tolist()

#   Per Student Budget
per_student_budget = (school_stats["school_budget"] / school_stats["student_count"]).tolist()

#   Average Maths Score
school_maths_average = school_stats["maths_average"].tolist()

#   Average Reading Score
school_reading_average = school_stats["reading_average"].tolist()

#   % Passing Maths
schools_percent_passing_maths = (school_stats["passing_maths"] * 100).tolist()

#   % Passing Reading
schools_percent_passing_reading = (school_stats["passing_reading"] * 100).tolist()

#   % Overall Passing (a pass in both maths and reading)
schools_overall_passing = (school_stats["passing_both"] * 100).tolist()





### DataFrame Creation:

In [69]:
#   Create DataFrame using a dictionary of equal-length lists (one entry per school),
#   then index it by school name, drop the index label, and sort alphabetically.

per_school_summary = (
    pd.DataFrame({"School Name":school_names,
                    "School Type": school_types,
                    "Total Students": school_total_students,
                    "School Budget": school_budgets,
                    "Per Student Budget": per_student_budget,
                    "Average Maths Score": school_maths_average,
                    "Average Reading Score": school_reading_average,
                    "% Passing Maths": schools_percent_passing_maths,
                    "% Passing Reading": schools_percent_passing_reading,
                    "% Overall Passing": schools_overall_passing})
    .set_index("School Name")
    .rename_axis(None)
    .sort_index()
)

#   Format Columns
#   Budget columns become currency text (e.g. "$628.00"); they are strings from here on.
for money_column in ("School Budget", "Per Student Budget"):
    per_school_summary[money_column] = per_school_summary[money_column].map("${:,.2f}".format)

#Optional rounding
#   Columns are picked by name rather than by position so the rounding keeps
#   targeting the right columns if the dictionary above is ever reordered.
rounded_columns = ["Average Maths Score",
                    "Average Reading Score",
                    "% Passing Maths",
                    "% Passing Reading",
                    "% Overall Passing"]
per_school_summary[rounded_columns] = per_school_summary[rounded_columns].round(2)

per_school_summary

Unnamed: 0,School Type,Total Students,School Budget,Per Student Budget,Average Maths Score,Average Reading Score,% Passing Maths,% Passing Reading,% Overall Passing
Bailey High School,Government,4976,"$3,124,928.00",$628.00,72.35,71.01,91.64,87.38,80.08
Cabrera High School,Independent,1858,"$1,081,356.00",$582.00,71.66,71.36,90.85,89.07,80.79
Figueroa High School,Government,2949,"$1,884,411.00",$639.00,68.7,69.08,81.65,82.81,67.65
Ford High School,Government,2739,"$1,763,916.00",$644.00,69.09,69.57,82.44,82.22,67.47
Griffin High School,Independent,1468,"$917,500.00",$625.00,71.79,71.25,91.21,88.49,81.34
Hernandez High School,Government,4635,"$3,022,020.00",$652.00,68.87,69.19,80.95,81.88,66.36
Holden High School,Independent,427,"$248,087.00",$581.00,72.58,71.66,89.93,88.52,78.92
Huang High School,Government,2917,"$1,910,635.00",$655.00,68.94,68.91,81.69,81.45,66.71
Johnson High School,Government,4761,"$3,094,650.00",$650.00,68.84,69.04,82.06,81.98,67.19
Pena High School,Independent,962,"$585,858.00",$609.00,72.09,71.61,91.68,86.59,79.21


## Highest-Performing Schools (by % Overall Passing)

In [63]:
# Rank every school from highest to lowest combined pass rate.
top_schools = per_school_summary.sort_values(by="% Overall Passing", ascending=False)

top_schools

Unnamed: 0,School Type,Total Students,School Budget,Per Student Budget,Average Maths Score,Average Reading Score,% Passing Maths,% Passing Reading,% Overall Passing
Griffin High School,Independent,1468,"$917,500.00",$625.00,71.79,71.25,91.21,88.49,81.34
Cabrera High School,Independent,1858,"$1,081,356.00",$582.00,71.66,71.36,90.85,89.07,80.79
Bailey High School,Government,4976,"$3,124,928.00",$628.00,72.35,71.01,91.64,87.38,80.08
Wright High School,Independent,1800,"$1,049,400.00",$583.00,72.05,70.97,91.78,86.67,79.72
Rodriguez High School,Government,3999,"$2,547,363.00",$637.00,72.05,70.94,90.8,87.4,79.42
Pena High School,Independent,962,"$585,858.00",$609.00,72.09,71.61,91.68,86.59,79.21
Holden High School,Independent,427,"$248,087.00",$581.00,72.58,71.66,89.93,88.52,78.92
Shelton High School,Independent,1761,"$1,056,600.00",$600.00,72.03,70.26,91.54,86.71,78.88
Thomas High School,Independent,1635,"$1,043,130.00",$638.00,69.58,69.77,83.85,82.63,69.48
Figueroa High School,Government,2949,"$1,884,411.00",$639.00,68.7,69.08,81.65,82.81,67.65


## Lowest-Performing Schools (by % Overall Passing)

In [64]:
# Rank every school from lowest to highest combined pass rate
# (sort_values sorts in ascending order by default).
bottom_schools = per_school_summary.sort_values(by="% Overall Passing")

bottom_schools

Unnamed: 0,School Type,Total Students,School Budget,Per Student Budget,Average Maths Score,Average Reading Score,% Passing Maths,% Passing Reading,% Overall Passing
Hernandez High School,Government,4635,"$3,022,020.00",$652.00,68.87,69.19,80.95,81.88,66.36
Huang High School,Government,2917,"$1,910,635.00",$655.00,68.94,68.91,81.69,81.45,66.71
Johnson High School,Government,4761,"$3,094,650.00",$650.00,68.84,69.04,82.06,81.98,67.19
Wilson High School,Independent,2283,"$1,319,574.00",$578.00,69.17,68.88,82.79,81.3,67.46
Ford High School,Government,2739,"$1,763,916.00",$644.00,69.09,69.57,82.44,82.22,67.47
Figueroa High School,Government,2949,"$1,884,411.00",$639.00,68.7,69.08,81.65,82.81,67.65
Thomas High School,Independent,1635,"$1,043,130.00",$638.00,69.58,69.77,83.85,82.63,69.48
Shelton High School,Independent,1761,"$1,056,600.00",$600.00,72.03,70.26,91.54,86.71,78.88
Holden High School,Independent,427,"$248,087.00",$581.00,72.58,71.66,89.93,88.52,78.92
Pena High School,Independent,962,"$585,858.00",$609.00,72.09,71.61,91.68,86.59,79.21


## Maths Scores by Year

In [86]:
#Create sorted list of the year levels present in the data
years = sorted(df["year"].unique())

#Average maths score per year level, as a {year: mean} dictionary.
#A single groupby replaces filtering the whole frame once per year;
#groupby sorts its keys, so the entries come out in ascending year order.
years_maths_average = df.groupby("year")["maths_score"].mean().to_dict()

years_maths_average

{9: 70.42803295932679,
 10: 70.2115460267506,
 11: 70.34182568334192,
 12: 70.36700848208633}

In [83]:
#Create sorted list of the year levels present in the data
years = sorted(df["year"].unique())

#Store each year's rows as its own DataFrame in a dictionary keyed by year.
#One groupby pass splits the frame, instead of filtering it once per year
#and re-wrapping each slice in pd.DataFrame.
Years_df = {year: year_rows for year, year_rows in df.groupby("year")}

#Sanity check: the year-12 frame should contain only year-12 rows.
Years_df[12]["year"].value_counts()


12    7899
Name: year, dtype: int64