In [223]:
#Dependencies
import pandas as pd
import os

In [224]:
#Build the CSV locations inside the Resources folder
resources_folder = "Resources"
schoolspath = os.path.join(resources_folder, "schools_complete.csv")
studentspath = os.path.join(resources_folder, "students_complete.csv")

#Load each CSV into its own DataFrame (schools first, then students)
schools, students = pd.read_csv(schoolspath), pd.read_csv(studentspath)


In [225]:
#Explore schools DataFrame
#A bare last expression makes the notebook render the table as the cell output.

schools

Unnamed: 0,School ID,school_name,type,size,budget
0,0,Huang High School,Government,2917,1910635
1,1,Figueroa High School,Government,2949,1884411
2,2,Shelton High School,Independent,1761,1056600
3,3,Hernandez High School,Government,4635,3022020
4,4,Griffin High School,Independent,1468,917500
5,5,Wilson High School,Independent,2283,1319574
6,6,Cabrera High School,Independent,1858,1081356
7,7,Bailey High School,Government,4976,3124928
8,8,Holden High School,Independent,427,248087
9,9,Pena High School,Independent,962,585858


In [226]:
#Explore students DataFrame
#head() limits the preview to the first five rows instead of the whole table.

students.head()

Unnamed: 0,Student ID,student_name,gender,year,school_name,reading_score,maths_score
0,0,Paul Bradley,M,9,Huang High School,96,94
1,1,Victor Smith,M,12,Huang High School,90,43
2,2,Kevin Rodriguez,M,12,Huang High School,41,76
3,3,Richard Scott,M,12,Huang High School,89,86
4,4,Bonnie Ray,F,9,Huang High School,87,69


In [227]:
#Merge DataFrame by School Name
#Each student row gains the columns of its school (inner join on school_name).
#validate="many_to_one" makes pandas raise a MergeError if a school name is
#duplicated in schools; without the check such a duplicate would silently
#duplicate student rows and skew every statistic computed from df.

df = pd.merge(students, schools, on="school_name", validate="many_to_one")

df



Unnamed: 0,Student ID,student_name,gender,year,school_name,reading_score,maths_score,School ID,type,size,budget
0,0,Paul Bradley,M,9,Huang High School,96,94,0,Government,2917,1910635
1,1,Victor Smith,M,12,Huang High School,90,43,0,Government,2917,1910635
2,2,Kevin Rodriguez,M,12,Huang High School,41,76,0,Government,2917,1910635
3,3,Richard Scott,M,12,Huang High School,89,86,0,Government,2917,1910635
4,4,Bonnie Ray,F,9,Huang High School,87,69,0,Government,2917,1910635
...,...,...,...,...,...,...,...,...,...,...,...
39165,39165,Donna Howard,F,12,Thomas High School,51,48,14,Independent,1635,1043130
39166,39166,Dawn Bell,F,10,Thomas High School,81,89,14,Independent,1635,1043130
39167,39167,Rebecca Tanner,F,9,Thomas High School,99,99,14,Independent,1635,1043130
39168,39168,Desiree Kidd,F,10,Thomas High School,72,77,14,Independent,1635,1043130


# LGA Section

### Calculations:

In [228]:
#   Minimum score that counts as a pass, applied to both subjects
PASS_MARK = 50

#   Total number of unique schools
total_schools = df["school_name"].nunique()

#   Total students
total_students = df["Student ID"].nunique()

#   School names in order of first appearance (reused by the school summary cells)
school_names = list(df["school_name"].unique())

#   Total Budget
#   Keep only the first row of each school and sum those budgets once,
#   instead of re-filtering the whole merged frame for every school.
#   NOTE(review): the original author reports that this equals
#   schools["budget"].sum() -- re-check if the source files change.
total_budget = df.drop_duplicates(subset="school_name")["budget"].sum()

#   Average Maths Score
average_maths = df["maths_score"].mean()

#   Average Reading Score
average_reading = df["reading_score"].mean()

#   Boolean masks, one entry per student: True where the subject was passed
passed_maths = df["maths_score"] >= PASS_MARK
passed_reading = df["reading_score"] >= PASS_MARK

#   The mean of a boolean mask is the fraction of True values,
#   i.e. passing students / all students.

#   % Passing Maths
percent_passing_maths = passed_maths.mean() * 100

#   % Passing Reading
percent_passing_reading = passed_reading.mean() * 100

#   % Overall Passing (passed both reading and maths)
overall_passing = (passed_reading & passed_maths).mean() * 100





### DataFrame Creation:

In [230]:
#   Build a one-row DataFrame from a single record holding every area-level metric.
summary_record = {
    "Total Schools": total_schools,
    "Total Students": total_students,
    "Total Budget": total_budget,
    "Average Maths Score": average_maths,
    "Average Reading Score": average_reading,
    "% Passing Maths": percent_passing_maths,
    "% Passing Reading": percent_passing_reading,
    "% Overall Passing": overall_passing,
}
area_summary = pd.DataFrame([summary_record])

#   Format Total Budget as currency with thousands separators
currency = "${:,.2f}".format
area_summary["Total Budget"] = area_summary["Total Budget"].map(currency)

#   Format scores and percentages to two decimal places.
#   Note: The Starter Code does not round these scores; the rounding follows
#   the optional task regarding cleaner formatting.
two_decimals = "{:.2f}".format
for column in (
    "Average Maths Score",
    "Average Reading Score",
    "% Passing Maths",
    "% Passing Reading",
    "% Overall Passing",
):
    area_summary[column] = area_summary[column].map(two_decimals)


#Output
area_summary

Unnamed: 0,Total Schools,Total Students,Total Budget,Average Maths Score,Average Reading Score,% Passing Maths,% Passing Reading,% Overall Passing
0,15,39170,"$24,649,428.00",70.34,69.98,86.08,84.43,72.81


## School Summary

### Calculations:

In [241]:

#   Per-school metrics, one list per metric.
#   Every list is filled in the order of school_names (the list of school
#   names collected earlier), so position i in each list is the same school.

#Split the merged frame by school once instead of re-filtering it for every
#school; sort=False skips the key sort that is not needed here
students_by_school = df.groupby("school_name", sort=False)

#Declare Lists
school_types = []
school_total_students = []
school_budgets = []
school_maths_average = []
school_reading_average = []

for school in school_names:

    #All student rows that belong to this school
    school_rows = students_by_school.get_group(school)

    #   School Type: read from the school's first row.
    #   (No variable named `type` is created, so the builtin is not shadowed.)
    school_types.append(school_rows["type"].iloc[0])

    #   Total Students: count of Student ID values
    school_total_students.append(school_rows["Student ID"].count())

    #   Total School Budget: read from the school's first row
    school_budgets.append(school_rows["budget"].iloc[0])

    #   Average Maths Score
    school_maths_average.append(school_rows["maths_score"].mean())

    #   Average Reading Score
    school_reading_average.append(school_rows["reading_score"].mean())

#   Per Student Budget
#   Pair each budget with its student count positionally. Building the list
#   directly avoids a comprehension run only for its append side effects,
#   which would display a list of None as the cell output.
per_student_budget = [
    budget / student_count
    for budget, student_count in zip(school_budgets, school_total_students)
]




#   % Passing Maths





[68.93520740486801,
 68.69854187860291,
 72.03407155025553,
 68.87486515641855,
 71.78814713896458,
 69.17082785808147,
 71.65715823466093,
 72.35289389067525,
 72.5831381733021,
 72.08835758835758,
 72.04722222222222,
 72.04776194048512,
 68.84310018903592,
 69.09127418765974,
 69.5816513761468]

In [None]:

#   School names
#Use previous list that collected school names: school_names

#Split the merged frame by school once, then collect each school's rows in
#school_names order so every metric below reuses the same groups instead of
#re-filtering the whole frame per metric and per school.
students_by_school = df.groupby("school_name", sort=False)
school_groups = [students_by_school.get_group(school) for school in school_names]

#   School Type
#Read from the first row of each school (no `type` variable is created, so
#the builtin of that name is not shadowed)
school_types = [rows["type"].iloc[0] for rows in school_groups]

#   Total Students
#Count of Student ID values per school
school_total_students = [rows["Student ID"].count() for rows in school_groups]

#   Total School Budget
#Read from the first row of each school
school_budgets = [rows["budget"].iloc[0] for rows in school_groups]

#   Per Student Budget
#Budgets and student counts share the school_names order, so pair them by position
per_student_budget = [
    budget / student_count
    for budget, student_count in zip(school_budgets, school_total_students)
]

#   Average Maths Score
school_maths_average = [rows["maths_score"].mean() for rows in school_groups]

#   Average Reading Score
school_reading_average = [rows["reading_score"].mean() for rows in school_groups]


#   % Passing Maths

school_maths_average





## Maths Scores by Year

### This draft code is kept only for its second cell, which demonstrates storing multiple DataFrames in a dictionary.

In [1]:
#Distinct year levels present in df, in ascending order
years = sorted(df["year"].unique())

#Map each year level to the mean maths score of the students in that year
years_maths_average = {
    year: df.loc[df["year"] == year, "maths_score"].mean()
    for year in years
}

years_maths_average

NameError: name 'df' is not defined

In [None]:
#Distinct year levels present in df, in ascending order
years = sorted(df["year"].unique())

#Dictionary keyed by year level; each value is a DataFrame holding only the
#rows of df for that year
Years_df = {
    year: pd.DataFrame(df.loc[df["year"] == year, :])
    for year in years
}

#Check: the year-12 frame should contain only the value 12 in its year column
Years_df[12]["year"].value_counts()
