In [1]:
# Setup: Import resources and modules
import pandas as pd
import numpy as np 

# Locations of the two input sheets, relative to the notebook's directory.
school_data_to_load = "Resources/school_info.csv"
student_data_to_load = "Resources/student_info.csv"

# Read both sheets into DataFrames.
school_data = pd.read_csv(school_data_to_load)
student_data = pd.read_csv(student_data_to_load)

# Attach each student's school attributes to the student row. A left join
# keeps every student row; the join key is the single shared column
# "school_name" (listing it twice in `on` adds nothing).
merged_df = pd.merge(student_data, school_data, how="left", on="school_name")
merged_df

Unnamed: 0,Student ID,student_name,gender,grade,school_name,reading_score,math_score,School ID,type,size,budget
0,0,Paul Bradley,M,9th,Huang High School,66,79,0,District,2917,1910635
1,1,Victor Smith,M,12th,Huang High School,94,61,0,District,2917,1910635
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60,0,District,2917,1910635
3,3,Dr. Richard Scott,M,12th,Huang High School,67,58,0,District,2917,1910635
4,4,Bonnie Ray,F,9th,Huang High School,97,84,0,District,2917,1910635
...,...,...,...,...,...,...,...,...,...,...,...
39165,39165,Donna Howard,F,12th,Thomas High School,99,90,14,Charter,1635,1043130
39166,39166,Dawn Bell,F,10th,Thomas High School,95,70,14,Charter,1635,1043130
39167,39167,Rebecca Tanner,F,9th,Thomas High School,73,84,14,Charter,1635,1043130
39168,39168,Desiree Kidd,F,10th,Thomas High School,99,90,14,Charter,1635,1043130


In [2]:
## Calculations
# Builds the per-school summary table: per-student budget, average math and
# reading scores, percent passing each subject, and percent passing both.
#
# NOTE: this cell rebinds `school_data`. Re-run the load cell before re-running
# this one; otherwise the merges below collide with the `math_score` /
# `reading_score` columns that a previous run already added.

# Minimum score that counts as a pass (used for math, reading and overall).
PASSING_SCORE = 70

# Per-student budget from the school sheet, added as a new column.
total_budget = school_data["budget"]
school_sizes = school_data["size"]
per_student = total_budget / school_sizes
school_data = school_data.assign(**{"Per Student Budget": per_student})

# Index the student rows by school and compute each school's mean scores.
schools_by_name = merged_df.set_index("school_name")
by_school = schools_by_name.groupby("school_name")
avg_math = by_school["math_score"].mean()
avg_reading = by_school["reading_score"].mean()

# One row per school holding both averages, then join onto the school sheet.
avg_scores = pd.merge(avg_math, avg_reading, on=["school_name"])
school_data = pd.merge(avg_scores, school_data, on=["school_name"])

# Rename the averages now: the passing *counts* merged in below arrive under
# the raw names "math_score" / "reading_score" and would otherwise collide.
school_data = school_data.rename(
    columns={
        "math_score": "Avg Math Score",
        "reading_score": "Avg Reading Score",
    }
)

# Boolean masks over the student rows, built once and reused below.
passed_math = schools_by_name["math_score"] >= PASSING_SCORE
passed_reading = schools_by_name["reading_score"] >= PASSING_SCORE

# Student rows with a passing score in each subject.
passing_math = schools_by_name.loc[passed_math]
passing_reading = schools_by_name.loc[passed_reading]

# Number of passing students per school, one column per subject.
num_passing_math = passing_math.groupby("school_name")["math_score"].count()
num_passing_reading = passing_reading.groupby("school_name")["reading_score"].count()
num_passing = pd.merge(num_passing_math, num_passing_reading, on=["school_name"])

# Add the counts to the table. From here on the "math_score" and
# "reading_score" columns of `school_data` hold pass COUNTS, not scores.
school_data = pd.merge(num_passing, school_data, on="school_name")
math_count = school_data["math_score"]
reading_count = school_data["reading_score"]
students = school_data["size"]
school_data = school_data.assign(
    **{
        "Percent Passing Math": math_count / students * 100,
        "Percent Passing Reading": reading_count / students * 100,
    }
)

# Overall passing: students who pass both math and reading.
math_reading = schools_by_name.loc[passed_math & passed_reading]

# count() skips missing values, so this assumes "grade" is never null
# -- TODO confirm against the student sheet.
num_overall = math_reading.groupby("school_name")["grade"].count()
total_students = by_school["Student ID"].count()
both = pd.merge(num_overall, total_students, on="school_name")

# Both Series are indexed by school_name, so the division aligns per school.
overall = num_overall / total_students * 100
both["overall"] = overall

# Keep the finished table in a variable instead of only displaying it, so the
# "overall" column is available to later cells. `school_data` itself is left
# without that column.
school_summary = school_data.merge(both["overall"], on="school_name")
school_summary

Unnamed: 0,school_name,math_score,reading_score,Avg Math Score,Avg Reading Score,School ID,type,size,budget,Per Student Budget,Percent Passing Math,Percent Passing Reading,overall
0,Bailey High School,3318,4077,77.048432,81.033963,7,District,4976,3124928,628.0,66.680064,81.93328,54.642283
1,Cabrera High School,1749,1803,83.061895,83.97578,6,Charter,1858,1081356,582.0,94.133477,97.039828,91.334769
2,Figueroa High School,1946,2381,76.711767,81.15802,1,District,2949,1884411,639.0,65.988471,80.739234,53.204476
3,Ford High School,1871,2172,77.102592,80.746258,13,District,2739,1763916,644.0,68.309602,79.299014,54.289887
4,Griffin High School,1371,1426,83.351499,83.816757,4,Charter,1468,917500,625.0,93.392371,97.138965,90.599455
5,Hernandez High School,3094,3748,77.289752,80.934412,3,District,4635,3022020,652.0,66.752967,80.862999,53.527508
6,Holden High School,395,411,83.803279,83.814988,8,Charter,427,248087,581.0,92.505855,96.252927,89.227166
7,Huang High School,1916,2372,76.629414,81.182722,0,District,2917,1910635,655.0,65.683922,81.316421,53.513884
8,Johnson High School,3145,3867,77.072464,80.966394,12,District,4761,3094650,650.0,66.057551,81.222432,53.539172
9,Pena High School,910,923,83.839917,84.044699,9,Charter,962,585858,609.0,94.594595,95.945946,90.540541
