In [50]:
from pathlib import Path
import pandas as pd

school_data_to_load = Path("Resources/schools_complete.csv")
student_data_to_load = Path("Resources/students_complete.csv")

school_data = pd.read_csv(school_data_to_load)
student_data = pd.read_csv(student_data_to_load)

school_data_complete = pd.merge(student_data, school_data, how="left", on=["school_name", "school_name"])
school_data_complete.head()

Unnamed: 0,Student ID,student_name,gender,year,school_name,reading_score,maths_score,School ID,type,size,budget
0,0,Paul Bradley,M,9,Huang High School,96,94,0,Government,2917,1910635
1,1,Victor Smith,M,12,Huang High School,90,43,0,Government,2917,1910635
2,2,Kevin Rodriguez,M,12,Huang High School,41,76,0,Government,2917,1910635
3,3,Richard Scott,M,12,Huang High School,89,86,0,Government,2917,1910635
4,4,Bonnie Ray,F,9,Huang High School,87,69,0,Government,2917,1910635


In [33]:
total_students_count = student_data["student_name"].count()
total_students_count

39170

In [51]:
total_budget_count = school_data["budget"].sum()
total_budget_count

24649428

In [52]:
total_school_count = len(school_data["school_name"].unique())
total_school_count

15

In [54]:
Per_student_budget = total_budget_count/total_students_count
Per_student_budget

629.2935409752362

In [55]:
average_maths_score = student_data["maths_score"].mean()
average_maths_score

70.33819249425581

In [56]:
average_reading_score = student_data["reading_score"].mean()
average_reading_score

69.98013786060761

In [57]:
passing_maths_count = school_data_complete[(school_data_complete["maths_score"] >= 50)].count()["student_name"]
passing_maths_count

33717

In [58]:
passing_maths_percentage = passing_maths_count / float(total_students_count) * 100
passing_maths_percentage

86.07863160582077

In [59]:
passing_reading_count = school_data_complete[(school_data_complete["reading_score"] >= 50)].count()["student_name"]
passing_reading_count

33070

In [60]:
passing_reading_percentage = passing_reading_count / float(total_students_count) * 100
passing_reading_percentage

84.42685728874139

In [61]:
overall_passing_count = school_data_complete.loc[(school_data_complete["maths_score"] >= 50) & (school_data_complete["reading_score"] >= 50)].count()["student_name"]
overall_passing_count

28519

In [62]:
overall_passing_rate = overall_passing_count / float(total_students_count) * 100
overall_passing_rate

72.80827163645647

In [63]:
area_summary = pd.DataFrame({"Total School": [total_school_count],
                              "Total Students": total_students_count,
                              "Total Budget": total_budget_count,
                              "Average Maths Score": average_maths_score,
                              "Average Reading Score": average_reading_score,
                              "% Passing Maths": passing_maths_percentage,
                              "% Passing_Reading": passing_reading_percentage,
                              "% Overall Passing": overall_passing_rate})
area_summary

Unnamed: 0,Total School,Total Students,Total Budget,Average Maths Score,Average Reading Score,% Passing Maths,% Passing_Reading,% Overall Passing
0,15,39170,24649428,70.338192,69.980138,86.078632,84.426857,72.808272


In [64]:
area_summary.dtypes

Total School               int64
Total Students             int64
Total Budget               int64
Average Maths Score      float64
Average Reading Score    float64
% Passing Maths          float64
% Passing_Reading        float64
% Overall Passing        float64
dtype: object

In [66]:
area_summary["Total Students"] = pd.to_numeric(area_summary["Total Students"], errors='coerce')
area_summary

Unnamed: 0,Total School,Total Students,Total Budget,Average Maths Score,Average Reading Score,% Passing Maths,% Passing_Reading,% Overall Passing
0,15,39170,24649428,70.338192,69.980138,86.078632,84.426857,72.808272


In [67]:
area_summary["Total Students"] = area_summary["Total Students"].map("{:,}".format)
area_summary["Total Budget"] = area_summary["Total Budget"].map("${:,.2f}".format)
area_summary

Unnamed: 0,Total School,Total Students,Total Budget,Average Maths Score,Average Reading Score,% Passing Maths,% Passing_Reading,% Overall Passing
0,15,39170,"$24,649,428.00",70.338192,69.980138,86.078632,84.426857,72.808272


In [68]:
school_types = school_data.set_index(["school_name"])["type"]
school_types

school_name
Huang High School         Government
Figueroa High School      Government
Shelton High School      Independent
Hernandez High School     Government
Griffin High School      Independent
Wilson High School       Independent
Cabrera High School      Independent
Bailey High School        Government
Holden High School       Independent
Pena High School         Independent
Wright High School       Independent
Rodriguez High School     Government
Johnson High School       Government
Ford High School          Government
Thomas High School       Independent
Name: type, dtype: object

In [77]:
per_school_counts = student_data.groupby('school_name')['student_name'].sum()
per_school_counts

school_name
Bailey High School       Blake MartinKathryn KaneRichard HaasFrank Mars...
Cabrera High School      Olivia ShortKerry JonesBruce JacksonAmy GreenM...
Figueroa High School     Amy JacobsNathan CampbellRandall StewartJennif...
Ford High School         Michael MercadoStephen WolfBonnie HughesMeliss...
Griffin High School      Heather WrightElizabeth GoodwinMichelle WongSc...
Hernandez High School    Russell DavisTimothy WalkerKatie JohnstonJoann...
Holden High School       Daniel RodriguezBryan PerkinsScott HolderTroy ...
Huang High School        Paul BradleyVictor SmithKevin RodriguezRichard...
Johnson High School      Lisa CaseyJessica LopezAnna WilkinsAndrew Smit...
Pena High School         Alec DavisMichael MeyerDonald GutierrezTravis ...
Rodriguez High School    Sherry JenkinsKimberly CalderonWilliam BradyJa...
Shelton High School      Jamie MontgomeryShannon PhillipsTodd BarberDes...
Thomas High School       Norma MataCody MillerErik SnyderTanya Martinez...
Wilson High S

In [78]:
per_school_budget = school_data_complete.groupby('school_name')['budget'].sum()
per_school_budget

school_name
Bailey High School       15549641728
Cabrera High School       2009159448
Figueroa High School      5557128039
Ford High School          4831365924
Griffin High School       1346890000
Hernandez High School    14007062700
Holden High School         105933149
Huang High School         5573322295
Johnson High School      14733628650
Pena High School           563595396
Rodriguez High School    10186904637
Shelton High School       1860672600
Thomas High School        1705517550
Wilson High School        3012587442
Wright High School        1888920000
Name: budget, dtype: int64

In [None]:
per_school_capita =

In [79]:
year_nine = school_data[(school_data["year"] == 9)]
year_ten = school_data[(school_data["year"] == 10)]
year_eleven = school_datae[(school_data["year"] == 11)]
year_twelve = school_data[(school_data["year"] == 12)]
year_nine

KeyError: 'year'

In [None]:
year_nine_scores = year_nine.groupby('school_name')['maths_score'].mean()
year_ten_scores = year_ten.groupby('school_name')['maths_score'].mean()
year_eleven_scores = year_eleven.groupby('school_name')['maths_score'].mean()
year_twelve_scores = year_twelve.groupby('school_name')['maths_score'].mean()                                   

In [None]:
maths_scores_by_year = pd.dataframe({"Year 9": [year_nine_scores],
                                     "Year 10": [year_ten_scores],
                                     "Year 11": [year_eleven_scores],
                                     "Year 12": [year_twelve_scores],})
maths_scores_by_year.index.name = None
maths_scores_by_year

In [75]:
year_nine = school_data_complete[(school_data_complete["year"] == 9)]
year_ten = school_data_complete[(school_data_complete["year"] == 10)]
year_eleven = school_data_complete[(school_data_complete["year"] == 11)]
year_twelve = school_data_complete[(school_data_complete["year"] == 12)]

year_nine_scores = year_nine.groupby('school_name')['reading_score'].mean()
year_ten_scores = year_ten.groupby('school_name')['reading_score'].mean()
year_eleven_scores = year_eleven.groupby('school_name')['reading_score'].mean()
year_twelve_scores = year_twelve.groupby('school_name')['reading_score'].mean() 

reading_scores_by_year = pd.dataframe({"Year 9": [year_nine_scores],
                                     "Year 10": [year_ten_scores],
                                     "Year 11": [year_eleven_scores],
                                     "Year 12": [year_twelve_scores],})
reading_scores_by_year.index.name = None
reading_scores_by_year

AttributeError: module 'pandas' has no attribute 'dataframe'

In [73]:
spending_bins = [0, 585, 630, 645, 680]
group_names = ["<$585", "$585-630", "$630-645", "$645-680"]

In [None]:
school_spending_df = per_school_summary

In [74]:
school_spending_df["Spending Ranges (Per Student)"] = pd.cut(per_school_summary["Per Student Budget"], spending_bins, labels=labels)
school_spending_df

NameError: name 'per_school_summary' is not defined

In [None]:
spending_maths_scores = school_spending_df.groupby(["Spending Ranges (Per Student)"])["Average Maths Score"].mean()
spending_reading_scores = school_spending_df.groupby(["Spending Ranges (Per Student)"])["Average Reading Score"].mean()
spending_passing_maths = school_spending_df.groupby(["Spending Ranges (Per Student)"])["% Passing Maths"].mean()
spending_passing_reading = school_spending_df.groupby(["Spending Ranges (Per Student)"])["% Passing Reading"].mean()
overall_passing_spending = school_spending_df.groupby(["Spending Ranges (Per Student)"])["% Overall Passing"].mean()

In [14]:
spending_summary = pd.dataframe({"Average Maths Score": [spending_maths_scores],
                                 "Average Reading Score": [spending_maths_scores],
                                 "% Passing Maths": [spending_passing_maths],
                                 "% Passing Reading": [spending_passing_reading],
                                 "% Overall Passing": [overall_passing_spending],})
spending_summary

NameError: name 'pd' is not defined

In [None]:
size_bins = [0, 1000, 2000, 5000]
group_names = ["Small (<1000)", "Medium (1000-2000)", "Large (2000-5000)"]

In [15]:
per_school_summary["School Size"] = pd.cut(per_school_summary["Total Students"], spending_bins, labels=labels)
per_school_summary

SyntaxError: invalid syntax (3640345833.py, line 1)

In [None]:
size_maths_scores = per_school_summary.groupby(["School Size"])["Average Maths Score"].mean()
size_reading_scores = per_school_summary.groupby(["School Size"])["Average Reading Score"].mean()
size_passing_maths = per_school_summary.groupby(["School Size"])["% Passing Maths"].mean()
size_passing_reading = per_school_summary.groupby(["School Size"])["% Passing Reading"].mean()
size_overall_passing = per_school_summary.groupby(["School Size"])["% Overall Passing"].mean()

In [None]:
size_summary = pd.dataframe({"Average Maths Score": [size_maths_scores],
                             "Average Reading Score": [size_maths_scores],
                             "% Passing Maths": [size_passing_maths],
                             "% Passing Reading": [size_passing_reading],
                             "% Overall Passing": [size_overall_passing],})
size_summary

In [None]:
type_maths_scores = per_school_summary.groupby(["School Type"])["Average Maths Score"].mean()
type_reading_scores = per_school_summary.groupby(["School Type"])["Average Reading Score"].mean()
type_passing_maths = per_school_summary.groupby(["School Type"])["% Passing Maths"].mean()
type_passing_reading = per_school_summary.groupby(["School Type"])["% Passing Reading"].mean()
type_overall_passing = per_school_summary.groupby(["School Type"])["% Overall Passing"].mean()

In [None]:
type_summary = pd.dataframe({"Average Maths Score": [type_maths_scores],
                             "Average Reading Score": [type_maths_scores],
                             "% Passing Maths": [type_passing_maths],
                             "% Passing Reading": [type_passing_reading],
                             "% Overall Passing": [type_overall_passing],})
type_summary