### Deliverable 1: Collect the Data

In [43]:
import pandas as pd
import os

In [51]:
# Build the CSV location from its components so the path separator is
# chosen by the operating system instead of being hard-coded as "/".
# (Calling os.path.join with a single argument just returns that argument.)
student_data = os.path.join("..", "Resources", "new_full_student_data.csv")

# Import data into a DataFrame
student_df = pd.read_csv(student_data)

# Confirm that Pandas correctly imported the data
student_df.head()

Unnamed: 0,student_id,student_name,grade,school_name,reading_score,math_score,school_type,school_budget
19509,109236636,Robert Sawyer,12th,Silva High School,43.3,27.2,Public,991918
19510,63239258,David Herman,9th,Woods High School,52.1,80.4,Public,912243
19511,95516554,Megan Gill,11th,Wagner High School,93.9,84.1,Public,846745
19512,65050383,Lori Stone,11th,Bowers High School,94.6,70.9,Public,848324
19513,34720657,Anna Jensen,12th,Montgomery High School,82.3,42.4,Charter,893368


### Deliverable 2: Prepare the Data

In [85]:
# Check for rows that have NaN (or missing values): count them per column.
# A bare `student_df.isnull()` in the middle of a cell is evaluated and then
# thrown away, so the counts are printed explicitly here.
missing_per_column = student_df.isna().sum()
print(missing_per_column)

# Remove every row that has at least one missing value. The result is a new
# DataFrame; `student_df` itself is left untouched.
s_df = student_df.dropna()
s_df

Unnamed: 0,student_id,student_name,grade,school_name,reading_score,math_score,school_type,school_budget
0,103880842,Travis Martin,9th,Sullivan High School,59.0,88.2,Public,961125
1,45069750,Michael Brown,9th,Dixon High School,94.7,73.5,Charter,870334
2,45024902,Gabriela Lucero,9th,Wagner High School,89.0,70.4,Public,846745
3,62582498,Susan Richardson,9th,Silva High School,69.7,80.3,Public,991918
5,74579444,Cynthia Johnson,9th,Montgomery High School,63.5,76.9,Charter,893368
...,...,...,...,...,...,...,...,...
19508,83985333,Deborah Sanders,10th,Silva High School,60.5,64.6,Public,991918
19509,109236636,Robert Sawyer,12th,Silva High School,43.3,27.2,Public,991918
19511,95516554,Megan Gill,11th,Wagner High School,93.9,84.1,Public,846745
19512,65050383,Lori Stone,11th,Bowers High School,94.6,70.9,Public,848324


In [86]:
# Flag every row that repeats an earlier row, then count the flagged rows
duplicate_flags = s_df.duplicated()
duplicate_flags.sum()

0

In [88]:
# Drop repeated rows, keeping the first occurrence of each one
s_df2 = s_df.drop_duplicates(keep="first")

# Sanity check: no duplicated rows should be left
leftover_duplicates = s_df2.duplicated()
leftover_duplicates.sum()

0

In [89]:
# Check the data type of every column in the de-duplicated DataFrame
s_df2.dtypes

student_id         int64
student_name      object
grade             object
school_name       object
reading_score    float64
math_score       float64
school_type       object
school_budget      int64
dtype: object

In [91]:
# Remove the "th" suffix from every value.
# The pattern is anchored with "$" so only a trailing "th" is stripped, and
# `regex=True` is passed explicitly because the default for this argument
# differs between pandas versions.
# `assign` returns a new DataFrame, so nothing is written into a frame that
# was derived from another one (which can raise SettingWithCopyWarning).
s_df2 = s_df2.assign(
    grade=s_df2["grade"].str.replace(r"th$", "", regex=True)
)
s_df2.loc[:, "grade"]

0         9
1         9
2         9
3         9
5         9
         ..
19508    10
19509    12
19511    11
19512    11
19513    12
Name: grade, Length: 14831, dtype: object

In [94]:
# Change "grade" column to the "int" type & Verify the column types.
# Assigning through `s_df2.loc[:, "grade"] = ...` writes the converted values
# into the existing column on recent pandas releases, which keeps the old
# `object` dtype. `DataFrame.astype` returns a new frame whose "grade" column
# really has an integer dtype.
s_df2 = s_df2.astype({"grade": "int"})
s_df2.dtypes

student_id         int64
student_name      object
grade              int64
school_name       object
reading_score    float64
math_score       float64
school_type       object
school_budget      int64
dtype: object

### Deliverable 3: Summarize the Data

In [95]:
# Generate summary statistics for the DataFrame's columns
s_df2.describe()

Unnamed: 0,student_id,grade,reading_score,math_score,school_budget
count,14831.0,14831.0,14831.0,14831.0,14831.0
mean,69752960.0,10.355539,72.357865,64.675733,893742.749107
std,34529090.0,1.097728,15.22459,15.844093,53938.066467
min,10009060.0,9.0,10.5,3.7,817615.0
25%,39844330.0,9.0,62.2,54.5,846745.0
50%,69659780.0,10.0,73.8,65.3,893368.0
75%,99274490.0,11.0,84.0,76.0,956438.0
max,129999700.0,12.0,100.0,100.0,991918.0


In [96]:
# Display the mean of the math scores
math_scores = s_df2.loc[:, "math_score"]
math_scores.mean()

64.67573326141189

In [97]:
# Store the lowest reading score found in the data, then display it
reading_scores = s_df2.loc[:, "reading_score"]
min_reading_score = reading_scores.min()
min_reading_score

10.5

### Deliverable 4: Drill Down into the Data

In [98]:
# Display the grade column (selected by its label)
s_df2["grade"]

0         9
1         9
2         9
3         9
5         9
         ..
19508    10
19509    12
19511    11
19512    11
19513    12
Name: grade, Length: 14831, dtype: int64

In [105]:
# Display the first three rows of Columns 3, 4, and 5
# (positional slices: rows 0-2, columns 3-5; the stop value is exclusive)
s_df2.iloc[:3, 3:6]

Unnamed: 0,school_name,reading_score,math_score
0,Sullivan High School,59.0,88.2
1,Dixon High School,94.7,73.5
2,Wagner High School,89.0,70.4


In [112]:
# Select the rows for Grade 9, and display their summary statistics.
# The mask tests for equality with 9 (rather than "< 10") so it keeps meaning
# "Grade 9" even if lower grades ever appear in the data, and it is not named
# `filter`, which would shadow Python's built-in function of that name.
grade_9_mask = s_df2["grade"] == 9
s_df2[grade_9_mask].describe()

Unnamed: 0,student_id,grade,reading_score,math_score,school_budget
count,4132.0,4132.0,4132.0,4132.0,4132.0
mean,69794410.0,9.0,69.236713,66.585624,898692.606002
std,34705650.0,0.0,15.277354,16.661533,54891.596611
min,10009060.0,9.0,17.9,5.3,817615.0
25%,39538480.0,9.0,59.0,56.0,846745.0
50%,69840370.0,9.0,70.05,67.8,893368.0
75%,99395040.0,9.0,80.5,78.5,957299.0
max,129999700.0,9.0,99.9,100.0,991918.0


In [135]:
# Store the row(s) whose reading score equals the overall minimum
has_min_reading = s_df2["reading_score"].eq(min_reading_score)
min_reading_row = s_df2.loc[has_min_reading]
min_reading_row

Unnamed: 0,student_id,student_name,grade,school_name,reading_score,math_score,school_type,school_budget
0,103880842,Travis Martin,9,Sullivan High School,10.5,88.2,Public,961125
1,45069750,Michael Brown,9,Dixon High School,10.5,73.5,Charter,870334
2,45024902,Gabriela Lucero,9,Wagner High School,10.5,70.4,Public,846745
3,62582498,Susan Richardson,9,Silva High School,10.5,80.3,Public,991918
5,74579444,Cynthia Johnson,9,Montgomery High School,10.5,76.9,Charter,893368
...,...,...,...,...,...,...,...,...
19508,83985333,Deborah Sanders,10,Silva High School,10.5,64.6,Public,991918
19509,109236636,Robert Sawyer,12,Silva High School,10.5,27.2,Public,991918
19511,95516554,Megan Gill,11,Wagner High School,10.5,84.1,Public,846745
19512,65050383,Lori Stone,11,Bowers High School,10.5,70.9,Public,848324


In [125]:
# Select all the reading scores from the 10th graders at Dixon High School
# (assumes the "grade" column has already been converted to integers above)
dixon_grade_10_mask = (s_df2["school_name"] == "Dixon High School") & (
    s_df2["grade"] == 10
)
s_df2.loc[dixon_grade_10_mask, "reading_score"]


In [None]:
# Find the mean reading score for all the students in Grades 11 and 12
# (assumes the "grade" column has already been converted to integers above)
upper_grades_mask = s_df2["grade"].isin([11, 12])
s_df2.loc[upper_grades_mask, "reading_score"].mean()


### Deliverable 5: Compare the Data

In [None]:
# Display the average budget for each school type.
# Every row is one student, so this is the mean of "school_budget" over the
# student rows of each school type.
s_df2.groupby("school_type")["school_budget"].mean()


In [None]:
# Find the total number of students at each school
# (counts the student_id entries within each school_name group)
students_per_school = s_df2.groupby("school_name")["student_id"].count()
students_per_school


In [None]:
# Sort those numbers from largest to smallest.
# The per-school counts are recomputed here so the cell does not depend on a
# variable created in another cell.
students_per_school_sorted = (
    s_df2.groupby("school_name")["student_id"]
    .count()
    .sort_values(ascending=False)
)
students_per_school_sorted


In [None]:
# Find the average math score by grade for each school type
# (result is indexed by school_type first, then grade)
s_df2.groupby(["school_type", "grade"])["math_score"].mean()


### Deliverable 6: Findings

Write a few sentences to describe any discoveries that you made while performing your analysis. Include any additional analysis that you believe would be worthwhile.