In [2]:
import pandas as pd

In [3]:
# Read .csv files of school grades and school ZIP codes from Florida Department of Education
file = "./Resources/Outside_Resources/School_Data/SchoolGrades19_clean.csv"
school_grades = pd.read_csv(file)

# Read file with school zip code info
file2 = "./Resources/Outside_Resources/School_Data/SchoolZipCodes.csv"
school_zip = pd.read_csv(file2)

In [4]:
# Drop all columns in school_grades but school number and grade in percent
# Original file has no blanks in relevant fields
school_grades = school_grades.loc[:, ["School Number", "Percent of Total Possible Points"]]

school_grades.head()

Unnamed: 0,School Number,Percent of Total Possible Points
0,31,54
1,41,56
2,71,32
3,91,58
4,101,55


In [5]:
# Drop all columns in school_zip but school number and ZIP code
school_zip = school_zip.loc[:, ["SCHOOL", "ZIP"]]

# Drop all rows without a valid ZIP code
# (Blank or invalid ZIP codes were changed to 12345 in CSV file)
index_names = school_zip[school_zip['ZIP'] == 12345 ].index
school_zip = school_zip.drop(index_names)

school_zip.head()

Unnamed: 0,SCHOOL,ZIP
0,221,32615
1,52,32601
2,112,32641
3,400,32601
4,602,32609


In [6]:
# Add column for zip code in school_grades
school_grades['zip']= ""
school_grades.head()

Unnamed: 0,School Number,Percent of Total Possible Points,zip
0,31,54,
1,41,56,
2,71,32,
3,91,58,
4,101,55,


In [7]:
# Ensure number formats match between dataframes
# Convert numbers in school_zip so they match number formats in school_grades_elem_xc
school_zip.astype({'SCHOOL': 'int64'}, {'ZIP': 'int64'}).dtypes
school_grades.astype({'School Number': 'int64'}, {'Percent of Total Possible Points': 'int64'})

Unnamed: 0,School Number,Percent of Total Possible Points,zip
0,31,54,
1,41,56,
2,71,32,
3,91,58,
4,101,55,
...,...,...,...
3332,391,66,
3333,7001,70,
3334,7001,58,
3335,7001,69,


In [8]:
# Match ZIP codes from school_zip to school codes in school_grades to populate ZIP column in school_grades
for i, row in school_zip.iterrows():
    school_grades.loc[school_grades['School Number'] == row['SCHOOL'], "zip"] = row['ZIP']

school_grades.head()

Unnamed: 0,School Number,Percent of Total Possible Points,zip
0,31,54,32327
1,41,56,32428
2,71,32,32327
3,91,58,32327
4,101,55,32428


In [9]:
# Group schools and grades by ZIP and get average grade for ZIP
grades_by_zip = school_grades.groupby(['zip']).mean()

# Drop school # column
grades_by_zip = grades_by_zip.drop('School Number', axis=1)

# Round percentages, make numbers into ints, fix col headings
grades_by_zip = grades_by_zip.round()
grades_by_zip = grades_by_zip.astype({'Percent of Total Possible Points': 'int64'})
grades_by_zip = grades_by_zip.rename(columns={"Percent of Total Possible Points": "percent_total_points"})

grades_by_zip.head()

Unnamed: 0_level_0,percent_total_points
zip,Unnamed: 1_level_1
32008,63
32052,58
32064,58
32081,79
32083,78


In [10]:
# Re-index dataframe so ZIP is a column
grades_by_zip.reset_index(inplace = True)
grades_by_zip.head()

Unnamed: 0,zip,percent_total_points
0,32008,63
1,32052,58
2,32064,58
3,32081,79
4,32083,78


In [11]:
# Create dataframe with column for letter grade based on percent of total points
letter_grades_by_zip = grades_by_zip
letter_grades_by_zip['letter_grade']=''

letter_grades_by_zip.head()

Unnamed: 0,zip,percent_total_points,letter_grade
0,32008,63,
1,32052,58,
2,32064,58,
3,32081,79,
4,32083,78,


In [11]:
# For loop to populate letter_grade column
for i, row in letter_grades_by_zip.iterrows():
    letter_grades_by_zip.loc[letter_grades_by_zip.percent_total_points >= 62, "letter_grade"] = 'A'
    letter_grades_by_zip.loc[(letter_grades_by_zip.percent_total_points <=61) & (letter_grades_by_zip.percent_total_points >= 54), "letter_grade"] = 'B'
    letter_grades_by_zip.loc[(letter_grades_by_zip.percent_total_points <=53) & (letter_grades_by_zip.percent_total_points >= 41), "letter_grade"] = 'C'
    letter_grades_by_zip.loc[(letter_grades_by_zip.percent_total_points <=40) & (letter_grades_by_zip.percent_total_points >= 32), "letter_grade"] = 'D'
    letter_grades_by_zip.loc[letter_grades_by_zip.percent_total_points <= 31, "letter_grade"] = 'F'

letter_grades_by_zip.head()

Unnamed: 0,zip,percent_total_points,letter_grade
0,32008,63,A
1,32052,58,B
2,32064,58,B
3,32081,79,A
4,32083,78,A


In [12]:
letter_grades_by_zip.to_csv('./Resources/Outside_Resources/School_Data/letter_grades_by_zip.csv')

In [13]:
index = letter_grades_by_zip.index
number_of_rows = len(index)
 
print(number_of_rows)

477


In [16]:
from sqlalchemy import create_engine
engine = create_engine('sqlite:///sqlite.db')

letter_grades_by_zip.to_sql(name='grades', con=engine)