## Great Schools Integration into GeoPandas

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import geopandas as gpd
from shapely.geometry import Point, Polygon
from fiona.drvsupport import supported_drivers
import folium as fm
import re
%matplotlib inline

# Build Schools DataFrame
# Read zip columns as strings so they are never parsed as numbers. 'Zip_Code'
# is the column this notebook groups on; the original 'Zip' key is kept in case
# the CSV also carries a column by that name (unknown keys are ignored).
schools_df = pd.read_csv(
    './Data/greatSchools.csv',
    dtype={'Zip': object, 'Zip_Code': object},
)

# Only the rating gets a default: schools with no rating are assigned 5.
# Filling the whole frame would also overwrite missing values in unrelated
# columns (e.g. a missing zip code would become the bogus zip "5").
schools_df['Rating'] = schools_df['Rating'].fillna(5).astype('int64')

In [2]:
# Average school rating for each zip code (a Series named 'Rating', indexed by zip).
grouped_df = (
    schools_df
    .groupby(by='Zip_Code')['Rating']
    .mean()
)

# Cast the zip-code labels to strings so they can be matched against string keys.
grouped_df.index = grouped_df.index.astype(str)

In [4]:
# Zip-code geometry frame (DC / MD / VA), loaded from a KML file.
# Mark the KML driver as read/write in fiona's driver table before asking
# geopandas to read the file with driver='KML'.
supported_drivers.update({'KML': 'rw'})
place_df = gpd.read_file(
    './Data/Geographies/DC_MD_VA_Zipcodes.kml',
    driver='KML',
)

def find_zip_code(cell):
    """Extract the 5-digit zip code embedded in a placemark name.

    The name is searched for the literal text
    ``<at><openparen>NNNNN<closeparen>`` where ``NNNNN`` is exactly five digits.

    Parameters
    ----------
    cell : str or None
        The value of a ``Name`` cell. Non-string values (``None``, ``NaN``)
        are treated as "no zip code" instead of raising ``TypeError``.

    Returns
    -------
    str or None
        The five digits as a string, or ``None`` when the pattern is absent.
    """
    # A missing name arrives as None/NaN; re.search would raise on it.
    if not isinstance(cell, str):
        return None

    match = re.search(r'<at><openparen>(\d{5})<closeparen>', cell)
    return match.group(1) if match else None

# Parse the zip code out of every row's 'Name', preserving row order,
# then attach the result as a new 'Zip_Code' column.
zipCodes = [find_zip_code(name) for name in place_df['Name']]

place_df['Zip_Code'] = zipCodes

In [5]:
#Merging crime and zip DataFrames. Not that not every city in zip has reported crime data to FBI data base
school_df_merge = pd.merge(place_df, grouped_df, left_on='Zip_Code', right_index=True)

school_df_merge = school_df_merge.fillna(0) #filling Nan with 0's

#setting zips without reported school rating to the average of the population reported by great schools
school_df_merge.loc[school_df_merge['Rating']==0, ['Rating']] = 5
display(school_df_merge.head())

Unnamed: 0,Name,Description,geometry,Zip_Code,Rating
0,<at><openparen>20001<closeparen>,<center><table><tr><th colspan='2' align='cent...,"POLYGON Z ((-77.02758 38.90964 0.00000, -77.02...",20001,5.315789
1,<at><openparen>20002<closeparen>,<center><table><tr><th colspan='2' align='cent...,"POLYGON Z ((-77.01217 38.89209 0.00000, -77.01...",20002,4.871795
2,<at><openparen>20003<closeparen>,<center><table><tr><th colspan='2' align='cent...,"POLYGON Z ((-77.01402 38.88236 0.00000, -77.01...",20003,5.266667
3,<at><openparen>20004<closeparen>,<center><table><tr><th colspan='2' align='cent...,"POLYGON Z ((-77.03365 38.89735 0.00000, -77.03...",20004,6.0
4,<at><openparen>20005<closeparen>,<center><table><tr><th colspan='2' align='cent...,"POLYGON Z ((-77.03654 38.90252 0.00000, -77.03...",20005,6.0
