# Schooling

In [1]:
import json
import os

import pandas as pd
import requests
from bson import ObjectId
from census import Census
from dotenv import load_dotenv
from folium import Map, Marker, Icon, FeatureGroup, LayerControl, Choropleth
from folium.plugins import HeatMap
from folium.vector_layers import Circle, Polygon

In [2]:
from pymongo import MongoClient
# Connect with pymongo's default settings and select the project database.
client = MongoClient()
db = client['companies_project']

In this third filter I will add to each candidate the results of some queries for school enrollment ratios and the education level of its area, extracted from the Census API. This data will be stored inside 'offices.demographics'.


In [3]:
# Load the variables declared in the local .env file into the environment,
# then read the Census API key from there.
load_dotenv()
census_api_k = os.environ.get('census_api_key')

# Census client for the 2018 data; used by schooling() below.
c = Census(census_api_k, year=2018)

In [4]:
def schooling(finded):
    """Attach Census enrollment and education-level percentages to each office.

    For every document in ``finded`` the first five characters of
    ``offices.zip_code`` are used to query the ACS 5-year Data Profile through
    the module-level Census client ``c``. The values are written to
    ``offices.demographics.school%`` and ``offices.demographics.educ_lvl%`` of
    the document with the same ``_id`` in ``db.companies_usa_filt1``.

    A document that cannot be processed (missing zip code, failed API call,
    failed database write, ...) is counted and reported, and the loop moves on
    to the next one.

    NOTE(review): the writes go to ``companies_usa_filt1`` while other cells of
    this notebook read ``companies_usa_filt2`` / ``companies_usa_filt2_c`` --
    confirm that this is the intended collection.

    Parameters
    ----------
    finded : iterable of dict
        Documents holding at least ``_id`` and ``offices.zip_code``.

    Returns
    -------
    str
        A message stating how many documents could not be updated.
    """
    # Stored key -> Census variable code, grouped by destination sub-document.
    school_vars = {'kinder_enrollP': 'DP02_0054PE',
                   'presch_enrollP': 'DP02_0053PE',
                   'element_enrollP': 'DP02_0055PE'}
    educ_vars = {'highschP': 'DP02_0066PE',
                 'bachelP': 'DP02_0067PE'}
    fields = tuple(school_vars.values()) + tuple(educ_vars.values())

    counter = 0
    for comp in finded:
        try:
            zip_code = comp['offices']['zip_code'][:5]
            doc_id = ObjectId(comp['_id'])
            # One request carrying all five variables instead of five requests.
            row = c.acs5dp.zipcode(fields, zip_code)[0]
            schoolP = {name: row[code] for name, code in school_vars.items()}
            educ_lvlP = {name: row[code] for name, code in educ_vars.items()}
            # Single write so both sub-documents are set together.
            db.companies_usa_filt1.update_one(
                {'_id': doc_id},
                {'$set': {'offices.demographics.school%': schoolP,
                          'offices.demographics.educ_lvl%': educ_lvlP}})
        except Exception as err:
            # Exception (not a bare except) keeps Ctrl-C / kernel interrupts working.
            counter += 1
            print(f'schooling: skipped a document ({type(err).__name__}: {err})')
    return f'Failed to append schooling in {counter} documents'

In [140]:
# Fetch every document of companies_usa_filt2, projecting only the id and the
# office zip code.
offices = list(
    db.companies_usa_filt2.find(
        filter={},
        projection={'_id': 1, 'offices.zip_code': 1},
    )
)

In [15]:
# Runs the Census lookups and MongoDB updates for every fetched office; the
# returned summary string is shown as the cell output.
schooling(offices)

'Failed to append schooling in 0 documents'

I will keep only those places where the enrollment ratios for kindergarten, preschool and elementary school are each equal to or higher than 4%, and where both the share of people who completed high school or higher and the share who hold a bachelor's degree or higher are at least 75%.

In [5]:
# Build the filter field by field: every enrollment percentage must be >= 4 and
# every education-level percentage must be >= 75 (all conditions are ANDed).
edu_filter = {}
for field in ('offices.demographics.school%.kinder_enrollP',
              'offices.demographics.school%.presch_enrollP',
              'offices.demographics.school%.element_enrollP'):
    edu_filter[field] = {'$gte': 4}
for field in ('offices.demographics.educ_lvl%.highschP',
              'offices.demographics.educ_lvl%.bachelP'):
    edu_filter[field] = {'$gte': 75}

offices_edu = list(db.companies_usa_filt2_c.find(edu_filter))

In [6]:
# Number of offices that satisfy every threshold of the query above.
len(offices_edu)

104

This left me with 104 candidates. Let's now check, for each of those, how many schools are within an 8-kilometer radius. I will use the Google Places API for that purpose.

In [64]:
# Load the .env file and read the Google Cloud API key used by schools_around().
load_dotenv()
goog_cloud_k = os.environ.get('google_cloud')

In [103]:
def schools_around(places, radius='8000', timeout=30):
    """Store the schools found around each office by Google Places Nearby Search.

    For every document in ``places`` a Nearby Search request (keyword
    ``school``, type ``primary_school``) is sent for the office coordinates.
    The returned results are saved as a dict keyed by their position
    (``'0'``, ``'1'``, ...) under ``offices.schools`` of the document with the
    same ``_id`` in ``db.companies_usa_filt1``.

    Only the first page of results is stored; Nearby Search returns at most 20
    results per page, so a stored count of 20 means "20 or more".

    NOTE(review): the writes go to ``companies_usa_filt1`` while the candidates
    in this notebook are read from ``companies_usa_filt2_c`` -- confirm that
    this is the intended collection.

    Parameters
    ----------
    places : iterable of dict
        Documents holding ``_id``, ``offices.latitude`` and ``offices.longitude``.
    radius : str or int, optional
        Search radius in meters (default ``'8000'``).
    timeout : float, optional
        Seconds to wait for each HTTP response before giving up.

    Raises
    ------
    requests.HTTPError
        If the HTTP response carries an error status code.
    RuntimeError
        If the API reports a status other than ``OK`` or ``ZERO_RESULTS``;
        nothing is written for that office, so existing data is not replaced
        by an empty result.
    """
    url = 'https://maps.googleapis.com/maps/api/place/nearbysearch/json?'

    for office in places:
        coords = office['offices']
        doc_id = ObjectId(office['_id'])

        params = dict(
            location=f"{coords['latitude']},{coords['longitude']}",
            radius=str(radius),
            keyword='school',
            type='primary_school',
            key=f"{goog_cloud_k}")

        # A timeout keeps one stalled connection from hanging the whole loop.
        resp = requests.get(url=url, params=params, timeout=timeout)
        resp.raise_for_status()
        data = resp.json()

        # The API answers HTTP 200 even for denied / over-quota requests and
        # signals the problem in the "status" field of the payload.
        status = data.get('status', 'OK')
        if status not in ('OK', 'ZERO_RESULTS'):
            raise RuntimeError(
                f"Places API returned status {status!r} for office {doc_id}: "
                f"{data.get('error_message', 'no error message')}")

        data_format = {str(index): school
                       for index, school in enumerate(data.get('results', []))}

        db.companies_usa_filt1.update_one({'_id': doc_id}, {'$set': {'offices.schools': data_format}})

In [104]:
# Sends one Google Places request per candidate office and writes the results
# to MongoDB; the in-memory offices_edu list itself is not modified by this call.
schools_around(offices_edu)

Let's now check whether there is any place where the number of schools around is lower than usual.

In [7]:
# Number of stored schools for each office; the printed set shows which
# distinct counts occur.
amount = [len(off['offices']['schools']) for off in offices_edu]
print(set(amount))

{20}


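As can be seen, every place has exactly 20 schools stored. Since a Places Nearby Search request returns at most 20 results per page and only the first page is saved, this means that all the places have 20 or more schools 8 kilometers away or less. So there are 104 candidates left for the next filter. Let's save them into the database as a new collection and move on to the next filter.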

In [8]:
# Persist the remaining candidates as the companies_usa_filt3_c collection.
# The documents were fetched without a projection, so each keeps its original _id.
# NOTE(review): re-running this cell against a collection that already holds
# these documents will presumably fail with duplicate-key errors -- confirm
# before re-executing.
db.companies_usa_filt3_c.insert_many(offices_edu)

<pymongo.results.InsertManyResult at 0x7ffd7168a600>

# Map

In [4]:
# All candidates of the previous filter, keeping only the 'offices' sub-document.
offices_filter2 = list(db.companies_usa_filt2_c.find({}, {'offices':1}))

# Column name -> list of values, one entry per office.
to_df = {'latitude': [], 'longitude': [], 'city': [], 'state': [],
         'kinder_rat': [], 'presch_rat': [], 'element_rat': [],
         'highs_g': [], 'bachel_g': []}

for office in offices_filter2:
    site = office['offices']
    enrollment = site['demographics']['school%']
    attainment = site['demographics']['educ_lvl%']
    record = {
        'latitude': site['latitude'],
        'longitude': site['longitude'],
        'city': site['city'],
        'state': site['state_code'],
        'kinder_rat': enrollment['kinder_enrollP'],
        'presch_rat': enrollment['presch_enrollP'],
        'element_rat': enrollment['element_enrollP'],
        'highs_g': attainment['highschP'],
        'bachel_g': attainment['bachelP'],
    }
    for column, value in record.items():
        to_df[column].append(value)

In [5]:
# One row per office, one column per key of to_df; show the first five rows.
school_df = pd.DataFrame(to_df)
school_df.head(n=5)

Unnamed: 0,latitude,longitude,city,state,kinder_rat,presch_rat,element_rat,highs_g,bachel_g
0,40.723731,-73.996431,New York,NY,1.2,3.6,14.2,94.9,80.5
1,37.764726,-122.394523,San Francisco,CA,4.5,12.1,23.8,91.8,68.9
2,37.789634,-122.404052,San Francisco,CA,4.5,12.1,23.8,91.8,68.9
3,40.757929,-73.985506,New York City,NY,4.5,6.0,17.7,92.5,72.2
4,40.757725,-73.986011,New York,NY,4.5,6.0,17.7,92.5,72.2


In [6]:
# Base map at zoom level 4; the coordinates appear to be the centre of the
# United States.
m_heat = Map(location=[37.0902,-95.7129],zoom_start=4)

# Heat layer with every office of the previous filter. It is bound to
# `heat_layer` rather than `all`, so the builtin all() stays usable in the kernel.
heat_layer = HeatMap(data=school_df[["latitude","longitude"]], name='Offices', radius=10)
fg = FeatureGroup(name="Offices")
heat_layer.add_to(fg)
fg.add_to(m_heat)

# Same thresholds as the MongoDB query that produced offices_edu.
school_sel_df = school_df[(school_df['kinder_rat'] >= 4) & (school_df['presch_rat'] >= 4) & (school_df['element_rat'] >= 4) & (school_df['highs_g'] >= 75) & (school_df['bachel_g'] >= 75)]

# One marker per selected office: the city in the popup, the state in the tooltip.
fg_sel = FeatureGroup(name='Selected')
for _, row in school_sel_df.iterrows():
    place = {"location":[row["latitude"],row["longitude"]], "popup":row["city"], "tooltip":row["state"]}
    # A fresh Icon per marker, as in the original loop.
    ic = Icon(color='blue', prefix='glyphicon', icon='glyphicon-briefcase', icon_color='white')
    Marker(**place, icon=ic).add_to(fg_sel)
fg_sel.add_to(m_heat)

# Layer switcher, then display the map as the cell output.
LayerControl(collapsed=False).add_to(m_heat)
m_heat