In [1]:
import geopy.distance
import json
import os
import pandas as pd
import time

from dotenv import load_dotenv
from shapely.geometry import Point, Polygon
from openrouteservice import client, directions, isochrones



In [11]:
## set variables
output_dir = '../data/curated/'

## pull the API keys out of the environment (populated from .env);
## token_1 .. token_34 are looked up and a missing variable is stored as None
load_dotenv()
tokens = [os.environ.get(f'token_{key_number}') for key_number in range(1, 35)]

## school key -> list of property ids inside that school's isochrone
nearby_properties = dict()

In [20]:
## read data
## use a context manager so the JSON file handle is closed deterministically
with open(f"{output_dir}properties_processed.json") as properties_file:
    property_data = json.load(properties_file)
school_data = pd.read_csv(f'{output_dir}schools/school_info.csv')

## keep only the Primary, Pri/Sec and Secondary school types
filtered_schools = school_data[school_data['School_Type'].isin(['Primary', 'Pri/Sec', 'Secondary'])]

In [None]:
## for each school find the properties in 30 min driving distance

## parse every property's coordinate string once, up front, instead of once
## per school. The first and last characters are stripped and the two numbers
## are swapped when building the Point -- presumably the string is stored as
## "(lat, lon)" while shapely expects (x=lon, y=lat); TODO confirm.
property_points = {}
for prop_id, raw_coords in property_data['Coordinates'].items():
    backwards = list(map(float, raw_coords[1:-1].split(',')))
    property_points[prop_id] = Point(backwards[1], backwards[0])

## each pass uses one API key (tokens[7] .. tokens[11]) for a slice of 500 schools
for i in range(0,5):
    token = tokens[7+i]
    schools = filtered_schools.iloc[i*500:i*500+500]
    for index, row in schools.iterrows():
        school_coords = [row['Longitude'], row['Latitude']]
        school_key = f"{row['School_Type']}, {school_coords}"

        ## marker since num schools > isochrone quota:
        ## schools already present were handled by an earlier run of this cell
        if school_key not in nearby_properties:
            ors = client.Client(key=token)

            ## find search region
            params_iso = {'locations': [school_coords],
                        'profile': 'driving-car',
                        'range':[1800] # 30 mins
                        }
            iso = ors.isochrones(**params_iso)['features'][0]['geometry']

            ## build the polygon once per school rather than once per property
            search_region = Polygon(iso['coordinates'][0])

            ## find all properties that fall in the search region; the list is
            ## stored in a single assignment (empty when nothing matches) so an
            ## interrupted run never leaves a partial list marked as done
            nearby_properties[school_key] = [
                prop_id
                for prop_id, point in property_points.items()
                if point.within(search_region)
            ]

            ## pause between requests to the API
            time.sleep(0.5)

In [62]:
## reverse dictionary to find the schools within a 30 min drive of each property
## every property gets an entry, even when no school lists it
nearby_schools = {prop_id: [] for prop_id in property_data['Coordinates']}

## walk each school's list once instead of scanning every list for every property;
## schools are appended in nearby_properties order, as a nested scan would do
for school_key, prop_ids in nearby_properties.items():
    ## dict.fromkeys drops repeated ids while keeping order, so a school is
    ## recorded at most once per property
    for prop_id in dict.fromkeys(prop_ids):
        ## ignore ids that are not in the current property set
        if prop_id in nearby_schools:
            nearby_schools[prop_id].append(school_key)

In [112]:
## pick the school with the smallest geodesic distance to a property
def get_closest(property, schools):
    """Return the entry of ``schools`` geodesically nearest to ``property``.

    Both ``property`` and each school are two-element sequences; elements
    [1] and [0] are swapped before being handed to ``geopy.distance.geodesic``.

    Returns ``0`` when ``schools`` is empty. When several schools tie for the
    minimum distance, the earliest one in ``schools`` is returned.
    """
    origin = (property[1], property[0])

    ## no candidates -> sentinel value
    if len(schools) == 0:
        return 0

    def distance_from_origin(candidate):
        return geopy.distance.geodesic(origin, (candidate[1], candidate[0]))

    return min(schools, key=distance_from_origin)

In [123]:
## request the driving route between two points
def get_route(start, end, token):
    """Return the route summary for driving from ``start`` to ``end``.

    ``end == 0`` is treated as "no school": no request is made and a
    placeholder ``{'distance': 0.0, 'duration': 0.0}`` is returned.

    Otherwise a client is created with ``token``, a 'driving-car' directions
    request is sent in geojson format, the function sleeps for 0.5 s, and the
    ``summary`` of the first returned feature is handed back.
    """
    ## sentinel for "no nearby schools"
    if end == 0:
        return dict(distance=0.0, duration=0.0)

    waypoints = [tuple(start), tuple(end)]
    ors_client = client.Client(key=token)
    response = ors_client.directions(
        coordinates=waypoints,
        profile='driving-car',
        geometry='true',
        format_out='geojson',
    )

    ## pause after each request
    time.sleep(0.5)

    first_feature = response['features'][0]
    return first_feature['properties']['summary']

In [94]:
## property id -> route summary, kept separately per school level
school_dist = {'Primary': {}, 'Secondary': {}}

In [175]:
## find nearest schools

## properties handled per API key. Each property makes up to 3 directions
## requests; the original note reads "API max 2000/calls per loop 3", so 666
## presumably keeps one key under a 2000-call limit -- TODO confirm the quota.
PROPERTIES_PER_TOKEN = 666


def _parse_school_key(school_key):
    """Split a "<type>, [<a>, <b>]" school key into (type, [a, b]) with floats."""
    parts = school_key.split(', ')
    return parts[0], [float(parts[1][1:]), float(parts[2][:-1])]


def _pick_route(combined_route, dedicated_route):
    """Choose between the Pri/Sec route and the single-level school route.

    A duration of 0 marks "no school of that kind". The combined route is
    chosen when it exists and is strictly quicker than the dedicated one, or
    when the dedicated one is missing and the combined one is not. In every
    other case (including ties and both missing) the dedicated route is used.
    """
    combined = combined_route['duration']
    dedicated = dedicated_route['duration']
    if combined != 0 and combined < dedicated:
        return combined_route
    if dedicated == 0 and combined > dedicated:
        return combined_route
    return dedicated_route


for i in range(0,34):
    token = tokens[i]

    ## resume after the properties that already have a stored result
    start = len(school_dist['Primary'])
    stop = min(start + PROPERTIES_PER_TOKEN, len(nearby_schools))
    if start >= stop:
        break  ## nothing left to process, later passes would be no-ops

    ## property ids are the string forms of consecutive integers
    for prop_index in range(start, stop):
        prop_id = str(prop_index)

        ## separate school types
        coords_by_type = {'Pri/Sec': [], 'Primary': [], 'Secondary': []}
        for school_key in nearby_schools[prop_id]:
            school_type, school_coords = _parse_school_key(school_key)
            if school_type in coords_by_type:
                coords_by_type[school_type].append(school_coords)

        ## strip the first/last character of the stored string and swap the
        ## two numbers so the order matches the school coordinates
        backwards = list(map(float, property_data['Coordinates'][prop_id][1:-1].split(',')))
        prop_coords = [backwards[1], backwards[0]]

        ## find closest school
        pri_sec_school = get_closest(prop_coords, coords_by_type['Pri/Sec'])
        pri_school = get_closest(prop_coords, coords_by_type['Primary'])
        sec_school = get_closest(prop_coords, coords_by_type['Secondary'])

        ## get route to closest school
        pri_sec_route = get_route(prop_coords, pri_sec_school, token)
        pri_route = get_route(prop_coords, pri_school, token)
        sec_route = get_route(prop_coords, sec_school, token)

        ## decide both results before storing either, so a failure while
        ## comparing cannot leave Primary and Secondary out of step (the
        ## resume logic above only looks at the Primary dict)
        primary_choice = _pick_route(pri_sec_route, pri_route)
        secondary_choice = _pick_route(pri_sec_route, sec_route)

        ## save information
        school_dist['Primary'][prop_id] = primary_choice
        school_dist['Secondary'][prop_id] = secondary_choice

In [179]:
## number of schools recorded for each property
school_count = {
    prop_id: len(school_keys)
    for prop_id, school_keys in nearby_schools.items()
}

In [182]:
## attach the school features to the property data (mutates property_data in place)
property_data.update({
    'Nearby_Schools': school_count,
    'Primary_Distance': school_dist['Primary'],
    'Secondary_Distance': school_dist['Secondary'],
})

In [185]:
## write json
## the context manager flushes and closes the file before it is read back below
with open(f"{output_dir}school_info.json", 'w') as json_file:
    json.dump(property_data, json_file)

# write csv (re-read the JSON that was just written and save it as a table)
data = pd.read_json(f"{output_dir}school_info.json")
data.to_csv(f"{output_dir}school_info.csv")