<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Read-Property" data-toc-modified-id="Read-Property-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Read Property</a></span></li><li><span><a href="#Distance-Bus-Stop" data-toc-modified-id="Distance-Bus-Stop-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Distance Bus Stop</a></span></li><li><span><a href="#Distance-Train-Station" data-toc-modified-id="Distance-Train-Station-3"><span class="toc-item-num">3&nbsp;&nbsp;</span>Distance Train Station</a></span></li><li><span><a href="#Distance-School" data-toc-modified-id="Distance-School-4"><span class="toc-item-num">4&nbsp;&nbsp;</span>Distance School</a></span></li><li><span><a href="#Calculate-Routine-Distance-by-openrouteservice" data-toc-modified-id="Calculate-Routine-Distance-by-openrouteservice-5"><span class="toc-item-num">5&nbsp;&nbsp;</span>Calculate Routine Distance by openrouteservice</a></span></li></ul></div>

In [None]:
import ast
import glob
import os
import warnings

import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

warnings.filterwarnings("ignore")

# Read Property

In [None]:
df_property = pd.read_csv("../data/curated/domain_2.csv")

# Only the listing URL (used later as the join key) and the coordinates are needed here.
df_property = df_property[["url", "coordinates"]].copy()

# `coordinates` is stored as text. Parse it once per row with ast.literal_eval
# instead of calling eval() twice: literal_eval only accepts Python literals,
# so a malformed or malicious cell in the CSV cannot execute arbitrary code.
parsed_coordinates = df_property["coordinates"].map(ast.literal_eval)

# Element 0 is used as the latitude and element 1 as the longitude.
df_property["longitude"] = parsed_coordinates.map(lambda pair: pair[1])
df_property["latitude"] = parsed_coordinates.map(lambda pair: pair[0])

# create gdf
# The points are created directly in EPSG:4326, so no reprojection is needed afterwards.
gdf_property = gpd.GeoDataFrame(
    df_property,
    geometry=gpd.points_from_xy(df_property.longitude, df_property.latitude, crs="EPSG:4326"),
)
gdf_property = gdf_property[["url", "geometry"]].copy()
gdf_property = gdf_property.reset_index(drop=True)
gdf_property.head()

In [None]:
# Sanity check: display the CRS attached to the property GeoDataFrame.
gdf_property.crs

# Distance Bus Stop

In [None]:
# The metro and regional bus-stop layers are prepared identically:
# read the shapefile, keep the identifying columns plus the geometry,
# and reproject the geometry column to EPSG:4326.
busstop_columns = ['STOP_ID', 'STOP_NAME', 'LATITUDE', 'LONGITUDE', 'geometry']
busstop_shapefiles = (
    "../data/raw/PTV_data/PTV_METRO_BUS_STOP/PTV_METRO_BUS_STOP.shp",
    "../data/raw/PTV_data/PTV_REGIONAL_BUS_STOP/PTV_REGIONAL_BUS_STOP.shp",
)

busstop_layers = []
for shapefile in busstop_shapefiles:
    layer = gpd.read_file(shapefile)[busstop_columns].copy()
    layer['geometry'] = layer['geometry'].to_crs("EPSG:4326")
    busstop_layers.append(layer)

# Keep the regional layer available under its original name.
gdf_regionalbusstop = busstop_layers[1]

# Stack metro + regional stops into a single frame with a fresh 0..n-1 index.
gdf_busstop = pd.concat(busstop_layers, ignore_index=True)
gdf_busstop.head()

In [None]:
# Sanity check: display the CRS attached to the combined bus-stop GeoDataFrame.
gdf_busstop.crs

In [None]:
# define function to calculate the top20 nearest locations' coordinates by geopandas

def get_20_nearest_locations(gdf, point, k=20):
    """Return the coordinates of the ``k`` rows of ``gdf`` closest to ``point``.

    Closeness is measured with geopandas' ``distance`` on the ``"geometry"``
    column, i.e. a planar distance in the units of the frame's CRS. It is only
    used to rank candidates here, not as a final distance.

    Parameters
    ----------
    gdf : GeoDataFrame
        Candidate locations; must have a ``"geometry"`` column of points.
    point : shapely geometry
        The location to measure from.
    k : int, default 20
        Maximum number of candidates to return. Fewer are returned when
        ``gdf`` has fewer than ``k`` rows.

    Returns
    -------
    numpy.ndarray
        Object array of ``(x, y)`` tuples ordered from nearest to farthest.
    """
    distances = gdf["geometry"].distance(point)

    # Sort on the raw NumPy values: np.argsort orders NaN distances (missing or
    # empty geometries) last, whereas pandas' Series.argsort marks them with -1,
    # which would silently select the wrong rows through iloc below.
    nearest_positions = np.argsort(distances.to_numpy(), kind="stable")[:k]

    # Positions (not index labels) are used so that filtered frames with a
    # non-contiguous index are handled correctly.
    nearest_geometries = gdf.iloc[nearest_positions]["geometry"]

    return nearest_geometries.map(lambda p: (p.x, p.y)).values

# Smoke test: look up the nearest bus stops for the property at index label 1.
get_20_nearest_locations(gdf=gdf_busstop, point=gdf_property["geometry"][1])

In [None]:
def _nearest_busstops(property_point):
    """Return the nearest bus-stop coordinates for one property point."""
    return get_20_nearest_locations(gdf=gdf_busstop, point=property_point)


# One array of candidate bus-stop coordinates per property.
property_points = gdf_property["geometry"]
gdf_property["top20_near_busstop"] = property_points.map(_nearest_busstops)
gdf_property.head()

# Distance Train Station

In [None]:
# Load the metro train stations and reproject their geometry to EPSG:4326.
trainstation_shapefile = "../data/raw/PTV_data/PTV_METRO_TRAIN_STATION/PTV_METRO_TRAIN_STATION.shp"
gdf_trainstation = gpd.read_file(trainstation_shapefile)

trainstation_geometry = gdf_trainstation['geometry'].to_crs("EPSG:4326")
gdf_trainstation['geometry'] = trainstation_geometry
gdf_trainstation.head()

In [None]:
def _nearest_trainstations(property_point):
    """Return the nearest train-station coordinates for one property point."""
    return get_20_nearest_locations(gdf=gdf_trainstation, point=property_point)


# One array of candidate train-station coordinates per property.
property_points = gdf_property["geometry"]
gdf_property["top20_near_trainstation"] = property_points.map(_nearest_trainstations)
gdf_property.head()

# Distance School

+ There are 4 types of schools; we calculate the distance separately for each school type.

In [None]:
# Read the school listing from the raw Excel workbook.
school_workbook = "../data/raw/SearchResults.xlsx"
df_school = pd.read_excel(school_workbook)
df_school.head()

In [None]:
# Count how many schools fall under each value of the "Type" column.
df_school["Type"].value_counts()

In [None]:
# create gdf
# Build point geometries from the Longitude/Latitude columns (x = longitude, y = latitude).
school_points = gpd.points_from_xy(df_school["Longitude"], df_school["Latitude"], crs="EPSG:4326")
gdf_school = gpd.GeoDataFrame(df_school, geometry=school_points)

school_geometry = gdf_school['geometry'].to_crs("EPSG:4326")
gdf_school['geometry'] = school_geometry
gdf_school.head()

In [None]:
# Nearest-school candidates are computed separately for each school type.
# Maps the value in gdf_school["Type"] to the gdf_property column that stores
# the candidate coordinates for that type.
school_type_columns = {
    "Primary": "top20_near_primary",
    "Secondary": "top20_near_secondary",
    "Combined": "top20_near_combined",
    "Special": "top20_near_special",
}

for school_type, candidates_column in school_type_columns.items():
    # Filter the schools once per type rather than once per property row:
    # the subset does not depend on the property being processed.
    gdf_school_of_type = gdf_school[gdf_school["Type"] == school_type]

    # The subset is bound as a default argument so each lambda uses its own frame.
    gdf_property[candidates_column] = gdf_property["geometry"].map(
        lambda x, candidates=gdf_school_of_type: get_20_nearest_locations(gdf=candidates, point=x)
    )

gdf_property.head()

In [None]:
def _point_to_xy(point):
    """Return a point geometry as an ``(x, y)`` tuple."""
    return (point.x, point.y)


# Store each property's own coordinates in the same (x, y) form as the candidates.
gdf_property["property_cordinate"] = gdf_property["geometry"].map(_point_to_xy)
gdf_property.head()

# Calculate Routine Distance by openrouteservice

In [None]:
import openrouteservice as ors

def calculate_route_distance(start_cordinate, end_cordinates, client=None, unreachable_distance=1000000):
    """Return the shortest driving-route distance from a start to any candidate.

    One openrouteservice ``directions`` request (profile ``driving-car``) is
    made per candidate, and the minimum of the reported route distances is
    returned.

    Parameters
    ----------
    start_cordinate : sequence of two numbers
        Start position; its first two elements are sent as the first
        coordinate pair of each request.
    end_cordinates : sequence of coordinate pairs
        Candidate destinations. Any number of candidates is accepted
        (the notebook passes up to 20 per property).
    client : optional
        Object exposing ``directions(...)`` like ``openrouteservice.Client``.
        When omitted, a client for the local server at
        ``http://localhost:8080/ors`` is created for this call.
    unreachable_distance : number, default 1000000
        Placeholder recorded for a candidate whose request fails, and returned
        when there are no candidates at all.

    Returns
    -------
    number
        The smallest distance over all candidates, or ``unreachable_distance``
        if every request failed or ``end_cordinates`` is empty.
    """
    if client is None:
        client = ors.Client(base_url="http://localhost:8080/ors")

    distances = []

    # Iterate over the candidates actually supplied instead of assuming exactly
    # 20, so shorter candidate lists no longer raise IndexError.
    for end_cordinate in end_cordinates:
        coordinates = [
            [start_cordinate[0], start_cordinate[1]],
            [end_cordinate[0], end_cordinate[1]],
        ]

        try:
            route = client.directions(
                coordinates=coordinates,
                profile='driving-car',
                format='geojson',
                validate=False,
            )
            distances.append(route['features'][0]["properties"]["summary"]["distance"])
        except Exception:
            # Best effort: any failure for this candidate (no route found,
            # unexpected response shape, request error) is recorded as the
            # placeholder so the remaining candidates are still tried.
            # NOTE(review): this also hides a server that is down entirely;
            # every result would then equal the placeholder.
            distances.append(unreachable_distance)

    # np.nanmin raises on an empty array, so handle "no candidates" explicitly.
    if not distances:
        return unreachable_distance

    return np.nanmin(np.array(distances))

In [None]:
%%time

# Output distance column -> column holding the candidate coordinates to route to.
# The order matches the order in which the columns are added to gdf_property.
route_distance_sources = {
    "dist_nearest_busstop": "top20_near_busstop",
    "dist_nearest_trainstation": "top20_near_trainstation",
    "dist_nearest_primary_school": "top20_near_primary",
    "dist_nearest_secondary_school": "top20_near_secondary",
    "dist_nearest_combined_school": "top20_near_combined",
    "dist_nearest_special_school": "top20_near_special",
}

for distance_column, candidates_column in route_distance_sources.items():
    # Bind the candidate column as a default argument so the row function
    # always reads the column belonging to this iteration.
    gdf_property[distance_column] = gdf_property.apply(
        lambda row, source=candidates_column: calculate_route_distance(
            start_cordinate=row["property_cordinate"],
            end_cordinates=row[source],
        ),
        axis=1,
    )

In [None]:
# Reload the full property table and attach the computed distances to it.
df_property = pd.read_csv("../data/curated/domain_2.csv")

distance_columns = [
    'dist_nearest_busstop', 'dist_nearest_trainstation',
    'dist_nearest_primary_school', 'dist_nearest_secondary_school',
    'dist_nearest_combined_school', 'dist_nearest_special_school',
]

# The join key is "url": that is the column kept in gdf_property. The previous
# key "uri" exists in neither frame and made the merge raise KeyError.
df_property_result = pd.merge(
    df_property,
    gdf_property[["url"] + distance_columns],
    on="url",
    how="left",
)
df_property_result.head()

In [None]:
# Show the (rows, columns) shape of the merged result.
df_property_result.shape

In [None]:
# A combined school also counts as a primary and as a secondary school, so the
# primary/secondary distance becomes the smaller of its own value and the
# combined-school value.
combined_distance = df_property_result["dist_nearest_combined_school"]

for school_column in ("dist_nearest_primary_school", "dist_nearest_secondary_school"):
    own_distance = df_property_result[school_column]

    # Vectorised form of `own if own < combined else combined`: the own value
    # is kept only where it is strictly smaller, otherwise the combined value
    # is used (including when either side is NaN, as in the row-wise version).
    df_property_result[school_column] = own_distance.where(
        own_distance < combined_distance, combined_distance
    )

# The combined and special school distances are not part of the final output.
df_property_result = df_property_result.drop(["dist_nearest_combined_school", "dist_nearest_special_school"], axis=1)
df_property_result.to_csv("../data/curated/domain_final2.csv", index=False)