In [1]:
#Import packages
import pandas as pd
import os
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import requests
import io

import requests
import urllib
from urllib.request import urlopen, Request
from io import BytesIO
from io import StringIO  
from zipfile import ZipFile

In [2]:
#global values

##Input filepaths for the resale flat price datasets

#Resale flat prices 1990-1999
fp_90_99="Relevant_datasets/ResaleFlatPricesBasedonApprovalDate19901999.csv"
#Resale flat prices 2000-Feb 2012
fp_00_12="Relevant_datasets/ResaleFlatPricesBasedonApprovalDate2000Feb2012.csv"
#Resale flat prices Mar 2012 - Dec 2014
fp_12_14="Relevant_datasets/ResaleFlatPricesBasedonRegistrationDateFromMar2012toDec2014.csv"
#Resale flat prices Jan 2015-Dec 2016
fp_15_16="Relevant_datasets/ResaleFlatPricesBasedonRegistrationDateFromJan2015toDec2016.csv"
#Resale flat prices Jan 2017 onwards
fp_17_onw="Relevant_datasets/ResaleflatpricesbasedonregistrationdatefromJan2017onwards.csv"

#OneMap header
#The API token is a credential, so it is read from the ONEMAP_TOKEN environment variable
#instead of being stored in the notebook (where it would be shared and committed with the file).
#Set it before starting Jupyter, e.g. `export ONEMAP_TOKEN=<your token>`.
#When the variable is not set, no Authorization header is sent.
_onemap_token=os.environ.get('ONEMAP_TOKEN')
headers = {'Authorization':_onemap_token} if _onemap_token else {}

#Output filepath
Output_fp_Address_coord="Relevant_datasets/Address_Coord.csv"


In [3]:
##Create relevant functions


#Get coordinates from one map using request (for either latitude or longitude) and location
def getcoord(Req,Location):
    """Look up one field of the first OneMap search result for a location.

    Parameters
    ----------
    Req : str
        Key to read from the first search result, e.g. 'LATITUDE' or 'LONGITUDE'.
    Location : str
        Search text (e.g. a street name) sent to OneMap as ``searchVal``.

    Returns
    -------
    The value stored under ``Req`` in the first result, or the string 'NA'
    when OneMap reports that nothing was found.

    Raises
    ------
    requests.HTTPError
        If OneMap answers with an HTTP error status.
    """
    URL="https://www.onemap.gov.sg/api/common/elastic/search"
    #Pass the query as params so that requests URL-encodes the search text;
    #street names contain spaces and may contain characters such as '&'
    query={'searchVal':Location,'returnGeom':'Y','getAddrDetails':'N'}
    #timeout keeps one stalled connection from hanging the whole lookup loop
    r=requests.get(URL,params=query,headers=headers,timeout=30)
    #fail loudly on HTTP errors instead of on a missing key further down
    r.raise_for_status()
    json_data=r.json()
    #if onemap returns at least 1 searchval,return the requested field, else return NA
    if json_data['found']!=0:
        first_result=json_data['results'][0]
        return first_result[Req]
    return 'NA'


## Coordinates Street Name
Given OneMap's limit of 250 API calls per minute and the large size of the resale flat dataset, I decided to extract the list of unique street names, pass that list through OneMap once, and map the coordinates back to the original dataset later.

#### Plan

1. Append all datasets together
2. Extract a list of unique street names from all dataframes
3. Run the list of unique street names through OneMap
4. Fix rows with missing coordinates
5. Export the list of street names and coordinates to CSV for further processing (if needed)

In [4]:
###1.Append all datasets together###
##Read each period's csv into its own dataframe

#Filepaths in period order: 1990-1999, 2000-2012, 2012-2014, 2015-2016, 2017 onwards
RSF_list=[pd.read_csv(csv_path) for csv_path in (fp_90_99,fp_00_12,fp_12_14,fp_15_16,fp_17_onw)]

##Individual names for each dataframe in the list, for easy reference
RSF_90_99,RSF_00_12,RSF_12_14,RSF_15_16,RSF_17_onw=RSF_list

In [5]:
#append all datasets together
#One concat over the whole list keeps the rows in list order with their original index labels.
#It replaces a loop over a hard-coded range(1,5), which re-copied the growing frame on every
#pass and would silently skip any dataframe later added to RSF_list.
RSF90_onw=pd.concat(RSF_list)

In [6]:
###2.Extract a list of unique street names from all dataframes###

#One row per distinct street name in the combined dataset
unique_streets=RSF90_onw['street_name'].unique()
Address=pd.DataFrame({'street_name':unique_streets})
#Latitude and Longitude columns start out as empty strings
for coord_col in ('Lat','Lon'):
    Address[coord_col]=""

print("col list:",list(Address.columns))

col list: ['street_name', 'Lat', 'Lon']


In [7]:
###3.Run list of unique street names through onemap###

#Get Latitude and Longitude for each street name through API request to OneMap.
#Cells are written with .at[row, column]. The Address['Lat'][index]=... form is chained
#assignment: it raises SettingWithCopyWarning and is not guaranteed to update Address.
#NOTE: getcoord is called twice per street name (one request per coordinate).
for index in Address.index:
    Location=Address.at[index,'street_name']
    Address.at[index,'Lat']=getcoord('LATITUDE',Location)
    Address.at[index,'Lon']=getcoord('LONGITUDE',Location)


In [8]:
#EDA: list the street names for which no coordinates were returned ('NA')
missing_lat=Address.loc[Address['Lat']=='NA','street_name'].unique()
missing_lon=Address.loc[Address['Lon']=='NA','street_name'].unique()
print("Empty Lat", missing_lat)
print("Empty Lon", missing_lon)
#output is: array(['KG BAHRU HILL', 'JLN PASAR BARU', 'NILE RD', 'JLN MEMBINA BARAT','BUANGKOK STH FARMWAY 1'], dtype=object)

#These names are cross-referenced with town data and google map to find a street within the
#same town whose name resembles the address with missing coordinates
Missing_list=list(missing_lat)
print(Missing_list)

Empty Lat ['KG BAHRU HILL' 'JLN PASAR BARU' 'NILE RD' 'JLN MEMBINA BARAT'
 'BUANGKOK STH FARMWAY 1']
Empty Lon ['KG BAHRU HILL' 'JLN PASAR BARU' 'NILE RD' 'JLN MEMBINA BARAT'
 'BUANGKOK STH FARMWAY 1']
['KG BAHRU HILL', 'JLN PASAR BARU', 'NILE RD', 'JLN MEMBINA BARAT', 'BUANGKOK STH FARMWAY 1']


In [9]:
#Street name and town for every row of the combined dataset
RSF=RSF90_onw[['street_name','town']]

#Keep only the streets with missing lat and lon, one row per (street_name, town) pair
has_missing_coord=RSF['street_name'].isin(Missing_list)
RSF_missing=RSF[has_missing_coord].drop_duplicates(subset=['street_name', 'town'])

In [10]:
#Display each street name with missing coordinates next to its town
RSF_missing

Unnamed: 0,street_name,town
461,KG BAHRU HILL,BUKIT MERAH
629,JLN PASAR BARU,GEYLANG
3918,NILE RD,BUKIT MERAH
3924,JLN MEMBINA BARAT,BUKIT MERAH
4404,BUANGKOK STH FARMWAY 1,SENGKANG


In [11]:
##Finding closest named plausible addresses based on their town and street name or google research

#Before changing, keep the old street names in their own column to facilitate easier mapping later.
#The guard makes the cell safe to re-run: without it a second run would overwrite the saved
#names with the already-replaced ones.
if 'street_names_oldver' not in Address.columns:
    Address['street_names_oldver']=Address['street_name']

#old street name -> replacement used for the OneMap search
street_name_fixes={
    'KG BAHRU HILL':'Kampong Bahru Rd',            #Kampong Bahru Rd in Bukit Merah
    'JLN PASAR BARU':'SRI GEYLANG SERAI',
    'JLN MEMBINA BARAT':'JALAN MEMBINA',
    'BUANGKOK STH FARMWAY 1':'COMPASSVALE VIEW',   #found that the location is now Compassvale
}
for old_name,new_name in street_name_fixes.items():
    Address.loc[Address['street_name']==old_name,'street_name']=new_name

#Unable to find nile road, to drop
nile_rd_rows=Address.index[Address['street_name']=='NILE RD']
Address=Address.drop(nile_rd_rows)

In [12]:
## To re-request for coordinates

#Get Latitude and Longitude through API request to OneMap, only for rows whose Lat is still 'NA'.
#Cells are written with .at[row, column]. The Address['Lat'][index]=... form is chained
#assignment: it raises SettingWithCopyWarning and is not guaranteed to update Address.
for index in Address.index[Address['Lat']=='NA']:
    Location=Address.at[index,'street_name']
    Address.at[index,'Lat']=getcoord('LATITUDE',Location)
    Address.at[index,'Lon']=getcoord('LONGITUDE',Location)

In [13]:
#Count the rows still marked 'NA' by summing the boolean mask. The earlier .count()[0] form
#used positional integer access on a label-indexed Series, which pandas has deprecated.
print("no. of rows with missing Lat:",(Address['Lat']=='NA').sum())
print("no. of rows with missing Lon:",(Address['Lon']=='NA').sum())

no. of rows with missing Lat: 0
no. of rows with missing Lon: 0


In [14]:
###Export the list of street names and coordinates to csv for further processing (if needed)###

#Export Coord dataset
Address.to_csv(Output_fp_Address_coord)

#Preview the exported table. A bare expression only renders when it is the last line of the
#cell, so the preview goes after the export; head() avoids dumping the whole frame.
Address.head()