In [1]:
#Import all necessary libraries
import csv
import math
import time

from geopy.exc import GeocoderTimedOut
from geopy.geocoders import Nominatim

In [23]:
# Loads the dataset as a list of rows (header row first); every field is kept as a string.
# csv.reader is used instead of splitting on "," by hand so that quoted fields containing
# commas and Windows line endings are parsed correctly. Blank lines are skipped.
DATASET_PATH = "../Datasets/2017onwards.csv"
# newline="" is what the csv module expects; utf-8-sig also accepts a file saved with a byte-order mark.
with open(DATASET_PATH, newline="", encoding="utf-8-sig") as f:
    data = [row for row in csv.reader(f) if row]

In [4]:
# Uses the Nominatim API to obtain latitude and longitude for the street
def get_lat_lon(address):
    """Geocode ``address`` with Nominatim and return ``(latitude, longitude)``.

    Makes up to 5 attempts. After every timed-out attempt except the last one it
    waits before retrying; the wait starts at 1 second and grows by 1 second per
    timeout.

    Returns ``(None, None)`` when Nominatim has no match for the address (the
    address is printed so it can be examined) or when every attempt timed out,
    so callers can always unpack the result into two values.
    """
    max_attempts = 5

    # Initializes the geocoder
    geolocator = Nominatim(user_agent="Jupyter_AI_Project_HomeBros")

    # Kept outside the loop so the wait really increases from one retry to the next.
    delay = 1
    for attempt in range(max_attempts):
        try:
            # The only geocode call: it must sit inside the try so a timeout is retried.
            location = geolocator.geocode(address, timeout=10)
        except GeocoderTimedOut:
            if attempt + 1 == max_attempts:
                # No retry left, so there is nothing to wait for.
                break
            print(f"⏱️ Timeout on attempt {attempt+1} for '{address}'... retrying in {delay}s.")
            time.sleep(delay)
            delay += 1
            continue

        # Checks if location found. If not, prints address and returns None.
        # Addresses printed here were examined during preprocessing; that is where the
        # street-name rewrite rules used before geocoding come from.
        if location:
            return location.latitude, location.longitude
        print(address)
        return None, None

    print(f"Giving up on '{address}' after {max_attempts} timed-out attempts.")
    return None, None


In [5]:
# Calculates the shortest (great-circle) distance between two points in Kilometres
def haversine(lat1, lon1, lat2, lon2):
    """Return the haversine distance in kilometres between two coordinates.

    All four arguments are in decimal degrees. The result uses an Earth radius
    of 6371 km and is rounded to 3 decimal places.
    """
    R = 6371  # Earth's radius in kilometers
    phi1 = math.radians(lat1)
    phi2 = math.radians(lat2)
    delta_phi = math.radians(lat2 - lat1)
    delta_lambda = math.radians(lon2 - lon1)

    a = math.sin(delta_phi/2)**2 + math.cos(phi1) * math.cos(phi2) * math.sin(delta_lambda/2)**2
    # Rounding error can push `a` a hair outside [0, 1] for (near-)antipodal points,
    # which would make sqrt(1 - a) raise ValueError; clamp it back into range.
    a = min(1.0, max(0.0, a))
    c = 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a))

    distance = R * c
    return round(distance, 3)

In [24]:
# Replaces each row's street name (column 4 of `data`) with that street's distance to the CBD:
#   1. Rewrites every street name into a form Nominatim can find (short forms become full
#      words, streets that no longer exist are swapped for a nearby location).
#   2. Geocodes every distinct rewritten name once and computes its distance to the CBD.
#   3. Writes the distance back into `data`.
# NOTE: `data` is modified in place, so this cell fails if it is run a second time
# without re-running the loading cell first (column 4 no longer holds street names).

# Rewrite rules, applied top to bottom to every street name. Order matters: the "exact"
# rules compare against the name as rewritten by the rules above them.
#   "replace" - substitute every occurrence of `old`
#   "suffix"  - substitute `old` only when the name ends with it
#   "exact"   - substitute only when the whole name equals `old`
STREET_RULES = [
    ("replace", " NTH", " NORTH"),
    ("replace", " STH ", " SOUTH "),
    ("replace", " ST ", " STREET "),
    ("replace", " RD ", " ROAD "),
    ("replace", "BT ", "BUKIT "),
    ("suffix", " ST", " STREET"),
    ("suffix", " RD", " ROAD"),
    ("exact", "JLN MEMBINA BARAT", "JALAN MEMBINA"),
    ("replace", "JLN ", "JALAN "),
    ("replace", "LOR ", "LORONG "),
    ("replace", " AVE ", " AVENUE "),
    ("suffix", " AVE", " AVENUE"),
    ("replace", " DR ", " DRIVE "),
    ("suffix", " DR", " DRIVE"),
    ("replace", "C'WEALTH", "COMMONWEALTH"),
    ("replace", "TG ", "TANJONG "),
    ("exact", "KG BAHRU HILL", "SPOONER ROAD"),  # Road no longer exists
    ("replace", "KG ", "KAMPONG "),
    ("replace", "UPP ", "UPPER "),
    ("exact", "BUANGKOK SOUTH FARMWAY 1", "BUANGKOK"),  # Road no longer exists
]

# Pause between two geocoding requests, in seconds (Nominatim's public service
# asks for at most one request per second).
GEOCODE_PAUSE_S = 1


def normalize_street(name):
    """Return `name` rewritten by STREET_RULES for the Nominatim search."""
    for kind, old, new in STREET_RULES:
        if kind == "replace":
            name = name.replace(old, new)
        elif kind == "suffix":
            if name.endswith(old):
                name = name[:-len(old)] + new
        elif name == old:
            name = new
    return name


# Maps every raw street name in the dataset to its rewritten form. Keying by name
# (instead of relying on two lists staying index-aligned) keeps the link correct
# even when several raw names are rewritten to the same street.
street_lookup = {}
for row in data[1:]:
    raw_name = row[4]
    if raw_name not in street_lookup:
        street_lookup[raw_name] = normalize_street(raw_name)

# Unique raw street names, in order of first appearance
streets = list(street_lookup)

# Initialized the values for CBD's latitude and longitude
CBD = [1.2812, 103.8503]

# One entry per distinct rewritten street, in the format
# [street_name, latitude, longitude, distance_to_CBD]; the last three are None
# when the street could not be geocoded.
unique_streets = []
distance_by_street = {}
for street in dict.fromkeys(street_lookup.values()):
    if unique_streets:
        time.sleep(GEOCODE_PAUSE_S)
    # `or` covers a lookup that returns nothing at all instead of a pair
    lat, lon = get_lat_lon(street) or (None, None)
    if lat is None or lon is None:
        dist = None
    else:
        dist = haversine(CBD[0], CBD[1], lat, lon)
    unique_streets.append([street, lat, lon, dist])
    distance_by_street[street] = dist

# Adjusts main dataset to reflect dist to CBD instead of street name
data[0][4] = "distance_to_CBD"
unresolved = set()
for row in data[1:]:
    dist = distance_by_street[street_lookup[row[4]]]
    if dist is None:
        unresolved.add(row[4])
    row[4] = dist

# Streets whose rows now hold None because no coordinates were found
for raw_name in sorted(unresolved):
    print(raw_name)

In [32]:
# Prints rows 2000-2049 of the dataset, one row per line, as a spot check
preview_rows = data[2000:2050]
for record in preview_rows:
    print(record)

['2017-02', 'SERANGOON', 'EXECUTIVE', '123', 10.081, '10 TO 12', '150', 'Maisonette', '1985', '67 years 07 months', '700000']
['2017-02', 'SERANGOON', 'EXECUTIVE', '526', 10.765, '04 TO 06', '152', 'Maisonette', '1992', '74 years 09 months', '720000']
['2017-02', 'SERANGOON', 'EXECUTIVE', '525', 10.765, '10 TO 12', '152', 'Maisonette', '1992', '74 years 09 months', '725000']
['2017-02', 'SERANGOON', 'EXECUTIVE', '423', 8.383, '10 TO 12', '154', 'Maisonette', '1989', '70 years 11 months', '938888']
['2017-02', 'TAMPINES', '3 ROOM', '849', 12.108, '01 TO 03', '64', 'Simplified', '1987', '69 years 09 months', '290000']
['2017-02', 'TAMPINES', '3 ROOM', '462', 14.435, '10 TO 12', '64', 'Simplified', '1987', '69 years 05 months', '292000']
['2017-02', 'TAMPINES', '3 ROOM', '879', 12.391, '07 TO 09', '74', 'Model A', '1988', '70 years 06 months', '320000']
['2017-02', 'TAMPINES', '3 ROOM', '219', 13.904, '07 TO 09', '73', 'Model A', '1985', '67 years', '330000']
['2017-02', 'TAMPINES', '3 RO