In [2]:
import pandas as pd
import numpy as np
import requests
import json

In [4]:
def find_postal(lst, filename):
    '''With the block number and street name, get the full address of the hdb flat,
    including the postal code, geogaphical coordinates (lat/long)'''
    
    for index,add in enumerate(lst):
        # Do not need to change the URL
        url= "https://www.onemap.gov.sg/api/common/elastic/search?returnGeom=Y&getAddrDetails=Y&pageNum=1&searchVal="+ add        
        print(index,url)
        
        # Retrieve information from website
        response = requests.get(url)
        try:
            data = json.loads(response.text) 
        except ValueError:
            print('JSONDecodeError')
            pass
    
        temp_df = pd.DataFrame.from_dict(data["results"])
        # The "add" is the address that was used to search in the website
        temp_df["address"] = add
        
        # Create the file with the first row that is read in 
        if index == 0:
            file = temp_df
        else:
            file = file.append(temp_df)
    file.to_csv(filename + '.csv')

In [8]:
!pip install geopy


Collecting geopy
  Downloading geopy-2.4.1-py3-none-any.whl.metadata (6.8 kB)
Collecting geographiclib<3,>=1.52 (from geopy)
  Downloading geographiclib-2.0-py3-none-any.whl.metadata (1.4 kB)
Downloading geopy-2.4.1-py3-none-any.whl (125 kB)
   ---------------------------------------- 0.0/125.4 kB ? eta -:--:--
   --- ------------------------------------ 10.2/125.4 kB ? eta -:--:--
   ----------------------------------- ---- 112.6/125.4 kB 1.6 MB/s eta 0:00:01
   ---------------------------------------- 125.4/125.4 kB 1.8 MB/s eta 0:00:00
Downloading geographiclib-2.0-py3-none-any.whl (40 kB)
   ---------------------------------------- 0.0/40.3 kB ? eta -:--:--
   ---------------------------------------- 40.3/40.3 kB ? eta 0:00:00
Installing collected packages: geographiclib, geopy
Successfully installed geographiclib-2.0 geopy-2.4.1


In [10]:
import geopy


In [12]:
from geopy.distance import geodesic

def find_nearest(house, amenity, radius=2):
    """
    this function finds the nearest locations from the 2nd table from the 1st address
    Both are dataframes with a specific format:
        1st column: any string column ie addresses taken from the "find_postal_address.py"
        2nd column: latitude (float)
        3rd column: longitude (float)
    Column name doesn't matter.
    It also finds the number of amenities within the given radius (default=2)
    """
    results = {}
    # first column must be address
    for index,flat in enumerate(house.iloc[:,0]):
        
        # 2nd column must be latitude, 3rd column must be longitude
        flat_loc = (house.iloc[index,1],house.iloc[index,2])
        flat_amenity = ['','',100,0]
        for ind, eachloc in enumerate(amenity.iloc[:,0]):
            amenity_loc = (amenity.iloc[ind,1],amenity.iloc[ind,2])
            distance = geodesic(flat_loc,amenity_loc)
            distance = float(str(distance)[:-3]) # convert to float

            if distance <= radius:   # compute number of amenities in 2km radius
                flat_amenity[3] += 1

            if distance < flat_amenity[2]: # find nearest amenity
                flat_amenity[0] = flat
                flat_amenity[1] = eachloc
                flat_amenity[2] = distance

        results[flat] = flat_amenity
    return results

In [14]:
def dist_from_location(house, location):
    """
    this function finds the distance of a location from the 1st address
    First is a dataframe with a specific format:
        1st column: any string column ie addresses taken from the "find_postal_address.py"
        2nd column: latitude (float)
        3rd column: longitude (float)
    Column name doesn't matter.
    Second is tuple with latitude and longitude of location
    """
    results = {}
    # first column must be address
    for index,flat in enumerate(house.iloc[:,0]):
        
        # 2nd column must be latitude, 3rd column must be longitude
        flat_loc = (house.iloc[index,1],house.iloc[index,2])
        flat_amenity = ['',100]
        distance = geodesic(flat_loc,location)
        distance = float(str(distance)[:-3]) # convert to float
        flat_amenity[0] = flat
        flat_amenity[1] = distance
        results[flat] = flat_amenity
    return results

In [16]:
price1999 = pd.read_csv('Project 3 - Real Estate - Business Analytics/OutsideDataSource/Resale_data_dataGovSg/ResaleFlatPricesBasedonApprovalDate19901999.csv')
price2012 = pd.read_csv('Project 3 - Real Estate - Business Analytics/OutsideDataSource/Resale_data_dataGovSg/ResaleFlatPricesBasedonApprovalDate2000Feb2012.csv')
price2014 = pd.read_csv('Project 3 - Real Estate - Business Analytics/OutsideDataSource/Resale_data_dataGovSg/ResaleFlatPricesBasedonRegistrationDateFromMar2012toDec2014.csv')
price2016 = pd.read_csv('Project 3 - Real Estate - Business Analytics/OutsideDataSource/Resale_data_dataGovSg/ResaleFlatPricesBasedonRegistrationDateFromJan2015toDec2016.csv')
price2017 = pd.read_csv('Project 3 - Real Estate - Business Analytics/OutsideDataSource/Resale_data_dataGovSg/ResaleflatpricesbasedonregistrationdatefromJan2017onwards.csv')

prices = pd.concat([price1999, price2012, price2014], sort=False)
prices = pd.concat([prices, price2016, price2017], axis=0, ignore_index=True, sort=False)

In [18]:
prices = prices[['block', 'street_name']]
prices['address'] = prices['block'] + ' ' + prices['street_name']
all_address = list(prices['address'])
unique_address = list(set(all_address))

print('Unique addresses:', len(unique_address))

Unique addresses: 9841


In [20]:
unique_address[:10]

['216C COMPASSVALE DR',
 '93 GEYLANG BAHRU',
 '688C WOODLANDS DR 75',
 '815B CHOA CHU KANG AVE 7',
 '427 CHOA CHU KANG AVE 4',
 '187 PUNGGOL CTRL',
 '677 WOODLANDS AVE 6',
 '30 JLN BAHAGIA',
 '268D PUNGGOL FIELD',
 '117 BEDOK NTH RD']

In [22]:
find_postal(unique_address, 'Project 3 - Real Estate - Business Analytics/OutsideDataSource/Resale_data_dataGovSg/flat_coordinates')

0 https://developers.onemap.sg/commonapi/search?returnGeom=Y&getAddrDetails=Y&pageNum=1&searchVal=216C COMPASSVALE DR


ConnectionError: HTTPSConnectionPool(host='developers.onemap.sg', port=443): Max retries exceeded with url: /commonapi/search?returnGeom=Y&getAddrDetails=Y&pageNum=1&searchVal=216C%20COMPASSVALE%20DR (Caused by NameResolutionError("<urllib3.connection.HTTPSConnection object at 0x00000296B63D5B50>: Failed to resolve 'developers.onemap.sg' ([Errno 11001] getaddrinfo failed)"))

In [None]:
find_postal(unique_address, 'Data/flat_coordinates')


In [None]:
flat_coord = pd.read_csv('Data/flat_coordinates.csv')
flat_coord = flat_coord[['address','LATITUDE','LONGITUDE']]
flat_coord.head()

In [None]:
supermarket = pd.read_csv('Data/list-of-supermarket-licences.csv')
supermarket.head()

In [None]:
supermerket_address = list(supermarket['postal_code'])
unique_supermarket_address = list(set(supermerket_address))

print('Unique addresses:', len(unique_supermarket_address))