In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from geopy.distance import great_circle
from tqdm import tqdm

In [2]:
hdb_df = pd.read_csv('data/hdb_coordinates_final.csv', low_memory=False)

In [5]:
hdb_clean = pd.read_csv('data/hdb_combined_clean.csv', low_memory=False)

In [6]:
hdb_df = hdb_df.drop(hdb_df.columns[0], axis=1)
hdb_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 919408 entries, 0 to 919407
Data columns (total 13 columns):
 #   Column               Non-Null Count   Dtype  
---  ------               --------------   -----  
 0   town                 919408 non-null  object 
 1   flat_type            919408 non-null  object 
 2   block                919408 non-null  object 
 3   street_name          919408 non-null  object 
 4   storey_range         919408 non-null  object 
 5   floor_area_sqm       919408 non-null  float64
 6   flat_model           919408 non-null  object 
 7   lease_commence_date  919408 non-null  int64  
 8   resale_price         919408 non-null  float64
 9   remaining_lease      210358 non-null  object 
 10  address              919408 non-null  object 
 11  latitude             919408 non-null  float64
 12  longitude            919408 non-null  float64
dtypes: float64(4), int64(1), object(8)
memory usage: 91.2+ MB


In [4]:
hdb_df['storey_range'].unique()

array(['10 TO 12', '04 TO 06', '07 TO 09', '01 TO 03', '13 TO 15',
       '19 TO 21', '16 TO 18', '25 TO 27', '22 TO 24', '28 TO 30',
       '31 TO 33', '40 TO 42', '37 TO 39', '34 TO 36', '06 TO 10',
       '01 TO 05', '11 TO 15', '16 TO 20', '21 TO 25', '26 TO 30',
       '36 TO 40', '31 TO 35', '46 TO 48', '43 TO 45', '49 TO 51'],
      dtype=object)

In [7]:
hdb_clean['storey_median'].unique()

array([11,  5,  8,  2, 14, 20, 17, 26, 23, 29, 32, 41, 38, 35,  3, 13, 18,
       28, 33, 47, 44, 50], dtype=int64)

In [5]:
hdb_df['year'] = hdb_df['month'].str[:4].astype(int)

In [6]:
hdb_df[hdb_df['lease_commence_date'] > hdb_df['year']].head()

Unnamed: 0,month,town,flat_type,block,street_name,storey_range,floor_area_sqm,flat_model,lease_commence_date,resale_price,remaining_lease,address,latitude,longitude,year
23271,1991-11,BISHAN,4 ROOM,227,BISHAN ST 23,04 TO 06,103.0,MODEL A,1992,135000.0,,227 BISHAN ST 23,1.358071,103.847138,1991
34246,1992-08,TAMPINES,5 ROOM,476,TAMPINES ST 44,04 TO 06,132.0,MODEL A,1993,200000.0,,476 TAMPINES ST 44,1.361113,103.953548,1992
46535,1993-06,WOODLANDS,5 ROOM,321,WOODLANDS ST 32,01 TO 03,127.0,IMPROVED,1994,145000.0,,321 WOODLANDS ST 32,1.431886,103.779323,1993
69033,1994-05,CHOA CHU KANG,EXECUTIVE,528,CHOA CHU KANG ST 51,07 TO 09,153.0,APARTMENT,1995,350000.0,,528 CHOA CHU KANG ST 51,1.392412,103.741549,1994
69609,1994-05,PASIR RIS,4 ROOM,631,PASIR RIS DR 3,10 TO 12,104.0,MODEL A,1995,160000.0,,631 PASIR RIS DR 3,1.37889,103.94014,1994


In [7]:
hdb_df = hdb_df[hdb_df['lease_commence_date'] <= hdb_df['year']]

## Amenities

In [8]:
schools_df = pd.read_csv('data/schools_address.csv')
schools_df = schools_df.drop(schools_df.columns[0], axis=1)
schools_df.head()

Unnamed: 0,school_name,address,postal_code,mrt_desc,bus_desc,latitude,longitude
0,ADMIRALTY PRIMARY SCHOOL,11 WOODLANDS CIRCLE,738907,Admiralty Station,"TIBS 965, 964, 913",1.442635,103.80004
1,ADMIRALTY SECONDARY SCHOOL,31 WOODLANDS CRESCENT,737916,ADMIRALTY MRT,904,1.445891,103.802398
2,AHMAD IBRAHIM PRIMARY SCHOOL,10 YISHUN STREET 11,768643,Yishun,Yishun Ring Road - 812 (white plate); Yishun A...,1.433153,103.832942
3,AHMAD IBRAHIM SECONDARY SCHOOL,751 YISHUN AVENUE 7,768928,"CANBERRA MRT, YISHUN MRT","117, 167, 169, 800, 811, 812, 883, 856, 858, 8...",1.43606,103.829719
4,AI TONG SCHOOL,100 Bright Hill Drive,579646,Bishan MRT,"410, 162, 52",1.360583,103.83302


In [9]:
shops_df = pd.read_csv('data/shops_address.csv')
shops_df = shops_df.drop(shops_df.columns[0], axis=1)
shops_df.head()

Unnamed: 0,licence_numbers,business_name,name_of_license,premise_address,trimmed_address,latitude,longitude
0,B02008E000,SHENG SIONG SUPERMARKET,SHENG SIONG SUPERMARKET PTE LTD,"845 YISHUN STREET 81 #01-184, S(760845)",845 YISHUN STREET 81,1.416079,103.835019
1,B02011P000,GIANT,COLD STORAGE SINGAPORE (1983) PTE LTD,"524A JELAPANG ROAD #03-13/18, GREENRIDGE SHOPP...",524A JELAPANG ROAD,1.385296,103.766171
2,B02012N000,COLD STORAGE,COLD STORAGE SINGAPORE (1983) PTE LTD,"768 WOODLANDS AVENUE 6 #01-34, WOODLANDS MART ...",768 WOODLANDS AVENUE 6,1.445718,103.798166
3,B02015J000,SHENG SIONG SUPERMARKET,SHENG SIONG SUPERMARKET PTE LTD,"122 ANG MO KIO AVENUE 3 #01-1753,#01-1757,#01-...",122 ANG MO KIO AVENUE 3,1.370189,103.843413
4,B02017C000,SHENG SIONG SUPERMARKET,SHENG SIONG SUPERMARKET PTE LTD,"301 WOODLANDS STREET 31 #01-217, S(730301)",301 WOODLANDS STREET 31,1.431505,103.773516


In [10]:
import requests
import os
      
url = "https://www.onemap.gov.sg/api/auth/post/getToken"
      
payload = {
        "email": '23jkkrithika@gmail.com',
        "password": 'oneMapapi2322'
      }
      
response = requests.request("POST", url, json=payload)
      
print(response.text)

{
  "access_token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJzdWIiOiJhMzNmZTM2ZGRkMDdmMzhkZDRiMzA0M2IxNGUyMTVjZSIsImlzcyI6Imh0dHA6Ly9pbnRlcm5hbC1hbGItb20tcHJkZXppdC1pdC0xMjIzNjk4OTkyLmFwLXNvdXRoZWFzdC0xLmVsYi5hbWF6b25hd3MuY29tL2FwaS92Mi91c2VyL3Nlc3Npb24iLCJpYXQiOjE3MjU1NjAyNTcsImV4cCI6MTcyNTgxOTQ1NywibmJmIjoxNzI1NTYwMjU3LCJqdGkiOiJVaDFRVWtTWW9uUEY2Y2tsIiwidXNlcl9pZCI6MzE0NywiZm9yZXZlciI6ZmFsc2V9.v1UhAKQFruyhqL0Mn84-Og9Zkmn5V4c8ezLURodD8xg",
  "expiry_timestamp": "1725819457"
}


In [11]:
api_key = 'eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJzdWIiOiIwNjI3NTRhYzA3MzY4NGJjN2M2NDIzNDMzODZiMmEyMyIsImlzcyI6Imh0dHA6Ly9pbnRlcm5hbC1hbGItb20tcHJkZXppdC1pdC0xMjIzNjk4OTkyLmFwLXNvdXRoZWFzdC0xLmVsYi5hbWF6b25hd3MuY29tL2FwaS92Mi91c2VyL3Bhc3N3b3JkIiwiaWF0IjoxNzE3NDUwOTk4LCJleHAiOjE3MTc3MTAxOTgsIm5iZiI6MTcxNzQ1MDk5OCwianRpIjoiWDcwemRwVnBvdlRjWVRaNiIsInVzZXJfaWQiOjM3MDEsImZvcmV2ZXIiOmZhbHNlfQ._Pg0RLUT76zfwUAQlUWCa6-ZYuuKmKsDGZXmYbEof9g'

In [41]:
import requests

# headers = {
#         'Authorization': api_key  # Use the api_key variable here
#     }
    
url = "https://www.onemap.gov.sg/api/common/elastic/search?searchVal=524A JELAPANG ROAD&returnGeom=Y&getAddrDetails=Y&pageNum=1"
# url = "https://www.onemap.gov.sg/api/public/routingsvc/route?start=1.319728%2C103.8421&end=1.319728905%2C103.8421581&routeType=walk"

response = requests.get(url)
# response = requests.get(url, headers=headers)

print(response.text)


{
  "found": 5,
  "totalNumPages": 1,
  "pageNum": 1,
  "results": [
    {
      "SEARCHVAL": "AMAZING STAR MONTESSORI HOUSE (GREENRIDGE)",
      "BLK_NO": "524A",
      "ROAD_NAME": "JELAPANG ROAD",
      "BUILDING": "AMAZING STAR MONTESSORI HOUSE (GREENRIDGE)",
      "ADDRESS": "524A JELAPANG ROAD AMAZING STAR MONTESSORI HOUSE (GREENRIDGE) SINGAPORE 671524",
      "POSTAL": "671524",
      "X": "20527.2820040528",
      "Y": "40804.5665046293",
      "LATITUDE": "1.38529556252963",
      "LONGITUDE": "103.766170532051"
    },
    {
      "SEARCHVAL": "DBS GREENRIDGE SHOPPING CENTRE",
      "BLK_NO": "524A",
      "ROAD_NAME": "JELAPANG ROAD",
      "BUILDING": "DBS GREENRIDGE SHOPPING CENTRE",
      "ADDRESS": "524A JELAPANG ROAD DBS GREENRIDGE SHOPPING CENTRE SINGAPORE 671524",
      "POSTAL": "671524",
      "X": "20527.2817921769",
      "Y": "40804.5668073092",
      "LATITUDE": "1.38529556526689",
      "LONGITUDE": "103.766170530147"
    },
    {
      "SEARCHVAL": "GREENRIDGE 

In [31]:
import requests

# Block number and road name to search
blk_no = "309"
road_name = "ANG MO KIO AVENUE 1"

# Combine block number and road name to form the search value
search_val = f"{blk_no} {road_name}"

# Encode the search value for the URL
encoded_search_val = requests.utils.quote(search_val)

# Construct the URL
url = f"https://www.onemap.gov.sg/api/common/elastic/search?searchVal={encoded_search_val}&returnGeom=Y&getAddrDetails=Y&pageNum=1"

# Send the GET request
response = requests.get(url)

# Parse the JSON response
response_data = response.json()

# Check if the search returned any results
if 'results' in response_data and response_data['results']:
    # Iterate through the results and print details
    for result in response_data['results']:
        blk_no = result.get('BLK_NO', 'N/A')
        road_name = result.get('ROAD_NAME', 'N/A')
        building_name = result.get('BUILDING', 'N/A')
        address = result.get('ADDRESS', 'N/A')
        postal_code = result.get('POSTAL', 'N/A')
        latitude = result.get('LATITUDE', 'N/A')
        longitude = result.get('LONGITUDE', 'N/A')
        
        print(f"Block No: {blk_no}")
        print(f"Road Name: {road_name}")
        print(f"Building: {building_name}")
        print(f"Address: {address}")
        print(f"Postal Code: {postal_code}")
        print(f"Latitude: {latitude}")
        print(f"Longitude: {longitude}")
        print("-" * 50)
else:
    print("No results found.")


Block No: 215
Road Name: ANG MO KIO AVENUE 1
Building: ANG MO KIO 22
Address: 215 ANG MO KIO AVENUE 1 ANG MO KIO 22 SINGAPORE 560215
Postal Code: 560215
Latitude: 1.36655830166122
Longitude: 103.841624082978
--------------------------------------------------
Block No: 216
Road Name: ANG MO KIO AVENUE 1
Building: ANG MO KIO 22
Address: 216 ANG MO KIO AVENUE 1 ANG MO KIO 22 SINGAPORE 560216
Postal Code: 560216
Latitude: 1.36619678831054
Longitude: 103.841505011903
--------------------------------------------------
Block No: 57
Road Name: ANG MO KIO AVENUE 8
Building: ANG MO KIO BUS INTERCHANGE
Address: 57 ANG MO KIO AVENUE 8 ANG MO KIO BUS INTERCHANGE SINGAPORE 567751
Postal Code: 567751
Latitude: 1.36951942637994
Longitude: 103.848462230319
--------------------------------------------------
Block No: 2874
Road Name: ANG MO KIO AVENUE 9
Building: ANG MO KIO FIRE STATION
Address: 2874 ANG MO KIO AVENUE 9 ANG MO KIO FIRE STATION SINGAPORE 569783
Postal Code: 569783
Latitude: 1.384987852588

In [13]:
hdb_df2 = hdb_df[:100]

In [None]:
# def get_nearest_mrt(lat, lon):
#     url = f"https://www.onemap.gov.sg/api/public/routingsvc/route?start={lat},{lon}&end={lat+0.01},{lon+0.01}&routeType=pt&mode=TRANSIT&maxWalkDistance=1000&numItineraries=1"
    
#     headers = {
#         'Authorization': api_key  # Use the api_key variable here
#     }
    
#     response = requests.get(url, headers=headers)
    
#     if response.status_code == 200:
#         data = response.json()
#         if data['plan']['itineraries']:
#             nearest_mrt = data['plan']['itineraries'][0]['legs'][0]['to']['name']
#             return nearest_mrt
#         else:
#             return None
#     else:
#         print(f"Error: {response.status_code}")
#         return None

# # Apply the function to each row in the DataFrame with progress tracking
# tqdm.pandas(desc="Processing HDB locations")
# hdb_df2['nearest_mrt'] = hdb_df2.progress_apply(lambda row: get_nearest_mrt(row['latitude'], row['longitude']), axis=1)

In [19]:
from geopy.distance import geodesic
from tqdm import tqdm

# Create empty columns in hdb_df to store the results
hdb_df2['nearest_shop_name'] = ''
hdb_df2['nearest_shop_distance'] = 0.0
hdb_df2['nearest_shop_latitude'] = 0.0
hdb_df2['nearest_shop_longitude'] = 0.0

# Iterate over each HDB location
for index, hdb_row in tqdm(hdb_df2.iterrows(), total=hdb_df2.shape[0], desc="Processing HDB shop distance"):
    hdb_coord = (hdb_row['latitude'], hdb_row['longitude'])
    
    # Calculate distance to each shop
    distances = [
        (geodesic(hdb_coord, (shop_row['latitude'], shop_row['longitude'])).meters, 
         shop_row['business_name'], 
         shop_row['latitude'], 
         shop_row['longitude'])
        for _, shop_row in shops_df.iterrows()
    ]
    
    # Find the minimum distance and corresponding shop details
    min_distance, nearest_shop_name, shop_latitude, shop_longitude = min(distances, key=lambda x: x[0])
    
    # Update the hdb_df with the nearest shop details
    hdb_df2.at[index, 'nearest_shop_name'] = nearest_shop_name
    hdb_df2.at[index, 'nearest_shop_distance'] = min_distance
    hdb_df2.at[index, 'nearest_shop_latitude'] = shop_latitude
    hdb_df2.at[index, 'nearest_shop_longitude'] = shop_longitude


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  hdb_df2['nearest_shop_name'] = ''
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  hdb_df2['nearest_shop_distance'] = 0.0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  hdb_df2['nearest_shop_latitude'] = 0.0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer

In [20]:
hdb_df2.head()

Unnamed: 0,month,town,flat_type,block,street_name,storey_range,floor_area_sqm,flat_model,lease_commence_date,resale_price,remaining_lease,address,latitude,longitude,year,nearest_shop_name,nearest_shop_distance,nearest_shop_latitude,nearest_shop_longitude
0,1990-01,ANG MO KIO,1 ROOM,309,ANG MO KIO AVE 1,10 TO 12,31.0,IMPROVED,1977,9000.0,,309 ANG MO KIO AVE 1,1.366558,103.841624,1990,FAIRPRICE,0.0,1.366558,103.841624
1,1990-01,ANG MO KIO,1 ROOM,309,ANG MO KIO AVE 1,04 TO 06,31.0,IMPROVED,1977,6000.0,,309 ANG MO KIO AVE 1,1.366558,103.841624,1990,FAIRPRICE,0.0,1.366558,103.841624
2,1990-01,ANG MO KIO,1 ROOM,309,ANG MO KIO AVE 1,10 TO 12,31.0,IMPROVED,1977,8000.0,,309 ANG MO KIO AVE 1,1.366558,103.841624,1990,FAIRPRICE,0.0,1.366558,103.841624
3,1990-01,ANG MO KIO,1 ROOM,309,ANG MO KIO AVE 1,07 TO 09,31.0,IMPROVED,1977,6000.0,,309 ANG MO KIO AVE 1,1.366558,103.841624,1990,FAIRPRICE,0.0,1.366558,103.841624
4,1990-01,ANG MO KIO,3 ROOM,216,ANG MO KIO AVE 1,04 TO 06,73.0,NEW GENERATION,1976,47200.0,,216 ANG MO KIO AVE 1,1.366197,103.841505,1990,FAIRPRICE,42.113407,1.366558,103.841624


In [22]:
# Filter shops_df for rows where trimmed_address contains 'ANG MO KIO' and business_name is 'FAIRPRICE'
filtered_shops = shops_df[
    (shops_df['trimmed_address'].str.contains('ANG MO KIO', case=False, na=False)) &
    (shops_df['business_name'] == 'FAIRPRICE')
]

# Display the filtered rows
filtered_shops.head()

Unnamed: 0,licence_numbers,business_name,name_of_license,premise_address,trimmed_address,latitude,longitude
26,B98014C000,FAIRPRICE,NTUC FAIRPRICE CO-OPERATIVE LTD,"215 ANG MO KIO AVENUE 1 #01-877, S(560215)",215 ANG MO KIO AVENUE 1,1.366558,103.841624
87,CE11N90C000,FAIRPRICE,NTUC FAIRPRICE CO-OPERATIVE LTD,"712 ANG MO KIO AVENUE 6 #01-4056, S(560712)",712 ANG MO KIO AVENUE 6,1.371411,103.847084


In [None]:
# list_of_lat = hdb_df2['latitude']
# list_of_long = hdb_df2['longitude']
# shops_lat = shops_df['latitude']
# shops_long = shops_df['longitude']

In [None]:
# list_of_hdb_coordinates = []
# list_of_shops_coordinates = []

# for lat, long in zip(list_of_lat, list_of_long):
#     list_of_hdb_coordinates.append((lat,long))
# for lat, long in zip(shops_lat, shops_long):
#     list_of_shops_coordinates.append((lat, long))

In [None]:
# # Distance to nearest shop
# from geopy.distance import geodesic

# list_of_dist_shops = []
# min_dist_shops = []

# tqdm.pandas(desc="Processing HDB shop distance")
# for origin in list_of_hdb_coordinates:
#     for destination in range(0, len(list_of_shops_coordinates)):
#         list_of_dist_shops.append(geodesic(origin,list_of_shops_coordinates[destination]).meters)
#     shortest = (min(list_of_dist_shops))
#     min_dist_shops.append(shortest)
#     list_of_dist_shops.clear()

In [None]:
# # Distance from CDB
# cbd_dist = []

# for origin in list_of_coordinates:
#     cbd_dist.append(geodesic(origin,(1.2830, 103.8513)).meters) #CBD coordinates

In [None]:
# # Put MRT and CBD distance together
# # df_coordinates['cbd_dist'] = cbd_dist
# hdb_df2['min_dist_shops'] = min_dist_shops

In [None]:
print(min_dist_shops)

In [2]:
hawker_df = pd.read_csv('data/hawkers_address.csv')
hawker_df.head()

Unnamed: 0,name_of_centre,location_of_centre,type_of_centre,owner,no_of_stalls,no_of_cooked_food_stalls,no_of_mkt_produce_stalls,latitude,longitude
0,Adam Road Food Centre,"2, Adam Road, S(289876)",HC,Government,32,32,0,1.324083,103.814182
1,Amoy Street Food Centre,"National Development Building, Annex B, Telok ...",HC,Government,135,134,1,1.27934,103.846652
2,Bedok Food Centre,"1, Bedok Road, S(469572)",HC,Government,32,32,0,1.320347,103.955481
3,Beo Crescent Market,"38A, Beo Crescent, S(169982)",MHC,Government,94,32,62,1.288831,103.827354
4,Berseh Food Centre,"166, Jalan Besar, S(208877)",HC,Government,66,66,0,1.307344,103.856889


In [3]:
mrt_df = pd.read_csv('data/mrt_address.csv')
mrt_df.head()

Unnamed: 0,station_code,latitude,longitude,station_name
0,NS1,1.333153,103.742286,JURONG EAST MRT STATION (EW24 / NS1)
1,NS2,1.349033,103.749566,BUKIT BATOK MRT STATION (NS2)
2,NS3,1.358612,103.751791,BUKIT GOMBAK MRT STATION (NS3)
3,NS4,1.385363,103.744371,CHOA CHU KANG MRT STATION (NS4)
4,NS5,1.397535,103.747405,YEW TEE MRT STATION (NS5)
