In [242]:
import datetime
import re
import time

import pandas as pd
from selenium import webdriver #automatically does something on the browser
from selenium.common.exceptions import NoSuchElementException, WebDriverException
from selenium.webdriver.chrome.service import Service as ChromeService
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import Select
from webdriver_manager.chrome import ChromeDriverManager

In [243]:
# Open the HDB - Find a flat page in selenium

# Resolve the chromedriver binary through webdriver_manager and start Chrome with it.
chromedriver_path = ChromeDriverManager().install()
chrome_service = ChromeService(chromedriver_path)
driver = webdriver.Chrome(service=chrome_service)

# Every later cell works on whatever page this shared `driver` currently shows.
driver.get("https://homes.hdb.gov.sg/home/finding-a-flat")

In [244]:
# Select the SBF box to see the list of projects.

def get_SBF_units_n_click():
    """Click the SBF category card and return the unit count printed on it.

    Looks up the flat-category cards on the search page through the
    module-level ``driver``, clicks the first card whose text contains "SBF",
    and reads the number from the second line of that card's text.

    Returns:
        int: the digits on the card's second text line joined into one number
        (separators and any other non-digit characters are dropped).

    Raises:
        RuntimeError: if none of the located cards mentions "SBF". This case
            used to surface as an UnboundLocalError on the return line.
        IndexError: if the SBF card's text has no second line.
        ValueError: if the second line contains no digits.
    """
    # Category cards (per the original note: Resale, BTO, SBF, Upcoming BTO).
    category_cards = driver.find_elements(By.XPATH,
                                          "/html/body/app-root/div[2]/app-find-my-flat/section/div/"
                                          "app-search-results/div/div/div[4]/app-flat-cards-categories")
    for card in category_cards:
        card_text = card.text
        if "SBF" in card_text:
            card_lines = card_text.split(sep='\n')
            card.click()  # opens the list of SBF projects
            break
    else:
        # for/else: the loop ended without `break`, so no SBF card was found.
        raise RuntimeError("Could not find an 'SBF' category card on the page")

    # The second line of the card carries the number of SBF units to scrape.
    return int(''.join(ch for ch in card_lines[1] if ch.isdigit()))

sbf_units = get_SBF_units_n_click()
sbf_units

5590

In [245]:
# Choose 50 results per page so the pagination loop has fewer pages to walk.
page_size_dropdown = driver.find_element(
    By.XPATH,
    "/html/body/app-root/div[2]/app-find-my-flat/section/div/"
    "app-search-results/div/div/div[3]/div/div[1]/div[1]/div[2]/select",
)
sel = Select(page_size_dropdown)
sel.select_by_value('50')


In [246]:
# Get a list of links to all projects in the SBF, Main loop will open each link and then scrape data in link.
list_of_links = []
while True:
    # 1 - collect the href of every project link on the page currently shown
    list_of_links.extend(
        anchor.get_attribute('href')
        for anchor in driver.find_elements(By.CLASS_NAME, "flat-link")
    )
    # 2 - move to the next page; a Selenium error on the click (button missing
    #     or not interactable) is treated as "no more pages".
    try:
        driver.find_element(By.CSS_SELECTOR, "[aria-label=Next]").click()
    except WebDriverException:
        # Only Selenium failures end the loop. KeyboardInterrupt and programming
        # errors now propagate instead of being reported as the last page.
        print("Last page hit")
        break
    # Pause briefly before reading the next page.
    time.sleep(1)

Last page hit


In [247]:
# Number of project links gathered by the pagination loop; echoed as the cell output.
projectsCount = len(list_of_links)
projectsCount

631

In [197]:
# Scratch probe: raw text of the element with id 'available-grid' on the page
# the driver currently shows (same lookup that get_units() performs).
all_blocks = driver.find_element(By.XPATH,"//*[@id='available-grid']").text
all_blocks

'#07\n24\n68 sqm\n$238,000\n#02\n42*\n68 sqm\n$218,000'

In [201]:
# Scratch version of get_units(): turn the grid text held in `all_blocks` into unit dicts.
# NOTE(review): relies on remove_null() and get_flats(), which this notebook defines
# in a later cell, so on a fresh kernel this cell only works after that one has run.
flat_list = re.split('#',all_blocks)  # one chunk per '#<level>' section
flat_list= remove_null(flat_list)  # drop the empty chunk before the first '#'
list_of_flats = []
for floor_level in flat_list:
    floor_level = floor_level.split(sep='\n')
    floor_level = remove_null(floor_level)
    list_of_flats.extend(get_flats(floor_level))
    # list_of_flats.extend(floor_level)
list_of_flats

[{'level': 7, 'unit': '24', 'sqm': 68, 'price': 238000},
 {'level': 2, 'unit': '42*', 'sqm': 68, 'price': 218000}]

In [248]:
def get_units():
    """Parse the 'available-grid' element of the current page into unit dicts.

    The grid text is cut at every '#', giving one chunk per level. Each chunk is
    split into its non-empty lines and passed to get_flats(). The dictionaries
    from all levels are returned in one flat list.
    """
    grid_text = driver.find_element(By.XPATH,"//*[@id='available-grid']").text
    list_of_flats = []
    for level_chunk in remove_null(re.split('#', grid_text)):
        level_fields = remove_null(level_chunk.split(sep='\n'))
        list_of_flats.extend(get_flats(level_fields))
    return list_of_flats

def get_flats(floor_level_list):
    """Convert one level's fields into a list of unit dictionaries.

    ``floor_level_list`` holds the level number first, followed by repeating
    (unit, '<n> sqm', '$<price>') triples. Each triple becomes
    {'level': int, 'unit': str, 'sqm': int, 'price': int}.
    A list with fewer than two entries yields an empty result.
    """
    flats = []
    # Triples start at index 1 and repeat every three entries.
    for pos in range(1, len(floor_level_list), 3):
        level = int(floor_level_list[0])
        unit = floor_level_list[pos]
        area = int(floor_level_list[pos + 1].split(sep=' ')[0])
        cost = int(floor_level_list[pos + 2].replace('$', '').replace(',', ''))
        flats.append({'level': level, 'unit': unit, 'sqm': area, 'price': cost})
    return flats

# helper function to remove null values in list
def remove_null(any_list_with_null:list)-> list:
    """Return a new list with only the truthy items of the input, order kept."""
    return [item for item in any_list_with_null if item]

In [113]:
# Scratch probe: raw text of the 'available-grid' element on the current page.
# NOTE(review): the recorded output is an empty string; presumably no block was
# selected when this ran. Confirm before relying on it.
all_blocks = driver.find_element(By.XPATH,"//*[@id='available-grid']").text
all_blocks

''

In [160]:
# Scratch: single-step version of the loop body in scroll_flat_type().
flat_type_selector = Select(driver.find_element(By.XPATH,"//*[@id='layout-block']/div[2]/div/div/div[1]/select"))
value = 0
final_flat_block_LD = []

# Select the option whose value attribute is "0", then read the label from the
# option at XPath position value+2 (XPath positions are 1-based).
flat_type_selector.select_by_value(str(value))
flat_type_string = driver.find_element(By.XPATH,f"//*[@id='layout-block']/div[2]/div/div/div[1]/select/option[{value+2}]").text
flat_type_string

'3-room'

In [185]:
# Scratch: same probe as the value=0 cell, hard-coded to the option with value "1"
# (label read from option[3]).
flat_type_selector = Select(driver.find_element(By.XPATH,"//*[@id='layout-block']/div[2]/div/div/div[1]/select"))
flat_type_selector.select_by_value('1')
flat_type_string = driver.find_element(By.XPATH,f"//*[@id='layout-block']/div[2]/div/div/div[1]/select/option[3]").text
flat_type_string

'4-room'

In [249]:
def scroll_flat_type(town_dict):
    """
    Walks every entry of the [Choose a flat type] drop down menu on the
    project page currently shown by the module-level driver.

    For each entry: selects it (option values "0", "1", ...), reads its label,
    adds the label to a copy of town_dict under the 'flat_type' key, and passes
    that dictionary to scroll_blocks().

    Returns the concatenated list of dictionaries produced by scroll_blocks()
    for all flat types (empty when the menu has no option with value "0").

    The loop ends when select_by_value() raises NoSuchElementException, i.e.
    there is no option with the next value. Any other error (for example a
    parsing failure inside scroll_blocks) propagates to the caller instead of
    silently cutting the result short.
    """
    flat_type_selector = Select(driver.find_element(By.XPATH,"//*[@id='layout-block']/div[2]/div/div/div[1]/select"))
    final_flat_block_LD = []
    value = 0
    while True:
        try:
            flat_type_selector.select_by_value(str(value))
        except NoSuchElementException:
            # No option carries this value: every flat type has been visited.
            break
        # option[value+2]: XPath positions are 1-based, and the label of the option
        # with value N is read from position N+2.
        flat_type_string = driver.find_element(By.XPATH,f"//*[@id='layout-block']/div[2]/div/div/div[1]/select/option[{value+2}]").text
        flat_type_dict = town_dict|{"flat_type": flat_type_string} #passed into scroll_blocks function
        final_flat_block_LD.extend(scroll_blocks(flat_type_dict))
        value += 1
    return final_flat_block_LD

def scroll_blocks(flat_type_dict):
    """
    Walks every entry of the [Choose block no.] drop down menu for the flat
    type that is currently selected on the page.

    For each block: selects it (option values "0", "1", ...), reads its label,
    collects the ethnic quota with get_ethnic() and the unit rows with
    get_units(), and merges flat_type_dict | {'Block': label} | unit | quota
    into one dictionary per unit.

    Returns the list of merged dictionaries for all blocks (empty when the menu
    has no option with value "0").

    The loop ends when select_by_value() raises NoSuchElementException, i.e.
    there is no option with the next value. Any other error (for example a
    parsing failure inside get_units) propagates to the caller instead of
    silently cutting the result short.
    """
    block_no_selector = Select(driver.find_element(By.XPATH,"//*[@id='layout-block']/div[2]/div/div/div[3]/select"))
    flat_block_LD = []
    value = 0
    while True:
        try:
            block_no_selector.select_by_value(str(value))
        except NoSuchElementException:
            # No option carries this value: every block has been visited.
            break
        # option[value+2]: XPath positions are 1-based, and the label of the option
        # with value N is read from position N+2.
        block_no_string = driver.find_element(By.XPATH,f"//*[@id='layout-block']/div[2]/div/div/div[3]/select/option[{value+2}]").text
        block_dict = {'Block': block_no_string}
        ethnics_dict = get_ethnic()
        # Later operands of | win on key clashes, so the order is kept as written.
        flat_block_LD.extend(flat_type_dict|block_dict|x|ethnics_dict for x in get_units())
        value += 1
    return flat_block_LD

def get_ethnic():
    """
    Reads the ethnic quota panel of the block currently displayed
    and returns it as a dictionary of {race: quota}.
    Quota values are the raw text that follows each ':' (not converted to numbers).
    """
    quota_text = driver.find_element(By.XPATH,"//*[@id='available-sidebar']/div[1]/div[2]").text
    # Splitting on newlines and colons gives alternating race / quota tokens.
    tokens = re.split(r'\n|:', quota_text)
    races = tokens[::2]
    quotas = tokens[1::2]
    return dict(zip(races, quotas))

In [190]:
# Scratch: confirm that `dict | dict` builds one merged dictionary, which is the
# operator the scroll_* functions use to combine town, flat-type, block and unit data.
dict1= {'apple': 1,
        'pear' : 2}
dict2 = {'Pink' : 'red',
         'Violet' : 'purple'}
myList = [dict1 | dict2]
myList

[{'apple': 1, 'pear': 2, 'Pink': 'red', 'Violet': 'purple'}]

In [188]:
# Scratch version of get_ethnic(): read the quota panel text, split it on
# newlines and ':' and pair the alternating tokens up as {race: quota}.
ethnic = driver.find_element(By.XPATH,"//*[@id='available-sidebar']/div[1]/div[2]").text
ethnic = re.split(r'\n|:',ethnic)
ethnic = dict(zip(ethnic[::2], ethnic[1::2]))
ethnic

{'Malay': ' 1', 'Chinese': ' 1', 'Indian/ Other races': ' 1'}

In [226]:
# Scratch version of the details lookup in get_town_details(): the panel text is
# split into lines and consecutive lines are paired up as key / value.
town_details = driver.find_element(By.XPATH,"/html/body/app-root/div[2]/app-sbf-details/section/div/div[3]/div[1]/div/div/div/div[2]/div").text.split(sep='\n')
town_dict = dict(zip(town_details[::2],town_details[1::2]))
town_dict

{'Town': 'Bukit Batok',
 'Remaining lease': '15 - 35 years',
 'Probable completion date': 'Keys available'}

In [236]:
# Scratch version of the project-name lookup in get_town_details(): take the first
# line of the <h3> heading and strip the ' (Sale of Balance Flats)' suffix.
project_name = driver.find_element(By.XPATH,"/html/body/app-root/div[2]/app-sbf-details/section/div/div[2]/div/div[2]/div[1]/h3")
project_name.text.split(sep='\n')[0].replace(' (Sale of Balance Flats)','')


'Golden Lily'

In [250]:
def get_town_details():
    """Collect the project-level fields of the SBF details page currently shown.

    Returns a dictionary holding the key/value pairs of the details panel plus:
      - 'Project_name': first line of the page heading, without the
        ' (Sale of Balance Flats)' suffix
      - 'Remaining lease': replaced by the result of parse_lease()
      - 'Est months': always an empty string
      - 'Probable completion date': result of parse_dates(), or '' when the
        page text contains 'available'
      - 'Keys Available': True when the page text contains 'available', else False
    """
    heading = driver.find_element(By.XPATH,"/html/body/app-root/div[2]/app-sbf-details/section/div/div[2]/div/div[2]/div[1]/h3").text
    project_name = heading.split(sep='\n')[0].replace(' (Sale of Balance Flats)','')

    details_text = driver.find_element(By.XPATH,"/html/body/app-root/div[2]/app-sbf-details/section/div/div[3]/div[1]/div/div/div/div[2]/div").text
    fields = details_text.split(sep='\n')
    # Consecutive lines of the panel are label / value pairs.
    town_dict = dict(zip(fields[::2], fields[1::2]))

    town_dict['Project_name'] = project_name
    town_dict['Remaining lease'] = parse_lease(town_dict['Remaining lease'])
    town_dict['Est months'] = ''

    completion = town_dict['Probable completion date']
    keys_available = 'available' in completion.lower()
    town_dict['Probable completion date'] = '' if keys_available else parse_dates(completion)
    town_dict['Keys Available'] = keys_available
    return town_dict

In [209]:
# def get_town_details():
#     town_details = driver.find_element(By.XPATH,"/html/body/app-root/div[2]/app-sbf-details/section/div/div[3]/div[1]/div/div/div/div[2]/div").text.split(sep='\n')
#     town_dict = dict(zip(town_details[::2],town_details[1::2]))
#     town_dict['Remaining lease'] = parse_lease(town_dict['Remaining lease'])
#     town_dict['Est months'] = ''
#     if 'available' not in town_dict['Probable completion date'].lower():
#         town_dict['Probable completion date']= parse_dates(town_dict['Probable completion date'])
#         town_dict['Keys Available'] = False
#     else:
#         town_dict['Probable completion date'] = ''
#         town_dict['Keys Available'] = True

#     return town_dict

In [251]:
def parse_lease(lease):
    """Return the last run of digits in ``lease`` as an int.

    For a range such as '15 - 35 years' this is the upper bound (35).
    Raises IndexError when the text contains no digits.
    """
    digit_runs = re.findall(r'\d+', lease)
    return int(digit_runs[-1])

def parse_dates(date):
    """Turn a completion-date string into a datetime on the 1st of a month.

    Only the last period of the string is used (the end of an 'X to Y' range).
    Quarter form ('3Q/2027'): the month is quarter * 3, the last month of
    that quarter. Month form ('05/2026'): the month is taken as written.
    """
    is_quarter = "Q" in date
    separator = r'Q/| to ' if is_quarter else r' to |/'
    # The last two tokens are (quarter-or-month, year) of the final period.
    tail = re.split(separator, date)[-2:]
    year = int(tail[1])
    month = int(tail[0]) * (3 if is_quarter else 1)
    return datetime.datetime(year, month, 1)

In [252]:
# Visit every project link and collect one dictionary per available unit.
full_list = []
count = 0
try:
    for link in list_of_links:
        driver.get(link)
        # Pause so the server is not hit with too many requests.
        time.sleep(1)
        flat_details = scroll_flat_type(get_town_details())
        # Tag each unit with the page it came from.
        full_list.extend(x|{'Link': link} for x in flat_details)
        count += 1
        print(count)
finally:
    # Always quit the browser, even when a page fails part-way through the scrape.
    # Rows gathered so far stay available in full_list.
    driver.quit()

# Verify the units: rows scraped vs. the count shown on the SBF card.
print(len(full_list) == sbf_units)
# >>> True
# >>> True

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277


In [103]:
# Number of unit rows collected so far.
len(full_list)

3399

In [108]:
# Echo every collected project URL (the whole list is displayed).
list_of_links

['https://homes.hdb.gov.sg/home/sbf/details/2025-02_SBF_Q0NLX040QzE1XzE3Mzg3NDc4MDAwMjI',
 'https://homes.hdb.gov.sg/home/sbf/details/2025-02_SBF_UFJDX04yQzlfMTczODY1MDYwMDA1MA',
 'https://homes.hdb.gov.sg/home/sbf/details/2025-02_SBF_QkJfTjRDMjZfMTczODYzNjIwMDA5MA',
 'https://homes.hdb.gov.sg/home/sbf/details/2025-02_SBF_SldfTjdDMTVfMTczODY1MDYwMDAyMw',
 'https://homes.hdb.gov.sg/home/sbf/details/2025-02_SBF_SEdfTjVDMjNfMTczODAzODYwMDA2OQ',
 'https://homes.hdb.gov.sg/home/sbf/details/2025-02_SBF_U0tfTjRDMjdfMTczODY1NDIwMDAzOQ',
 'https://homes.hdb.gov.sg/home/sbf/details/2025-02_SBF_VEFQX040QzI3XzE3Mzg3NDc4MDAwNDE',
 'https://homes.hdb.gov.sg/home/sbf/details/2025-02_SBF_WVNfTjRDNF8xNzM4NjQ3MDAwMDgz',
 'https://homes.hdb.gov.sg/home/sbf/details/2025-02_SBF_QkJfTjNDMTNfMTczODYzNjIwMDExMw',
 'https://homes.hdb.gov.sg/home/sbf/details/2025-02_SBF_SldfTjFDMzBfMTczODY0NzAwMDAyOA',
 'https://homes.hdb.gov.sg/home/sbf/details/2025-02_SBF_SEdfTjFDMV8xNzM4MDM4NjAwMDM0',
 'https://homes.hdb.gov

In [254]:
# Echo every scraped unit dictionary (the whole list is displayed).
full_list

[{'Town': 'Choa Chu Kang',
  'Remaining lease': 45,
  'Probable completion date': '',
  'Project_name': 'Sunshine Gardens',
  'Est months': '',
  'Keys Available': True,
  'flat_type': '2-room Flexi',
  'Block': 'Blk 488B',
  'level': 4,
  'unit': '151*',
  'sqm': 37,
  'price': 71600,
  'Malay': ' 1',
  'Chinese': ' 1',
  'Indian/ Other races': ' 1',
  'Link': 'https://homes.hdb.gov.sg/home/sbf/details/2025-02_SBF_Q0NLX040QzE1XzE3Mzg3NDc4MDAwMjI'},
 {'Town': 'Pasir Ris',
  'Remaining lease': 45,
  'Probable completion date': '',
  'Project_name': 'Golden Lily',
  'Est months': '',
  'Keys Available': True,
  'flat_type': '2-room Flexi',
  'Block': 'Blk 212A',
  'level': 12,
  'unit': '630*',
  'sqm': 37,
  'price': 59900,
  'Malay': ' 4',
  'Chinese': ' 5',
  'Indian/ Other races': ' 3',
  'Link': 'https://homes.hdb.gov.sg/home/sbf/details/2025-02_SBF_UFJDX04yQzlfMTczODY1MDYwMDA1MA'},
 {'Town': 'Pasir Ris',
  'Remaining lease': 45,
  'Probable completion date': '',
  'Project_name': '

In [257]:
# One row per scraped unit; columns follow the key order of the dictionaries.
df = pd.DataFrame(full_list)

# Move "Project_name" so that it sits directly after the first column.
col = df.pop("Project_name")
df.insert(loc=1, column=col.name, value=col)

In [258]:
# Spot-check 5 randomly chosen rows (no random_state is set, so the rows differ per run).
df.sample(n=5)

Unnamed: 0,Town,Project_name,Remaining lease,Probable completion date,Est months,Keys Available,flat_type,Block,level,unit,sqm,price,Malay,Chinese,Indian/ Other races,Link
1928,Tengah,Plantation Verge,99,2027-09-01 00:00:00,,False,4-room,Blk 100B,14,127,93,424200,19,71,12,https://homes.hdb.gov.sg/home/sbf/details/2025...
1715,Queenstown,Skyoasis @ Dawson,96,,,True,2-room Flexi,Blk 41A,17,280*,47,387000,2,2,2,https://homes.hdb.gov.sg/home/sbf/details/2025...
526,Toa Payoh,Kim Keat Ville,45,,,True,3-room,Blk 26,7,160*,67,203000,1,1,1,https://homes.hdb.gov.sg/home/sbf/details/2025...
3991,Ang Mo Kio,Pine Ville @ AMK,99,2027-09-01 00:00:00,,False,3-room,Blk 809B,10,165,68,429900,11,25,8,https://homes.hdb.gov.sg/home/sbf/details/2025...
2119,Tengah,Plantation Verge,99,2027-09-01 00:00:00,,False,5-room,Blk 101A,4,157,113,528700,8,9,4,https://homes.hdb.gov.sg/home/sbf/details/2025...


In [259]:
# Write the table to test.csv in the working directory; with pandas' defaults the
# row index is written as the first (unnamed) column.
df.to_csv('test.csv')