# Scraping Property Prices using Beautiful Soup


This project aims to scrape propety listing details from 99.co - a Singapore online property website. It has the latest listings for all types of properties, with listing updated almost every hour. 
Therefore, we will scrape the sale listing details from the website using Beautuful Soup and store it as pandas dataframe for future projects. 

## 1. Import Relevant Libraries

In [3]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

## 2. Extract the elements Individually

In [None]:
# Request access to URL through requests.post
header = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.82 Safari/537.36'}
post = requests.post('https://www.99.co/singapore/s/sale?isFilterUnapplied=false&listing_type=sale&map_bounds=1.5827095153768858%2C103.49449749970108%2C1.1090706240313446%2C104.12483807587296&page_num=1&page_size=35&property_segments=residential&query_coords=1.3039947%2C103.8298507&query_limit=radius&query_type=city&radius_max=1000&rental_type=unit&show_cluster_preview=true&show_description=true&show_future_mrts=true&show_internal_linking=true&show_meta_description=true&show_nearby=true&zoom=11', headers = header)
html = post.content

#Use Beautiful Soup parser 'lxml' to parse the HTML document 
soup = BeautifulSoup(html, "lxml")

In [116]:
# To extract the title of the property 
names = soup.findAll('a', class_= '_3Ajbv _30I97 _1vzK2')
names[0].get_text(strip= True)

'4 Room HDB in 472C Fernvale Street'

In [15]:
# To extract the price for the property 
price = soup.findAll('p', class_ = '_2sIc2 JlU_W _2rhE-')
price[0].get_text(strip= True)

'$620,000'

In [21]:
# To extract the price per square foot 
psf = soup.findAll('p', class_ = '_2y86Q _1iMmV _2rhE-')
psf[0].get_text(strip = True )


'$484 psf'

In [106]:
# To extract Number of bedrooms
bed = soup.findAll('p', class_ = 'dniCg _2rhE- _1c-pJ')
total = []

for i in range(len(bed)):
    p = bed[i].get_text(strip = True)
    total.append(p)
    
total

['3 Beds',
 '2 Baths',
 '1,001 sqft / 93 sqm',
 '4 Beds',
 '4 Baths',
 '2,034 sqft / 188.96 sqm',
 '4 Beds',
 '4 Baths',
 '1,658 sqft / 154.03 sqm',
 '3 Beds',
 '2 Baths',
 '1,216 sqft / 112.97 sqm',
 '3 Beds',
 '2 Baths',
 '1,206 sqft / 112.04 sqm',
 '3 Beds',
 '4 Baths',
 '1,152 sqft / 107.02 sqm',
 '1 Bed',
 '1 Bath',
 '570 sqft / 52.95 sqm',
 '3 Beds',
 '2 Baths',
 '1,130 sqft / 104.98 sqm',
 '3 Beds',
 '3 Baths',
 '1,259 sqft / 116.96 sqm',
 '3 Beds',
 '2 Baths',
 '904 sqft / 83.98 sqm',
 '3 Beds',
 '2 Baths',
 '926 sqft / 86.03 sqm',
 '2 Beds',
 '2 Baths',
 '1,055 sqft / 98.01 sqm',
 '3 Beds',
 '2 Baths',
 '1,399 sqft / 129.97 sqm',
 '2 Beds',
 '2 Baths',
 '721 sqft / 66.98 sqm',
 '2 Beds',
 '2 Baths',
 '721 sqft / 66.98 sqm',
 '5 Beds',
 '5 Baths',
 '3,175 sqft / 294.97 sqm',
 'built-up',
 '1 Bed',
 '1 Bath',
 '538 sqft / 49.98 sqm',
 '3 Beds',
 '2 Baths',
 '1,108 sqft / 102.94 sqm',
 '2 Beds',
 '2 Baths',
 '1,012 sqft / 94.02 sqm',
 '4 Beds',
 '3 Baths',
 '3,225 sqft / 299.61 s

This HTML tag includes the info for number of bedrooms, bathrooms and size of property. Hence, there is a need to filter them out from the same HTML tag. 

In [132]:
Bed = [s for s in total if s.endswith('Beds' or 'Bed')]
len(Bed)

30

In [113]:
Baths = [s for s in total if s.endswith('Baths' or 'Bath')]
len(Baths)

29

In [104]:
# To extract property size 
Size = []
for i in range(len(bed)):
    n = bed[i].get_text(strip = True)
    Size.append(n)
    Size = [s for s in Size if s.endswith('sqm')]
Size
len(Size)

35

## 3. Build web scraper to loop through multiple pages 

In [140]:
# Write a For loop to loop through multiple pages 

page = 1
h = 100
titles = []
Psf = []
Price = []
Size = []
Bed = []
Baths = []

for page in range(1, h, 1):
    url = f"https://www.99.co/singapore/s/sale?isFilterUnapplied=false&listing_type=sale&map_bounds=1.5827095153768858%2C103.49449749970108%2C1.1090706240313446%2C104.12483807587296&page_num={page}&page_size=35&property_segments=residential&query_coords=1.3039947%2C103.8298507&query_limit=radius&query_type=city&radius_max=1000&rental_type=unit&show_cluster_preview=true&show_description=true&show_future_mrts=true&show_internal_linking=true&show_meta_description=true&show_nearby=true&zoom=11"
    header = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.82 Safari/537.36'}
    post = requests.post('https://www.99.co/singapore/s/sale?isFilterUnapplied=false&listing_type=sale&map_bounds=1.5827095153768858%2C103.49449749970108%2C1.1090706240313446%2C104.12483807587296&page_num=1&page_size=35&property_segments=residential&query_coords=1.3039947%2C103.8298507&query_limit=radius&query_type=city&radius_max=1000&rental_type=unit&show_cluster_preview=true&show_description=true&show_future_mrts=true&show_internal_linking=true&show_meta_description=true&show_nearby=true&zoom=11', headers = header)
    html = post.content
    soup = BeautifulSoup(html, "lxml")
    
    #To extract title of property
    names = soup.findAll('a', class_= '_3Ajbv _30I97 _1vzK2')
    for i in range(len(names)):
        n = names[i].get_text(strip= True)
        titles.append(n)
        
    #To extract the price of property 
    price = soup.findAll('p', class_ = '_2sIc2 JlU_W _2rhE-')
    for i in range(len(price)):
        P = price[i].get_text(strip= True)
        Price.append(P)
    
    #To extract the psf of a property
    psf = soup.findAll('p', class_ = '_2y86Q _1iMmV _2rhE-')
    for i in range(len(psf)):
        s = psf[i].get_text(strip = True)
        Psf.append(s)
    
    # To extract the size of property 
    total = soup.findAll('p', class_ = 'dniCg _2rhE- _1c-pJ')
    for i in range(len(total)):
        p = total[i].get_text(strip = True)
        Size.append(p)
        Size = [s for s in Size if s.endswith('sqm')]
         # To extract the number of bedrooms
        Bed.append(p)
        Bed = [s for s in Bed if s.endswith('Beds' or 'Bed')]
         # To extract the number of bathrooms
        Baths.append(p)
        Baths = [s for s in Baths if s.endswith('Baths' or 'Bath')]

    page = page + 1
    
    

In [146]:
len(Size)

3465

In [148]:
# Create dataframe to store data
df = pd.DataFrame(list(zip(titles, Price, Psf, Size, Bed, Baths)),
               columns =['Name', 'Price', 'psf', 'Size', 'No. of Bedrooms', 'No. of Bathrooms'])


In [149]:
df

Unnamed: 0,Name,Price,psf,Size,No. of Bedrooms,No. of Bathrooms
0,4 Bed Condo in The Coast At Sentosa Cove,"$3,800,000","$1,612 psf","2,357 sqft / 218.97 sqm",4 Beds,4 Baths
1,3 Room HDB in 24 Marsiling Drive,"$399,000",$399 psf,"1,000 sqft / 92.9 sqm",3 Beds,2 Baths
2,3 Bed Condo in Yong Siak Court,"$2,680,000","$1,805 psf","1,485 sqft / 137.96 sqm",3 Beds,2 Baths
3,3 Room HDB in 978D Buangkok Crescent,"$488,888",$505 psf,968 sqft / 89.93 sqm,3 Beds,2 Baths
4,5 Bed House on Kasara,"$21,000,000","$2,323 psf","9,042 sqft / 840.03 sqm",5 Beds,5 Baths
...,...,...,...,...,...,...
3251,Exec HDB in 164 Bukit Batok Street 11,"$1,050,000",$668 psf,"1,571 sqft / 145.95 sqm",3 Beds,2 Baths
3252,2 Bed Condo in 6 Derbyshire,"$1,750,000","$2,391 psf",732 sqft / 68 sqm,4 Beds,4 Baths
3253,4 Room HDB in 672B Yishun Avenue 4,"$588,888",$595 psf,990 sqft / 91.97 sqm,2 Beds,2 Baths
3254,3 Room HDB in 113D Mcnair Road,"$930,000",$930 psf,"1,000 sqft / 92.9 sqm",3 Beds,2 Baths


In [150]:
# Save dataframe as CSV for future use
df.to_csv('property listings')