## Scraping Housing Rental Data Using Selenium and Beautiful soup

### Install packages

In [112]:
# !pip install datatable
# !pip install selenium
# !apt install chromium-chromedriver

### Import necessary packages

In [None]:
import pandas as pd
import datatable as dt
import time
import datetime
from datetime import datetime
from bs4 import BeautifulSoup 
import requests 

from selenium import webdriver
chrome_options = webdriver.ChromeOptions()
chrome_options.add_argument('--headless')
chrome_options.add_argument('--no-sandbox')
chrome_options.add_argument('--disable-dev-shm-usage')
from selenium.webdriver.chrome.options import Options
from google.colab import files

driver = webdriver.Chrome('chromedriver',options=chrome_options)

options = Options()
options.add_argument('--incognito')
options.add_argument("download.default_directory=c://content//")



upload prepared file for managing the scraping process

In [None]:
df = dt.fread('/content/nga.prop.centre.xlsx')
dfx = df.to_pandas()
dfx.head()

Unnamed: 0,state,lga,path
0,abuja,abaji,abuja/abaji
1,abuja,apo,abuja/apo
2,abuja,asokoro-district,abuja/asokoro-district
3,abuja,bwari,abuja/bwari
4,abuja,central-area-phase-2,abuja/central-area-phase-2


### Get the source data url

In [114]:
url = 'https://nigeriapropertycentre.com/for-rent/'

### append necessary path to url

In [115]:
page_no = '?page=1'
spath = dfx.iloc[13,2]
con_path = url + spath + page_no
con_path

'https://nigeriapropertycentre.com/for-rent/abuja/durumi?page=1'

https://nigeriapropertycentre.com/for-rent/abuja/durumi?page=1

### passing and loading the html page

In [116]:
# apply the get method
driver.get(con_path) 
  
# this is just to ensure that the page is loaded
time.sleep(5) 
  
html = driver.page_source
  
# this renders the JS code and stores all
# of the information in static HTML code.
  
# Now, we could simply apply bs4 to html variable
soup = BeautifulSoup(html, "html.parser")

### pretify the loaded html page

In [117]:
print(soup.prettify())

<html class=" js no-touch cssanimations csstransitions" lang="en" style="">
 <head>
  <meta charset="utf-8"/>
  <meta content="app-id=1125813120" name="apple-itunes-app"/>
  <meta content="app-id=com.nigeriapropertycentre.app" name="google-play-app"/>
  <meta content="width=device-width, initial-scale=1.0" name="viewport"/>
  <meta content="UwdBgZWoDBzNkidRBPspDeAWcqnhYNXWds4SXYDp" name="csrf-token"/>
  <meta content="max-image-preview:large" name="robots"/>
  <link href="https://st.nigeriapropertycentre.com/webapp/manifest.json" rel="manifest"/>
  <link href="https://assets.nigeriapropertycentre.com/e3a33ae7-e883-47bc-9a2f-fe8c8dc99b14/launcher-icon-60.png" rel="apple-touch-icon"/>
  <link href="https://assets.nigeriapropertycentre.com/e3a33ae7-e883-47bc-9a2f-fe8c8dc99b14/launcher-icon-76.png" rel="apple-touch-icon" sizes="76x76"/>
  <link href="https://assets.nigeriapropertycentre.com/e3a33ae7-e883-47bc-9a2f-fe8c8dc99b14/launcher-icon-120.png" rel="apple-touch-icon" sizes="120x120"/>

### loop through loaded html tags to get to housing type 

In [118]:
#
table1 = []
for tag in soup.find_all("h4"):
  table1.append(tag.text)
  # Printing the name, and text of h4 tag
  print(f'{tag.name}: {tag.text}')

h4: 2 bedroom flat / apartment for rent
h4: 3 bedroom flat / apartment for rent
h4: 3 bedroom flat / apartment for rent
h4: 3 bedroom flat / apartment for rent
h4: 2 bedroom flat / apartment for rent
h4: 2 bedroom flat / apartment for rent
h4: 2 bedroom flat / apartment for rent
h4: 2 bedroom flat / apartment for rent
h4: 2 bedroom flat / apartment for rent
h4: 2 bedroom flat / apartment for rent
h4: 2 bedroom flat / apartment for rent
h4: 2 bedroom flat / apartment for rent
h4: 3 bedroom flat / apartment for rent
h4: 2 bedroom flat / apartment for rent
h4: 2 bedroom flat / apartment for rent
h4: 2 bedroom flat / apartment for rent
h4: 2 bedroom flat / apartment for rent
h4: 2 bedroom house for rent
h4: 8 bedroom detached duplex for rent
h4: 2 bedroom flat / apartment for rent
h4: 

FAQs  


h4: Sign in or register to save
h4: 
h4: About Us
h4: Advertise with Us
h4: Market Trends
h4: Popular Property
h4: Companies
h4: Useful Links
h4: Follow us
h4: Download our Mobile App
h4: Other Reg

### loop through loaded html tags to address or location 

In [119]:
#
table2 = []
for tag in soup.find_all("address", class_="voffset-bottom-10"):
  table2.append(tag.text)
  # Printing the name, and text of address tag
  print(f'{tag.name}: {tag.text}')

address:   Durumi, Abuja
address:   Durumi, Abuja
address:   American International School, Durumi, Abuja
address:   Close To American International School, Durumi, Abuja
address:   By American International, Durumi, Abuja
address:   Durumi, Abuja
address:   American International School, Durumi, Abuja
address:   By American International School, Durumi, Abuja
address:   American School, Durumi, Abuja
address:   Near American Intl. Schools, Durumi, Abuja
address:   Durumi, Abuja
address:   Durumi, Abuja
address:   By American International School, Durumi, Abuja
address:   Durumi, Abuja
address:   By American International School, Durumi, Abuja
address:   Durumi, Abuja
address:   Durumi, Abuja
address:   American International School Diplomatic Zone, Durumi, Abuja
address:   Prince And Princess Estate Abuja Diplomatic Zone, Durumi, Abuja
address:   Durumi, Abuja


### loop through loaded html tags to get house features

In [120]:
#table3 = soup.find_all("p")
table3 = []
for tag in soup.find_all('p'):
  table3.append(tag.text)
  # Printing the name, and text of p tag
  print(f'{tag.name}: {tag.text}')

p: 
It's a brand new two bedroom apartment in durumi
all rooms are en suit
style of the house is modern
rent is 2.5m
sc:300k
a/l is ...

p: 
This is a serviced three bedroom in durumi
all rooms are en suit and spacious
comes with ac and central generator
rent is 3m
service charge is 1m
agency and legal is 15%=450k
total is 4.4...

p: 
 Very clean serviced 3bedroom apartment in a mini estate in american international school, durumi.
key features:
* standby generator
* serene and we'll secured environment
* standard wardrobes
* fitted kitchen
* ample parking space
* air conditioners
* modern toilets fittings
* balconies
* street lights
* good drainage system
* dining area
* visitors toilet
* all rooms ensuite
* 24 hours electricity supply
* treated borehole
* well trained uniform security men
* tarred roads and good road network
* just by american international school etc
note: agency and legal fee is 15% of rental value
for more information, please contact...

p: 
A newly built and exqu

### slice off irelevant parts from each list to ensure equal length

In [121]:
table1 = table1[0:20]

In [122]:
table2

[' \xa0Durumi, Abuja',
 ' \xa0Durumi, Abuja',
 ' \xa0American International School, Durumi, Abuja',
 ' \xa0Close To American International School, Durumi, Abuja',
 ' \xa0By American International, Durumi, Abuja',
 ' \xa0Durumi, Abuja',
 ' \xa0American International School, Durumi, Abuja',
 ' \xa0By American International School, Durumi, Abuja',
 ' \xa0American School, Durumi, Abuja',
 ' \xa0Near American Intl. Schools, Durumi, Abuja',
 ' \xa0Durumi, Abuja',
 ' \xa0Durumi, Abuja',
 ' \xa0By American International School, Durumi, Abuja',
 ' \xa0Durumi, Abuja',
 ' \xa0By American International School, Durumi, Abuja',
 ' \xa0Durumi, Abuja',
 ' \xa0Durumi, Abuja',
 ' \xa0American International School Diplomatic Zone, Durumi, Abuja',
 ' \xa0Prince And Princess Estate Abuja Diplomatic Zone, Durumi, Abuja',
 ' \xa0Durumi, Abuja']

In [123]:
table3 = table3[0:20]

In [124]:
len(table1), len(table2), len(table3)

(20, 20, 20)

### merge all list into a single dataframe

In [125]:
rent_table = pd.DataFrame({'prop.type':table1,
                           'address':table2,
                           'features':table3})

rent_table

Unnamed: 0,prop.type,address,features
0,2 bedroom flat / apartment for rent,"Durumi, Abuja",\nIt's a brand new two bedroom apartment in du...
1,3 bedroom flat / apartment for rent,"Durumi, Abuja",\nThis is a serviced three bedroom in durumi\n...
2,3 bedroom flat / apartment for rent,"American International School, Durumi, Abuja",\n Very clean serviced 3bedroom apartment in a...
3,3 bedroom flat / apartment for rent,"Close To American International School, Duru...",\nA newly built and exquisitely finished 2-bed...
4,2 bedroom flat / apartment for rent,"By American International, Durumi, Abuja",\nRent 2 bedrooms flat with bq serviced with a...
5,2 bedroom flat / apartment for rent,"Durumi, Abuja",\nStandard 2 bedrooms flat with bq serviced wi...
6,2 bedroom flat / apartment for rent,"American International School, Durumi, Abuja",\nVery sharp serviced 2bedroom apartment with\...
7,2 bedroom flat / apartment for rent,"By American International School, Durumi, Abuja",\nThis apartment is tastefully finished and ve...
8,2 bedroom flat / apartment for rent,"American School, Durumi, Abuja",\n2 bedrooms flat with bq serviced with air co...
9,2 bedroom flat / apartment for rent,"Near American Intl. Schools, Durumi, Abuja",\nIt's a clean and very spacious two bedroom f...


In [126]:
rent_table.to_csv('for-rent_in_durumi_05.csv')

In [127]:
#@title
csv1 = pd.read_csv('/content/for-rent_in_durumi_01.csv')
csv2 = pd.read_csv('/content/for-rent_in_durumi_02.csv')
csv3 = pd.read_csv('/content/for-rent_in_durumi_03.csv')
csv4 = pd.read_csv('/content/for-rent_in_durumi_04.csv')
csv5 = pd.read_csv('/content/for-rent_in_durumi_05.csv')

In [128]:
#@title
complt_csv = csv1.append(csv2)
complt_csv = complt_csv.append(csv3)
complt_csv = complt_csv.append(csv4)
complt_csv = complt_csv.append(csv5)

In [129]:
#@title
complt_csv

Unnamed: 0.1,Unnamed: 0,prop.type,address,features
0,0,2 bedroom flat / apartment for rent,"Durumi, Abuja",\nIt's a brand new two bedroom apartment in du...
1,1,3 bedroom flat / apartment for rent,"Durumi, Abuja",\nThis is a serviced three bedroom in durumi\n...
2,2,3 bedroom flat / apartment for rent,"American International School, Durumi, Abuja",\nVery clean serviced 3bedroom apartment in a ...
3,3,3 bedroom flat / apartment for rent,"Close To American International School, Duru...",\nA newly built and exquisitely finished 2-bed...
4,4,2 bedroom flat / apartment for rent,"By American International, Durumi, Abuja",\nRent 2 bedrooms flat with bq serviced with a...
...,...,...,...,...
15,15,2 bedroom flat / apartment for rent,"Durumi, Abuja",\nThis is a very clean and spacious 2 bedroom ...
16,16,2 bedroom flat / apartment for rent,"Durumi, Abuja",\nRent alert\nfantastic 2 bedroom flat\nfeatur...
17,17,2 bedroom house for rent,American International School Diplomatic Zon...,"\nA newly built, world class smart standard 2 ..."
18,18,8 bedroom detached duplex for rent,Prince And Princess Estate Abuja Diplomatic ...,\nWell renovated world class pay and moving 8 ...


In [130]:
#@title
complt_csv.to_csv('for-rent_in_durumi.csv')