### TARGET SITE: https://nigeriapropertycentre.com/for-sale/houses/showtype

Scrape the first 5 pages of this website.

Extract NIgerian house listing data. The information to be extracted for each listing is:
    
1. Listing
    
2. Property
    
3. Location
    
4. Price

5. Company

6. Phone Number

7. Properties (Bathroom, Bedrooms etc)
    
The output would be a pandas dataframe

### IMPORT NECESSARY LIBRARIES

In [1]:
# Importing the pandas library for data manipulation and analysis
import pandas as pd

# Importing the time library to introduce delays in the script, if needed
import time

# Importing BeautifulSoup from the bs4 library to parse HTML and XML documents
from bs4 import BeautifulSoup

# Importing the requests library to make HTTP requests
import requests

# Importing the random library to generate random values (e.g., for selecting random user agents)
import random

### SCRAPE THE DATA

In [11]:
# Create an empty DataFrame with appropriate columns
df = pd.DataFrame(columns=["property", "location", "price", "company", "phone", "bedrooms", "bathrooms", "toilets", "parking_spaces"])

# Base URL for the house listings, assuming the page number should be part of the URL path
base_url = "https://nigeriapropertycentre.com/for-sale/houses/showtype?page="

# List of user agents to rotate
user_agents = [
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3",
    "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:54.0) Gecko/20100101 Firefox/54.0",
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36",
]

# Loop through the first twenty pages
all_listing_data = []  # Create an empty list to store listing data
for page in range(1, 20):
    # Randomly select a User-Agent header from the list
    headers = {'User-Agent': random.choice(user_agents)}
    
    # Construct the URL for the current page
    url = base_url + str(page)
    
    response = requests.get(url, headers=headers)
    soup = BeautifulSoup(response.content, 'lxml')

    # Find all property listings on the current page
    listings = soup.find_all('div', class_='row property-list highlight-property')

    for listing in listings:
        try:
            property = listing.find('h4', class_='content-title').text.strip() if listing.find('h4', class_='content-title') else None
            location = listing.find('address', class_='voffset-bottom-10').text.strip() if listing.find('address', class_='voffset-bottom-10') else None
            price = listing.find('span', class_='pull-sm-left').text.strip() if listing.find('span', class_='pull-sm-left') else None
            
            company_info = listing.find('span', class_='marketed-by pull-right hidden-xs hidden-sm text-right')
            if company_info:
                company_info = company_info.text.strip()
                company = company_info.split('\n')[0].strip()
                phone = '+' + company_info.split('n')[-1].strip()
            else:
                company = None
                phone = None

            bedrooms = listing.find('i', class_='fal fa-bed')
            bedrooms = bedrooms.find_next('span').text.strip() if bedrooms else None
            
            bathrooms = listing.find('i', class_='fal fa-bath')
            bathrooms = bathrooms.find_next('span').text.strip() if bathrooms else None
            
            toilets = listing.find('i', class_='fal fa-toilet')
            toilets = toilets.find_next('span').text.strip() if toilets else None
            
            parking_spaces = listing.find('i', class_='fal fa-car')
            parking_spaces = parking_spaces.find_next('span').text.strip() if parking_spaces else None

            # Store the data as a dictionary
            listing_data = {
                "property": property,
                "location": location,
                "price": price,
                "company": company,
                "phone": phone,
                "bedrooms": bedrooms,
                "bathrooms": bathrooms,
                "toilets": toilets,
                "parking_spaces": parking_spaces
            }
            all_listing_data.append(listing_data) 

        except AttributeError as e:
            print(f"Error parsing listing: {e}")

# Create the DataFrame after all data is collected
df = pd.DataFrame(all_listing_data)

### CHECK SIZE OF THE DATA

In [12]:
df.shape

(19, 9)

### PRINT THE DATAFRAME

In [13]:
df

Unnamed: 0,property,location,price,company,phone,bedrooms,bathrooms,toilets,parking_spaces
0,4 bedroom terraced duplex for sale,"Katampe Extension, Katampe, Abuja","₦280,000,000",Everything Property Ltd,+g Property Ltd\n\n 08135339459,4,4.0,5.0,
1,5 bedroom detached duplex for sale,"Pinnock Beach Estate, Osapa, Lekki, Lagos","$3,000,000 \napprox. ₦4,413,661,131",Apple Properties,+Apple Properties\n\n 08025286152,5,,5.0,
2,31 bedroom block of flats for sale,"Chevvy View Estate, Lekki, Lagos","₦2,200,000,000",Hampton Hills,+Hills\n\n 08035960464,31,31.0,31.0,
3,6 bedroom detached duplex for sale,"Abule Parapo, Awoyaya, Ibeju Lekki, Lagos","₦88,000,000",Golden Hilton Homes Limited,+Homes Limited\n\n 08122264426,6,6.0,8.0,6.0
4,3 bedroom terraced duplex for sale,"Urban Prime Three Phase Two, Ogombo, Ajah, Lagos","₦70,000,000",Lake Pad Ventures,+tures\n\n 08067495852,3,3.0,4.0,2.0
5,6 bedroom detached duplex for sale,"Abule Parapo, Awoyaya, Ibeju Lekki, Lagos","₦88,000,000",Golden Hilton Homes Limited,+Homes Limited\n\n 08122264426,6,6.0,8.0,6.0
6,5 bedroom semi-detached duplex for sale,"Off 1st Avenue Road, Banana Island, Ikoyi, Lagos","₦1,500,000,000",Beverly & Sam Properties,+Beverly & Sam Properties\n\n 08038156271,5,5.0,6.0,4.0
7,4 bedroom semi-detached duplex for sale,"Off Ibrahim Onashokun Street, Opposite Ifako G...","₦180,000,000",Yield Unique Services,+ique Services\n\n 2348028281437,4,2.0,3.0,6.0
8,5 bedroom semi-detached duplex for sale,"Off 1st Avenue Road, Banana Island, Ikoyi, Lagos","₦1,500,000,000",Beverly & Sam Properties,+Beverly & Sam Properties\n\n 08038156271,5,5.0,6.0,4.0
9,5 bedroom house for sale,"Off Oduduwa Crescent, Ikeja GRA, Ikeja, Lagos","₦475,000,000",Greenhaven Properties & Development Company Li...,+y Limited\n\n +2348033306666,5,,,
