In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

# First page of the paginated BDO repossessed-vehicles listing; later cells
# start the crawl from this address.
base_url = 'https://www.bdo.com.ph/properties-for-sale/vehicles'

# HTTP headers sent with every request. The User-Agent mimics desktop Chrome.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/97.0.4692.71 Safari/537.36'
    ),
}

## Total Pages

In [2]:
def find_total_pages(url, headers):
    """Return the URL of every page of a paginated listing.

    Downloads ``url``, reads the total page count from the link inside the
    ``<li class="pager-total">`` element, and builds one URL per page. The
    first page is ``url`` itself; the following pages are ``url`` with a
    ``?page=<n>`` suffix where ``n`` starts at 1 (so ``?page=1`` is the
    second page).

    Args:
        url: Address of the first listing page. It is assumed to carry no
            query string of its own, since ``?page=<n>`` is appended to it.
        headers: Mapping of HTTP headers sent with the request.

    Returns:
        list[str]: Page URLs in crawl order. When the page has no
        ``pager-total`` element (or it contains no link) the listing is
        treated as a single page and ``[url]`` is returned.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        ValueError: If the pager link text is not an integer.
    """
    # Request the page the caller asked for (not a module-level global), and
    # never wait forever on an unresponsive server.
    response = requests.get(url, headers=headers, timeout=30)
    # Fail loudly on an error page rather than mis-reading it as "no pager".
    response.raise_for_status()

    soup = BeautifulSoup(response.content, 'html.parser')

    pager_total_li = soup.find('li', class_='pager-total')
    a_tag = pager_total_li.find('a') if pager_total_li is not None else None

    # A listing that fits on one page renders no pager at all.
    total_pages = int(a_tag.text) if a_tag is not None else 1

    return [url if page == 0 else f'{url}?page={page}' for page in range(total_pages)]


In [3]:
# Build the list of listing-page URLs that the crawl loop below iterates over.
pages_to_crawl = find_total_pages(url=base_url, headers=headers)

## Crawling all the pages

In [5]:
def get_cars_data_from_page(page_url, headers):
    """Scrape the vehicle rows from one listing page.

    Downloads ``page_url``, locates the ``<table class="views-table">``
    element and turns every row that has a brand cell into a dict.

    Args:
        page_url: Address of the listing page to scrape.
        headers: Mapping of HTTP headers sent with the request.

    Returns:
        list[dict[str, str]]: One dict per vehicle row with the keys
        ``brand``, ``generic_text``, ``year``, ``mileage``, ``color``,
        ``plate``, ``price`` and ``location`` (in that order). Values are the
        whitespace-stripped cell texts; a cell missing from a row yields
        ``""``. Rows without a brand cell (e.g. the header row) are skipped.
        An empty list is returned when the page has no listing table.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
    """
    # Output key -> CSS class of the <td> that holds the value.
    field_classes = {
        "brand": 'views-field-field-vehicle-brand',
        "generic_text": 'views-field-field-generic-text-1',
        "year": 'views-field-field-year',
        "mileage": 'views-field-field-mileage-str',
        "color": 'views-field-field-color',
        "plate": 'views-field-field-plate',
        "price": 'views-field-field-property-price',
        "location": 'views-field-field-vehicle-location',
    }

    # Bound the wait and surface HTTP errors instead of silently returning
    # zero rows for an error page.
    response = requests.get(page_url, headers=headers, timeout=30)
    response.raise_for_status()

    soup = BeautifulSoup(response.content, 'html.parser')

    table = soup.find('table', class_="views-table")
    if table is None:
        return []

    cars = []
    for row in table.find_all('tr'):
        cells = {
            key: row.find('td', css_class)
            for key, css_class in field_classes.items()
        }

        # Header rows (and any other non-vehicle rows) carry no brand cell.
        if cells["brand"] is None:
            continue

        # A single absent cell must not abort the whole crawl.
        cars.append({
            key: cell.get_text(strip=True) if cell is not None else ""
            for key, cell in cells.items()
        })

    return cars


In [6]:
import time
import random 
import math

# Collects one dict per vehicle across all crawled pages.
cars = []

for page_url in pages_to_crawl:
    print(f"Start crawling: {page_url}")

    # Wait a random whole number of seconds before each request.
    time_to_sleep = math.ceil(random.uniform(1, 10))
    print(f"Sleeping for {time_to_sleep}")
    time.sleep(time_to_sleep)

    crawled_cars = get_cars_data_from_page(page_url, headers)
    cars += crawled_cars

    print(f"Done crawling: {page_url}")
    print("----------------------------------------")

# Tabulate the scraped rows and persist them next to the notebook.
df = pd.DataFrame(cars)
df.to_csv('bdo_cars.csv')

print("Done writing csv data...")
    

Start crawling: https://www.bdo.com.ph/properties-for-sale/vehicles
Sleeping for 3
Done crawling: https://www.bdo.com.ph/properties-for-sale/vehicles
----------------------------------------
Start crawling: https://www.bdo.com.ph/properties-for-sale/vehicles?page=1
Sleeping for 9
Done crawling: https://www.bdo.com.ph/properties-for-sale/vehicles?page=1
----------------------------------------
Start crawling: https://www.bdo.com.ph/properties-for-sale/vehicles?page=2
Sleeping for 9
Done crawling: https://www.bdo.com.ph/properties-for-sale/vehicles?page=2
----------------------------------------
Start crawling: https://www.bdo.com.ph/properties-for-sale/vehicles?page=3
Sleeping for 5
Done crawling: https://www.bdo.com.ph/properties-for-sale/vehicles?page=3
----------------------------------------
Start crawling: https://www.bdo.com.ph/properties-for-sale/vehicles?page=4
Sleeping for 9
Done crawling: https://www.bdo.com.ph/properties-for-sale/vehicles?page=4
------------------------------