In [None]:
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC 
from selenium_stealth import stealth
import time
import pandas as pd
import sqlite3

# Shared scrape state.
# page_num is the results page requested next (it is interpolated into the
# search URL and incremented after each page). Every other name is an
# independent accumulator list that the scraping loop extends page by page.
page_num = 1

prices, room_size, price_per_sqm = [], [], []
condo_details, station_element = [], []
condo_names, addresses, agency = [], [], []

# Chrome launch configuration used for every browser the scraper starts:
# a maximized, headless window, with the "enable-automation" switch excluded
# and the automation extension turned off (presumably so the session looks
# less like an automated browser -- confirm against the target site).
options = webdriver.ChromeOptions()

for _cli_flag in ("start-maximized", "--headless"):
  options.add_argument(_cli_flag)

for _option_name, _option_value in (
  ("excludeSwitches", ["enable-automation"]),
  ('useAutomationExtension', False),
):
  options.add_experimental_option(_option_name, _option_value)

# Crawl the paginated condo-for-rent search results, one results page per
# loop iteration, appending the text of each field to the module-level
# accumulator lists. The crawl stops when the page's "next" pagination item
# is disabled or when any page fails to load/parse.
#
# NOTE(review): every column is collected with its own page-wide selector, so
# the lists are only row-aligned if each listing card contains every element
# -- confirm against the live markup.

# Stays None until Chrome has actually started, so the cleanup below never
# touches an unbound name if the launch itself fails.
driver = None

# Substrings of a row's HTML that identify it as a listing-description row.
description_markers = (
  'col-xs-12 col-sm-12 listing-description',
  'col-xs-12 col-sm-7 listing-description',
)

try:
  # One browser session serves all pages. Launching a new Chrome per page and
  # quitting only the last one left a browser process behind for every page.
  driver = webdriver.Chrome(options=options)
  stealth(driver,
          languages=["en-US", "en"],
          vendor="Google Inc.",
          platform="Win32",
          webgl_vendor="Intel Inc.",
          renderer="Intel Iris OpenGL Engine",
          fix_hairline=True,
          )

  while True:
    url = f'https://www.ddproperty.com/en/property-for-rent/{page_num}?freetext=Bangkok&property_type=N&property_type_code%5B0%5D=CONDO&region_code=TH10&search=true'
    print(f'page: {page_num}')

    try:
      driver.get(url)

      soup = BeautifulSoup(driver.page_source, 'html.parser')

      # Price
      prices.extend([price.text.strip() for price in soup.find_all('li', class_='list-price pull-left')])

      # Floor area, and the <li> that follows it (price per sqm). An empty
      # string is recorded for a feature list without a floor-area item.
      feature_lists = soup.find_all('ul', {'class': 'listing-features pull-left', 'data-automation-id': 'listing-card-other-details-txt'})
      floor_areas = [ul.find('li', {'class': 'listing-floorarea pull-left'}) for ul in feature_lists]
      room_size.extend([area.text.strip() if area else "" for area in floor_areas])
      price_per_sqm.extend([area.find_next('li').text.strip() if area else "" for area in floor_areas])

      # Furnished?, Built
      condo_details.extend([detail.text.strip() for detail in soup.find_all('ul', class_='listing-property-type')])

      # Raw <div class="row"> elements; station, room counts and links are
      # derived from them once, after the crawl has finished.
      station_element.extend(soup.find_all('div', class_='row'))

      # Condo name
      condo_names.extend([name.find('h3', class_='h4').text.strip() for name in soup.find_all('div', class_='header-container')])

      # Location
      addresses.extend([address.text.strip() for address in soup.find_all('p', class_="listing-location ellipsis")])

      agency.extend([agen.find('span', class_='name').text for agen in soup.find_all('div', class_='featured-description col-xs-12 col-sm-7')])

      if soup.find('li', class_="pagination-next disabled"):
        print("this is the last page")
        break

    except Exception as e:
      # Best effort: report the failing page and keep what was collected.
      print(f"Error on page {page_num}: {e}")
      break

    page_num += 1

    # To cap the crawl at a fixed number of pages, uncomment:
    # if page_num > 100:
    #   break

finally:
  if driver is not None:
    driver.quit()

# Derive the per-row columns in a single pass over all collected rows.
# Doing this inside the page loop re-scanned every earlier page each time and
# left these names undefined whenever the very first page failed.
station_extract = []  # walk icon (or None) of each listing-description row
stations = []         # text around the walk icon, "" when there is no icon
link = []             # href of each listing-description row's nav-link anchor
room_amount = []      # element holding bed/bath counts for each row, or ""

for row in station_element:
  row_html = str(row)

  if any(marker in row_html for marker in description_markers):
    walk_icon = row.find('i', class_='pgicon pgicon-walk')
    station_extract.append(walk_icon)
    stations.append(walk_icon.parent.text.strip() if walk_icon is not None else "")

    # A row without a nav-link gets "" so `link` stays aligned with `stations`.
    anchor = row.find('a', class_='nav-link')
    link.append(anchor.get('href') if anchor is not None else "")

  if 'listing-rooms pull-left' in row_html:
    room_amount.append(row.find('li', class_='listing-rooms pull-left'))
  else:
    room_amount.append(row.find('h3', class_='h4') or "")

# Amount of bedroom and bathroom. Rows with no room element are skipped, so
# these two lists can be shorter than `room_amount`.
bathroom = []
bedroom = []
for rooms in room_amount:
  if not rooms:
    continue
  bath = rooms.find('span', class_='bath')
  bathroom.append(bath.text if bath else "")
  # Studios carry a 'studio' span instead of a 'bed' span.
  bed = rooms.find('span', class_='bed') or rooms.find('span', class_='studio')
  bedroom.append(bed.text if bed else "")

# Post-process the scraped columns and write them to a CSV file.

# Listing titles with the 'For Rent - ' prefix and ', Bangkok' text removed.
# Titles lacking the prefix are dropped, so this list can be shorter than
# condo_names; the CSV below uses the raw condo_names.
filtered_con_names = [name.replace('For Rent - ', '').replace(', Bangkok', '').strip() for name in condo_names if name and name.startswith('For Rent - ')]

# Remove any leading '-' / space characters in front of each address.
stripped_addresses = [address.lstrip('- ').strip() for address in addresses]

columns = {
  'name': condo_names,
  'price_p_month': prices,
  'address': stripped_addresses,
  'station': stations,
  'condo_details': condo_details,
  'room_size': room_size,
  'bedroom': bedroom,
  'bathroom': bathroom,
  'price_per_sqm': price_per_sqm,
  'agency': agency,
  'link': link,
}

# The columns are scraped independently and may end up with different
# lengths. Report that loudly instead of losing the whole crawl to the
# ValueError that pandas raises for a dict of unequal-length lists.
column_lengths = {label: len(values) for label, values in columns.items()}
if len(set(column_lengths.values())) > 1:
  print(f"Warning: scraped columns differ in length; shorter ones are padded with NaN and rows may be misaligned: {column_lengths}")

# Series are aligned on their index, so shorter columns are padded with NaN.
# With equal-length columns this yields the same frame as a dict of lists.
df = pd.DataFrame({label: pd.Series(values, dtype=object) for label, values in columns.items()})

df.to_csv(r'C:\to\my\path\condo_bkk_2024_2_300.csv', index=False, encoding='utf-8-sig')

# export as SQLite
# conn = sqlite3.connect('bkk_condo.db')
# df.to_sql('condo_bkk_2024', con=conn, if_exists='append')
# conn.close()