## Libraries

In [48]:
import time
import pandas as pd
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from bs4 import BeautifulSoup
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
import re

Execution time: 0.0 seconds
Wall time: 997 µs


## Functions

In [49]:
def extract_price(link_soup):
    """Read the listing price and its currency from the bold price box.

    Args:
        link_soup: Parsed listing page exposing a BeautifulSoup-style ``find``.

    Returns:
        A ``(price, currency)`` tuple of strings, where ``price`` has all
        whitespace removed. ``(None, None)`` is returned when the price box or
        either of its spans is missing, or when the lookup raises.
    """
    missing = (None, None)
    try:
        price_box = link_soup.find('div', class_='product-price__i product-price__i--bold')
        if not price_box:
            return missing

        amount_tag = price_box.find('span', class_='price-val')
        unit_tag = price_box.find('span', class_='price-cur')
        if not (amount_tag and unit_tag):
            return missing

        # Drop every whitespace character so the amount reads as one number.
        amount = re.sub(r'\s+', '', amount_tag.text.strip())
        return amount, unit_tag.text.strip()
    except Exception:
        # Best-effort field: any parsing problem is reported as a missing value.
        return missing


def extract_owner_category(link_soup):
    """Return the stripped text of the ``product-owner__info-region`` div.

    Args:
        link_soup: Parsed listing page exposing a BeautifulSoup-style ``find``.

    Returns:
        The div's text without surrounding whitespace, or ``None`` when the
        div is absent or the lookup raises.
    """
    try:
        region_tag = link_soup.find('div', class_='product-owner__info-region')
        return region_tag.text.strip() if region_tag else None
    except Exception:
        # Best-effort field: any parsing problem is reported as a missing value.
        return None


def extract_owner_name(link_soup):
    """Return the stripped text of the ``product-owner__info-name`` div.

    Args:
        link_soup: Parsed listing page exposing a BeautifulSoup-style ``find``.

    Returns:
        The div's text without surrounding whitespace, or ``None`` when the
        div is absent or the lookup raises.
    """
    try:
        name_tag = link_soup.find('div', class_='product-owner__info-name')
        return name_tag.text.strip() if name_tag else None
    except Exception:
        # Best-effort field: any parsing problem is reported as a missing value.
        return None


def extract_phone_number(link_soup):
    """Return the phone number taken from the first ``tel:`` link on the page.

    The number is read from the ``href`` of the first ``<a>`` inside the
    ``product-phones__list-i`` div, with the ``tel:`` prefix, dashes and
    spaces removed.

    Args:
        link_soup: Parsed listing page exposing a BeautifulSoup-style ``find``.

    Returns:
        The cleaned phone number string, or ``None`` when the div or the link
        is absent or the lookup raises.
    """
    try:
        phones_item = link_soup.find('div', class_='product-phones__list-i')
        if not phones_item:
            return None

        tel_link = phones_item.find('a')
        if not tel_link:
            return None

        number = tel_link['href']
        # Strip the URI scheme first, then the separators.
        for junk in ('tel:', '-', ' '):
            number = number.replace(junk, '')
        return number
    except Exception:
        # Best-effort field: any parsing problem is reported as a missing value.
        return None


def extract_category(link_soup):
    """Return the value text of the first ``product-properties__i`` row.

    Args:
        link_soup: Parsed listing page exposing a BeautifulSoup-style ``find``.

    Returns:
        The stripped text of the row's ``product-properties__i-value`` span,
        or ``None`` when the row or the span is absent or the lookup raises.
    """
    try:
        first_row = link_soup.find('div', class_='product-properties__i')
        value_span = first_row.find('span', class_='product-properties__i-value')
        if not value_span:
            return None
        return value_span.text.strip()
    except Exception:
        # Also covers a page without any properties row (first_row is None).
        return None


def extract_floor(link_soup):
    """Return the value text of the second ``product-properties__i`` row.

    NOTE(review): the row is chosen by position, not by its label, so on
    listing types whose rows come in a different order this may return some
    other property's value. Confirm against the page markup.

    Args:
        link_soup: Parsed listing page exposing a BeautifulSoup-style
            ``find_all``.

    Returns:
        The stripped text of the row's ``product-properties__i-value`` span,
        or ``None`` when the page has fewer than two rows, the span is absent,
        or the lookup raises.
    """
    row_index = 1
    try:
        # find_all is the supported name; findAll is a deprecated alias.
        rows = link_soup.find_all('div', class_='product-properties__i')
        if len(rows) <= row_index:
            return None
        value_span = rows[row_index].find('span', class_='product-properties__i-value')
        return value_span.text.strip() if value_span else None
    except Exception:
        # Best-effort field: any parsing problem is reported as a missing value.
        return None


def extract_area(link_soup):
    """Return the value text of the third ``product-properties__i`` row.

    NOTE(review): the row is chosen by position, not by its label, so on
    listing types whose rows come in a different order this may return some
    other property's value. Confirm against the page markup.

    Args:
        link_soup: Parsed listing page exposing a BeautifulSoup-style
            ``find_all``.

    Returns:
        The stripped text of the row's ``product-properties__i-value`` span,
        or ``None`` when the page has fewer than three rows, the span is
        absent, or the lookup raises.
    """
    row_index = 2
    try:
        # find_all is the supported name; findAll is a deprecated alias.
        rows = link_soup.find_all('div', class_='product-properties__i')
        if len(rows) <= row_index:
            return None
        value_span = rows[row_index].find('span', class_='product-properties__i-value')
        return value_span.text.strip() if value_span else None
    except Exception:
        # Best-effort field: any parsing problem is reported as a missing value.
        return None


def extract_room_count(link_soup):
    """Return the value text of the fourth ``product-properties__i`` row.

    NOTE(review): the row is chosen by position, not by its label, so on
    listing types whose rows come in a different order this may return some
    other property's value. Confirm against the page markup.

    Args:
        link_soup: Parsed listing page exposing a BeautifulSoup-style
            ``find_all``.

    Returns:
        The stripped text of the row's ``product-properties__i-value`` span,
        or ``None`` when the page has fewer than four rows, the span is
        absent, or the lookup raises.
    """
    row_index = 3
    try:
        # find_all is the supported name; findAll is a deprecated alias.
        rows = link_soup.find_all('div', class_='product-properties__i')
        if len(rows) <= row_index:
            return None
        value_span = rows[row_index].find('span', class_='product-properties__i-value')
        return value_span.text.strip() if value_span else None
    except Exception:
        # Best-effort field: any parsing problem is reported as a missing value.
        return None


def extract_description(link_soup):
    """Return the value text of the fifth ``product-properties__i`` row.

    NOTE(review): the row is chosen by position, not by its label, so on
    listing types whose rows come in a different order this may return some
    other property's value. Confirm against the page markup.

    Args:
        link_soup: Parsed listing page exposing a BeautifulSoup-style
            ``find_all``.

    Returns:
        The stripped text of the row's ``product-properties__i-value`` span,
        or ``None`` when the page has fewer than five rows, the span is
        absent, or the lookup raises.
    """
    row_index = 4
    try:
        # find_all is the supported name; findAll is a deprecated alias.
        rows = link_soup.find_all('div', class_='product-properties__i')
        if len(rows) <= row_index:
            return None
        value_span = rows[row_index].find('span', class_='product-properties__i-value')
        return value_span.text.strip() if value_span else None
    except Exception:
        # Best-effort field: any parsing problem is reported as a missing value.
        return None


def extract_mortgage(link_soup):
    """Return the value text of the sixth ``product-properties__i`` row.

    NOTE(review): the row is chosen by position, not by its label, so on
    listing types whose rows come in a different order this may return some
    other property's value. Confirm against the page markup.

    Args:
        link_soup: Parsed listing page exposing a BeautifulSoup-style
            ``find_all``.

    Returns:
        The stripped text of the row's ``product-properties__i-value`` span,
        or ``None`` when the page has fewer than six rows, the span is absent,
        or the lookup raises.
    """
    row_index = 5
    try:
        # find_all is the supported name; findAll is a deprecated alias.
        rows = link_soup.find_all('div', class_='product-properties__i')
        if len(rows) <= row_index:
            return None
        value_span = rows[row_index].find('span', class_='product-properties__i-value')
        return value_span.text.strip() if value_span else None
    except Exception:
        # Best-effort field: any parsing problem is reported as a missing value.
        return None


def extract_property_info(url):
    """Scrape one listing page and return its details as a one-row DataFrame.

    A fresh headless Chrome session loads ``url``, clicks the
    ``product-phones__btn-value`` element so the page renders the phone list,
    and the resulting HTML is parsed with the ``extract_*`` helpers.

    Args:
        url: Address of the listing page to scrape.

    Returns:
        A one-row ``pandas.DataFrame`` with the columns ``url``,
        ``phone number``, ``owner name``, ``owner category``, ``category``,
        ``floor``, ``area``, ``room count``, ``description``, ``mortgage``,
        ``price`` and ``currency``. Fields that cannot be found are ``None``.
        Returns ``None`` when the page cannot be loaded or parsed (for example
        when a wait times out); the failure is printed.
    """
    try:
        # Chrome settings
        chrome_options = Options()
        chrome_options.add_argument('--headless')  # Run Chrome WebDriver in headless mode
        driver = webdriver.Chrome(options=chrome_options)

        try:
            driver.get(url)
            time.sleep(2)

            wait = WebDriverWait(driver, 10)

            # Wait for the phone button; until() returns the located element,
            # so no second lookup is needed before clicking it.
            phone_button = wait.until(
                EC.presence_of_element_located((By.CLASS_NAME, 'product-phones__btn-value'))
            )
            phone_button.click()

            # Wait for the phone list that appears after the click
            wait.until(
                EC.presence_of_element_located((By.CLASS_NAME, 'product-phones__list-i'))
            )

            # Get the updated HTML after clicking the button
            updated_html = driver.page_source
        finally:
            # Always release the browser, including on timeouts and other
            # errors; otherwise every failed listing leaves a Chrome process
            # running.
            driver.quit()

        # Parse the updated HTML
        link_soup = BeautifulSoup(updated_html, 'html.parser')

        price, currency = extract_price(link_soup)

        # Create a DataFrame with the extracted information
        data = {
            'url': [url],
            'phone number': [extract_phone_number(link_soup)],
            'owner name': [extract_owner_name(link_soup)],
            'owner category': [extract_owner_category(link_soup)],
            'category': [extract_category(link_soup)],
            'floor': [extract_floor(link_soup)],
            'area': [extract_area(link_soup)],
            'room count': [extract_room_count(link_soup)],
            'description': [extract_description(link_soup)],
            'mortgage': [extract_mortgage(link_soup)],
            'price': [price],
            'currency': [currency]
        }
        return pd.DataFrame(data)

    except Exception as exc:
        # Best-effort scraping: report the failure and let the caller skip
        # this listing instead of aborting the whole run.
        print(f"Failed to scrape {url}: {exc!r}")
        return None

## Selenium settings

In [50]:
# Headless Chrome session used by the scraping loop cell to load the listing
# index pages. That cell calls driver.quit() when it finishes, so re-run this
# cell before running the loop again.
chrome_options = Options()
chrome_options.add_argument('--headless')  # Run Chrome WebDriver in headless mode
driver = webdriver.Chrome(options=chrome_options)

In [51]:
# Accumulators for the scraping loop cell. The loop appends to df_list without
# clearing it, so re-run this cell first to start from an empty list.
df_list = []  # List to store the one-row dataframes, one per scraped listing
final_df=pd.DataFrame() # Placeholder; reassigned by the scraping loop cell

## Starting data extraction loop

In [56]:
%%time
for page in range(2318, 2319):
    print(f"Scraping page {page}...")
    url = f'https://bina.az/alqi-satqi?page={page}'
    driver.get(url)
    time.sleep(2)

    soup = BeautifulSoup(driver.page_source, 'html.parser')
    content = soup.find_all('div', class_='items-i')

    for item in content:
        link = 'https://bina.az' + item.a['href']
        df = extract_property_info(link)
        if df is None:
            continue
        df_list.append(df)

driver.quit()  # Close the WebDriver

# Concatenate all dataframes in the list
final_df = pd.concat(df_list, ignore_index=True)

# Process the final_df as desired

final_df


Scraping page 2318...
Wall time: 6min 14s


Unnamed: 0,url,phone number,owner name,owner category,category,floor,area,room count,description,mortgage,price,currency
0,https://bina.az/items/3545384,515025092,İbrahim,mülkiyyətçi,Torpaq,5 sot,var,,,,35000,AZN
1,https://bina.az/items/3545384,515025092,İbrahim,mülkiyyətçi,Torpaq,5 sot,var,,,,35000,AZN
2,https://bina.az/items/3545386,502093672,Tunar,vasitəçi (agent),Yeni tikili,3 / 14,65 m²,2,var,var,130000,AZN
3,https://bina.az/items/3394442,102505026,Rauf,vasitəçi (agent),Yeni tikili,3 / 12,132 m²,3,var,var,195000,AZN
4,https://bina.az/items/3545400,558222474,hidayet,vasitəçi (agent),Yeni tikili,2 / 9,80 m²,3,var,var,140000,AZN
5,https://bina.az/items/3401412,502093753,Terlan,vasitəçi (agent),Yeni tikili,5 / 17,110 m²,3,var,var,230000,AZN
6,https://bina.az/items/3545399,502093651,Asif,vasitəçi (agent),Həyət evi / Bağ evi,800 m²,10 sot,10,var,var,1250000,AZN
7,https://bina.az/items/3545391,503645007,Fərrux,mülkiyyətçi,Həyət evi / Bağ evi,82 m²,2 sot,4,var,var,97000,AZN
8,https://bina.az/items/3545392,513311243,Sima,vasitəçi (agent),Həyət evi / Bağ evi,160 m²,6 sot,6,var,var,95000,AZN
9,https://bina.az/items/3545390,512709927,Mamedov Roman,vasitəçi (agent),Yeni tikili,5 / 16,75 m²,2,var,var,217000,AZN
