In [1]:
from bs4 import BeautifulSoup
import requests

In [2]:
url = "https://www.bnakaran.com/en/duplex-for-sale-in-downtown-yerevan-d113745"
response = requests.get(url)

if response.status_code != 200:
    print(f"Failed to fetch the webpage. Status code: {response.status_code}")
else:
    soup = BeautifulSoup(response.content, 'html.parser')


### Area/Storey/Room

In [3]:
# Find the <li> tags and extract the text
for li in soup.select('ul.property-main-features > li'):
    key = li.get_text(strip=True).split(':')[0].strip()
    value = li.find('span').get_text(strip=True)
    if key.lower() == 'area':
        area_number = ''.join(filter(str.isdigit, value))
        print('Area:', area_number)
    elif key.lower() == 'storey':
        storey_number = ''.join(filter(str.isdigit, value.split('/')[0]))
        print('Storey:', storey_number)
    elif key.lower() == 'rooms':
        rooms_number = ''.join(filter(str.isdigit, value))
        print('Rooms:', rooms_number)

Area: 100
Storey: 5
Rooms: 3


### Images

In [4]:
# Find all <a> tags with the class "item" and the "href" attribute
image_links = [a['href'] for a in soup.find_all('a', class_='item', href=True)]

# Print the result
for link in image_links:
    print(link)

https://pic.estate.am/image/50/c9/50c9afc2d1c76caffcbb92738e52354e_s1200.jpg
https://pic.estate.am/image/bf/59/bf59ee9218901b4b0c092bc831dfa91f_s1200.jpg
https://pic.estate.am/image/c2/7a/c27a30ffc32673f38ff1209bbd17e234_s1200.jpg
https://pic.estate.am/image/73/bb/73bb906b4a9ddd261e9e0c5ebade3c4e_s1200.jpg
https://pic.estate.am/image/3f/e8/3fe806964d76caddd3785a85b92317f1_s1200.jpg
https://pic.estate.am/image/9c/ee/9ceec901c1978dda5661011c92437a51_s1200.jpg
https://pic.estate.am/image/54/f2/54f22f2f4b0c3b31e707d65fd724f8c1_s1200.jpg
https://pic.estate.am/image/be/60/be6035423f61290f8a44e403263173b0_s1200.jpg
https://pic.estate.am/image/8d/b1/8db1082c6d12d3e065e88fa29eb44576_s1200.jpg
https://pic.estate.am/image/c1/16/c11612cacb2adebdacf29de8c4e79997_s1200.jpg
https://pic.estate.am/image/a5/52/a5529d335091fd944fdf71a60fdd820c_s1200.jpg
https://pic.estate.am/image/78/6d/786d1021ea0b000cbe14c0335a2fc042_s1200.jpg
https://pic.estate.am/image/21/a0/21a0bc20455dec32e50f82f8d5fcfbba_s1200.jpg

### Details

In [5]:
# Find the <ul> tag with class "property-features" and get all <li> children
property_features = soup.find('ul', class_='property-features').find_all('li', recursive=False)

# Create a dictionary to store the details
details = {}

# Loop through each <li> element
for feature in property_features:
    # Split the text content of <li> by ':' to separate the key and value
    parts = feature.get_text(strip=True).split(':')
    # Ensure that there are at least two parts (key and value)
    if len(parts) >= 2:
        # The key is the first part, and the value is the second part
        key = parts[0].strip()
        value = parts[1].strip()
        # Add the key and value to the details dictionary
        details[key] = value

# Print the result
for key, value in details.items():
    print(f'{key}: {value}')


Construction type: stone
Renovation: renovated
Flooring: laminate
Entrance door: metal
Windows: plastic
Heating: gas boiler
Cooling: air conditioner


### Rooms

In [6]:
# Find all <ul> tags with class "property-features"
features_lists = soup.find_all('ul', class_='property-features')

# Assuming the room details are in the second <ul>
if len(features_lists) > 1:
    room_features_list = features_lists[1].find_all('li', recursive=False)
    
    # Create a dictionary to store the room details
    room_details = {}
    
    # Loop through each <li> element
    for feature_item in room_features_list:
        # Split the text content of <li> by ':' to separate the key and value
        parts = feature_item.get_text(strip=True).split(':')
        # Ensure that there are at least two parts (key and value)
        if len(parts) >= 2:
            # The key is the first part, and the value is the second part
            key, value = parts[0].strip(), parts[1].strip()
            # Add the key and value to the room details dictionary
            room_details[key] = value
    
    # Print the result
    for key, value in room_details.items():
        print(f'{key}: {value}')
else:
    print("The expected room details section was not found")


rooms: 3
bedrooms: 2
bathrooms: 3
kitchens: 1


### Utilities

In [7]:
from bs4 import BeautifulSoup

# Find the <ul> element with class "property-features margin-top-0"
utilities_list = soup.find('ul', class_='property-features margin-top-0')

# Initialize a dictionary to store the utilities information
utilities_info_dict = {}

# Check if the <ul> element is found
if utilities_list:
    # Find all <li> elements inside the <ul>
    utilities_items = utilities_list.find_all('li', recursive=False)

    # Loop through each <li> element
    for utility in utilities_items:
        # Extract the utility name (key) and its value (yes/no)
        utility_text = utility.get_text(strip=True)
        if ':' in utility_text:
            utility_key = utility_text.split(':')[0].strip()
            utility_value = utility.find('span').get_text(strip=True) if utility.find('span') else "Not specified"
            
            # Add the utility information to the dictionary if key is not empty
            if utility_key:
                utilities_info_dict[utility_key] = utility_value
        else:
            # Handle the case where the <li> element doesn't follow the expected format
            print(f"Unexpected format for utility item: {utility_text}")

    # Print the utilities information
    for key, value in utilities_info_dict.items():
        print(f'{key}: {value}')
else:
    print("Utilities information is not available.")


Unexpected format for utility item: 
Construction type: stone
Renovation: renovated
Flooring: laminate
Entrance door: metal
Windows: plastic
Heating: gas boiler
Cooling: air conditioner


### Other Details

In [8]:
from bs4 import BeautifulSoup

# Find the <ul> element with class "property-features checkboxes margin-top-0"
features_list = soup.find('ul', class_='property-features checkboxes margin-top-0')

# Initialize a list to store the features
features = []

# Check if the <ul> element is found
if features_list:
    # Find all <li> elements inside the <ul>
    feature_items = features_list.find_all('li', recursive=False)

    # Loop through each <li> element
    for feature in feature_items:
        # Extract the feature text
        feature_text = feature.get_text(strip=True)
        
        # Add the feature information to the list
        features.append(feature_text)

    # Print the features
    for feature in features:
        print(f'- {feature}')
else:
    print("Features information is not available.")


- persistent water
- electricity
- laminatе flooring
- metal door
- refrigerator
- shower
- elevator
- LED TV
- air conditioner
- kitchen tiled
- appliances
- bathroom tiled
- plastic windows
- washing machine
- natural gas
- kitchen furniture
- furnished


### Latitiude and Longitude

In [9]:
# Check if the request was successful
if response.status_code == 200:
    # Parse the HTML content of the page with BeautifulSoup
    soup = BeautifulSoup(response.text, 'html.parser')

    # Find the div element with class "yandex-map"
    yandex_map_div = soup.find('div', class_='yandex-map')

    # Check if the div element is found
    if yandex_map_div:
        # Extract the latitude and longitude from the data-x and data-y attributes
        latitude = yandex_map_div.get('data-y')
        longitude = yandex_map_div.get('data-x')

        print(f'Latitude: {latitude}, Longitude: {longitude}')
    else:
        print("Yandex map element not found on the page.")
else:
    print("Failed to retrieve the webpage. Status code:", response.status_code)


Latitude: 44.506423457672, Longitude: 40.185914685872
