# Dubizzle Scraping
This notebook walks step by step through scraping rental property listings from Dubizzle: setup, sending requests, parsing the HTML, extracting the listing fields, and saving the results.

## 1. Setup & Imports

In [14]:
# import necessary libraries
import requests
from bs4 import BeautifulSoup
import time
import pandas as pd

## 2. Define URL & Headers

In [17]:
# Root address of the site; page URLs are built on top of it.
main_url = "https://www.dubizzle.com.om"

# HTTP headers sent along with each request.
headers = {
    "User-Agent": "Mozilla/5.0",
}

# Column-oriented storage: one independent, initially empty list per listing field.
dubizzle_data = {
    field: []
    for field in ('property_name', 'price', 'location', 'area', 'bathrooms', 'beds')
}

## 3–5. Fetch Pages, Parse HTML & Extract Listings

In [15]:
# Crawl settings for this cell.
LAST_PAGE = 104           # highest results page to request (inclusive)
REQUEST_TIMEOUT = 30      # seconds to wait for the server before giving up
PAGE_DELAY_SECONDS = 1.5  # pause after each page so requests are spaced out


def _text_or(tag, fallback):
    """Return the stripped text of ``tag``, or ``fallback`` when no tag was found."""
    return tag.text.strip() if tag else fallback


def _feature_value(card, label):
    """Return the value of the card feature whose ``aria-label`` equals ``label``.

    The value is read from the nested ``span`` with class ``_3e1113f0``.
    Returns 'Not specified' when either the feature or its value span is missing.
    """
    feature = card.find('span', attrs={'aria-label': label})
    value = feature.find('span', class_='_3e1113f0') if feature else None
    return _text_or(value, 'Not specified')


# Start from empty columns so that re-running this cell does not append
# every listing a second time.
for column in dubizzle_data.values():
    column.clear()

for page_number in range(1, LAST_PAGE + 1):
    current_page_url = f"{main_url}/en/properties/properties-for-rent/?page={page_number}"
    print(f"Scraping page {page_number}: {current_page_url}")

    # Without a timeout a stalled connection would block the cell indefinitely.
    response = requests.get(current_page_url, headers=headers, timeout=REQUEST_TIMEOUT)
    # Fail loudly on an HTTP error instead of parsing the error page and
    # misreporting it below as "no listings". Rows collected so far stay in
    # dubizzle_data.
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'html.parser')

    cards = soup.find_all('li', attrs={"aria-label": "Listing"})
    if not cards:
        # A page without listing cards is treated as the end of the results.
        print("No listings found on this page. Stopping.")
        break

    # NOTE(review): class names such as '_562a2db2' look auto-generated and may
    # change with a site update - verify them if fields come back as defaults.
    for card in cards:
        dubizzle_data['property_name'].append(
            _text_or(card.find('h2', class_='_562a2db2'), 'No title')
        )
        dubizzle_data['price'].append(
            _text_or(card.find('div', attrs={'aria-label': 'Price'}), 'Price not mentioned')
        )
        dubizzle_data['location'].append(
            _text_or(card.find('span', class_='f7d5e47e'), 'Location not mentioned')
        )
        dubizzle_data['area'].append(_feature_value(card, 'Area'))
        dubizzle_data['bathrooms'].append(_feature_value(card, 'Bathrooms'))
        dubizzle_data['beds'].append(_feature_value(card, 'Beds'))

    time.sleep(PAGE_DELAY_SECONDS)
    

Scraping page 1: https://www.dubizzle.com.om/en/properties/properties-for-rent/?page=1
Scraping page 2: https://www.dubizzle.com.om/en/properties/properties-for-rent/?page=2
Scraping page 3: https://www.dubizzle.com.om/en/properties/properties-for-rent/?page=3
Scraping page 4: https://www.dubizzle.com.om/en/properties/properties-for-rent/?page=4
Scraping page 5: https://www.dubizzle.com.om/en/properties/properties-for-rent/?page=5
Scraping page 6: https://www.dubizzle.com.om/en/properties/properties-for-rent/?page=6
Scraping page 7: https://www.dubizzle.com.om/en/properties/properties-for-rent/?page=7
Scraping page 8: https://www.dubizzle.com.om/en/properties/properties-for-rent/?page=8
Scraping page 9: https://www.dubizzle.com.om/en/properties/properties-for-rent/?page=9
Scraping page 10: https://www.dubizzle.com.om/en/properties/properties-for-rent/?page=10
Scraping page 11: https://www.dubizzle.com.om/en/properties/properties-for-rent/?page=11
Scraping page 12: https://www.dubizzle.

## 6. Create DataFrame

In [18]:
# Build the DataFrame from the scraped columns and preview the first rows.
# Run this after the scraping cell: re-running the setup cell in between
# rebinds dubizzle_data to empty lists and would produce an empty frame.
df = pd.DataFrame(dubizzle_data)
df.head()

## 7. Save to CSV

In [None]:
# Write the collected listings to a CSV file, leaving out the DataFrame index.
output_file = 'dubizzle_rent_listings.csv'
df.to_csv(path_or_buf=output_file, index=False)

# Report where the file was written.
print('Saved to', output_file)
