In [4]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time
import numpy as np


headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"
}

# Base URL and template
main_url = "https://hilalprp.com.om"
join_the_url = main_url + "/properties-search/page/{}/?status=for-sale"
MAX_PAGES = 100  # Safety limit

# Data storage
properties_data = { "Title_property": [],"Location_property": [],"Bedrooms": [],"Bathrooms": [],"Garage": [],"Price": [],"Size": [],"Listing_Type": []}

# Loop through pages
for page in range(1, MAX_PAGES + 1):
    url_data = join_the_url.format(page)
    print(f"\nScraping page {page}: {url_data}")
    try:
        response = requests.get(url_data, headers=headers)
        response.raise_for_status()
    except Exception as e:
        print(f"Failed to fetch page {page}: {e}")
        break

    soup = BeautifulSoup(response.text, "html.parser")
    listings = soup.find_all("article", class_="rh_list_card rh_popup_info_map")
    
    if not listings:
        print("No listings found.")
        break

    for listing in listings:
        # Title
        title_tag = listing.find("h3")
        title = title_tag.get_text(strip=True) if title_tag else "N/A"

        # Price
        price_tag = listing.find("p", class_="price")
        price = price_tag.get_text(strip=True).replace("OMR", "").replace(",", "").strip() if price_tag else "N/A"

        # Listing Type (for-sale)
        status_tag = listing.find("span", class_="status")
        listing_type = status_tag.get_text(strip=True) if status_tag else "For Sale"

        # Meta Info
        meta_dict = {"Bedrooms": "N/A", "Bathrooms": "N/A", "Garage": "N/A", "Size": "N/A"}
        meta_wrap = listing.find_all("div", class_="rh_prop_card__meta")

        for block in meta_wrap:
            label = block.find("span", class_="rh_meta_titles")
            value = block.find("span", class_="figure")
            if label and value:
                label_text = label.get_text(strip=True).lower()
                val_text = value.get_text(strip=True)
                if "bedroom" in label_text or "room" in label_text:
                    meta_dict["Bedrooms"] = val_text
                elif "bathroom" in label_text:
                    meta_dict["Bathrooms"] = val_text
                elif "garage" in label_text:
                    meta_dict["Garage"] = val_text
                elif "area" in label_text or "size" in label_text or "sqmt" in label_text:
                    meta_dict["Size"] = val_text

        # Location (from detail page)
        location = "N/A"
        details_tag = listing.find("a", href=True)
        if details_tag:
            detailed_url_data = details_tag["href"]
            try:
                detail_respond = requests.get(detailed_url_data, headers=headers)
                detail_respond.raise_for_status()
                details_soup = BeautifulSoup(detail_respond.content, "html.parser")
                locations_data_tag = details_soup.find("a", href=lambda x: x and "/property-city/" in x)
                if locations_data_tag:
                    location = locations_data_tag.get_text(strip=True)
            except Exception as e:
                print(f"Error fetching detail page: {e}")

        # Append data
        properties_data["Title_property"].append(title)
        properties_data["Location_property"].append(location)
        properties_data["Bedrooms"].append(meta_dict["Bedrooms"])
        properties_data["Bathrooms"].append(meta_dict["Bathrooms"])
        properties_data["Garage"].append(meta_dict["Garage"])
        properties_data["Price"].append(price)
        properties_data["Size"].append(meta_dict["Size"])
        properties_data["Listing_Type"].append(listing_type)

    time.sleep(1)  # polite pause




Scraping page 1: https://hilalprp.com.om/properties-search/page/1/?status=for-sale

Scraping page 2: https://hilalprp.com.om/properties-search/page/2/?status=for-sale

Scraping page 3: https://hilalprp.com.om/properties-search/page/3/?status=for-sale

Scraping page 4: https://hilalprp.com.om/properties-search/page/4/?status=for-sale

Scraping page 5: https://hilalprp.com.om/properties-search/page/5/?status=for-sale

Scraping page 6: https://hilalprp.com.om/properties-search/page/6/?status=for-sale

Scraping page 7: https://hilalprp.com.om/properties-search/page/7/?status=for-sale

Scraping page 8: https://hilalprp.com.om/properties-search/page/8/?status=for-sale
No listings found.


In [6]:
# Convert to DataFrame
df = pd.DataFrame(properties_data)
# Export to CSV
# df.to_csv("hilal_sale_data.csv", index=False)
# print("\nSaved to hilal_sale_data.csv")

In [11]:
df

Unnamed: 0,Title_property,Location_property,Bedrooms,Bathrooms,Garage,Price,Size,Listing_Type
0,3-BEDROOM APARTMENT,Bausher,3,,SHADED,45000,,For Sale
1,3-BEDROOM VILLA,Al Mawaleh,4,,SHADED,290000,,For Sale
2,6-BEDROOM TWIN VILLA,Bausher,7,,SHADED,180000,,For Sale
3,7-BEDROOM DETACHED VILLA,Al Ansab,9,,SHADED,300000,758,For Sale
4,4-BEDROOM DETACHED VILLA,Al Hail,6,,SHADED,80000,,For Sale
...,...,...,...,...,...,...,...,...
58,6-BEDROOM DETACHED VILLA,Al Hail,+9,,SHADED OUTSIDE,300000,,For Sale
59,3 BEDROOM TOWNHOUSE,Al Khoudh,3,,UNSHADED,80000,199,For Sale
60,8 BEDROOM DETACHED VILLA IN (MAWALLEH),Al Mawaleh,8,,4,-320000,670,For Sale
61,7 BEDROOM DETACHED VILLA IN (AL KHUWAIR),Al Khuwair,8,,1,85000,,For Sale


In [33]:
df.to_csv("hilal_sales_data.csv", index=False)
print("\nSaved to hilal_sales_data.csv")


Saved to hilal_sales_data.csv
