# `Hilal` Web Scraping

In [19]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time

In [20]:
# Site root; also used to resolve relative detail-page links.
base_url = "https://hilalprp.com.om"
# Paginated search URL for rental listings; "{}" is filled with the page number.
url_template = base_url + "/properties-search/page/{}/?status=for-rent"
# Upper bound on the number of result pages to request.
max_pages = 38
# HTTP headers sent with every request made by the scraping cell. Defined here so
# the notebook runs top-to-bottom on a fresh kernel instead of relying on a
# `headers` variable left over from an earlier session.
# NOTE(review): the User-Agent value is a generic browser string -- adjust if the
# site expects something else.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/120.0 Safari/537.36"
    )
}

In [21]:
# Column-oriented accumulator for the scrape: each key is a column name and maps
# to its own (initially empty) list of values.
rental_properties = {
    column: []
    for column in (
        "property_title",
        "property_location",
        "price",
        "area",
        "listing_type",
    )
}

In [24]:
def _extract_area(card):
    """Return the area figure shown on a listing card, or None if there is none.

    Scans every ``div.rh_prop_card__meta`` inside ``card`` and picks the figure
    whose label mentions "area", "size" or "sqmt".
    """
    area = None
    for meta in card.find_all("div", class_="rh_prop_card__meta"):
        label = meta.find("span", class_="rh_meta_titles")
        value = meta.find("span", class_="figure")
        if label and value:
            label_text = label.get_text(strip=True).lower()
            if any(keyword in label_text for keyword in ("area", "size", "sqmt")):
                # No break on purpose: if several meta entries match, the last one is used.
                area = value.get_text(strip=True)
    return area


def _fetch_location(session, detail_url):
    """Fetch a property's detail page and return its city text, or None.

    The city is read from the first link whose href contains "/property-city/".
    Network/HTTP failures are reported and yield None so one bad detail page
    does not abort the whole scrape.
    """
    try:
        detail_resp = session.get(detail_url, headers=headers, timeout=10)
        detail_resp.raise_for_status()
    except requests.RequestException as e:
        print(f"Error fetching detail page {detail_url}: {e}")
        return None

    detail_soup = BeautifulSoup(detail_resp.content, 'html.parser')
    location_tag = detail_soup.find("a", href=lambda x: x and "/property-city/" in x)
    return location_tag.get_text(strip=True) if location_tag else None


# Start from empty columns so re-running this cell never appends duplicate rows.
for column_values in rental_properties.values():
    column_values.clear()

# One Session for the whole scrape: connections to the site are reused across
# the listing pages and the per-card detail pages.
with requests.Session() as session:
    for page in range(1, max_pages + 1):
        url = url_template.format(page)
        print(f"Scraping Page {page}...")

        # Only network/HTTP errors are treated as "page unavailable"; programming
        # errors (e.g. an undefined name) are left to surface as real exceptions.
        try:
            response = session.get(url, headers=headers, timeout=10)
            response.raise_for_status()
        except requests.RequestException as e:
            print(f"Failed to fetch page {page}: {e}")
            break

        soup = BeautifulSoup(response.text, "html.parser")
        cards = soup.find_all("article", class_="rh_list_card")
        if not cards:
            # A page without listing cards marks the end of the results.
            print("No more listings found.")
            break

        for card in cards:
            # property title
            title_tag = card.find("h3")
            title = title_tag.get_text(strip=True) if title_tag else None

            # price: drop the currency label and thousands separators, keep the rest as text
            price_tag = card.find("p", class_="price")
            price = (
                price_tag.get_text(strip=True).replace("OMR", "").replace(",", "").strip()
                if price_tag
                else None
            )

            # listing type
            status_tag = card.find("span", class_="status")
            listing_type = status_tag.get_text(strip=True) if status_tag else None

            # area
            area = _extract_area(card)

            # property location (read from the detail page behind the card's first link)
            location = None
            detail_link_tag = card.find("a", href=True)
            if detail_link_tag:
                detail_url = detail_link_tag['href']
                if not detail_url.startswith("http"):
                    detail_url = requests.compat.urljoin(base_url, detail_url)
                location = _fetch_location(session, detail_url)

            rental_properties["property_title"].append(title)
            rental_properties["property_location"].append(location)
            rental_properties["price"].append(price)
            rental_properties["area"].append(area)
            rental_properties["listing_type"].append(listing_type)

        # Pause between listing pages to avoid hammering the site.
        time.sleep(1)

Scraping Page 1...
Scraping Page 2...
Scraping Page 3...
Scraping Page 4...
Scraping Page 5...
Scraping Page 6...
Scraping Page 7...
Scraping Page 8...
Error fetching detail page https://hilalprp.com.om/property/2-bedroom-apartment-in-al-khuwair/: HTTPSConnectionPool(host='hilalprp.com.om', port=443): Read timed out. (read timeout=10)
Scraping Page 9...
Scraping Page 10...
Scraping Page 11...
Scraping Page 12...
Scraping Page 13...
Scraping Page 14...
Scraping Page 15...
Scraping Page 16...
Scraping Page 17...
Scraping Page 18...
Scraping Page 19...
No more listings found.


In [10]:
def parse_card(card):
    """Parse one listing card and append its fields to ``rental_properties``.

    ``card`` must offer ``find`` / ``find_all`` lookups (a BeautifulSoup tag in
    this notebook). Fields that cannot be found are stored as the string
    "Unknown". The location is read from the card's
    ``div.rh_prop_card__location``; when that is missing, the href of the card's
    first link is passed to ``get_location_from_detail`` (which must be defined
    elsewhere -- it is not part of this cell).

    Returns None; the result is the side effect of appending one value to each
    of the five lists in the global ``rental_properties`` dict.
    """
    def stripped_text(tag):
        # Text of an optional tag, with the "Unknown" sentinel when it is absent.
        return tag.text.strip() if tag else "Unknown"

    title = stripped_text(card.find("h3"))

    # Price: strip the "OMR" label and thousands separators, keep the rest as text.
    price = "Unknown"
    price_tag = card.find("p", class_="price")
    if price_tag:
        price = price_tag.text.strip().replace("OMR", "").replace(",", "").strip()

    listing_type = stripped_text(card.find("span", class_="status"))

    # Area: the figure of any meta entry whose caption mentions one of the
    # keywords; when several match, the last one wins.
    area_keywords = ["area", "size", "sqmt"]
    area = "Unknown"
    for meta_block in card.find_all("div", class_="rh_prop_card__meta"):
        caption = meta_block.find("span", class_="rh_meta_titles")
        figure = meta_block.find("span", class_="figure")
        if not (caption and figure):
            continue
        caption_text = caption.text.strip().lower()
        if any(keyword in caption_text for keyword in area_keywords):
            area = figure.text.strip()

    # Location: prefer the card itself, fall back to the detail page.
    location_tag = card.find("div", class_="rh_prop_card__location")
    if location_tag:
        location = location_tag.text.strip()
    else:
        detail_link = card.find("a", href=True)
        if detail_link:
            location = get_location_from_detail(detail_link["href"])
        else:
            location = "Unknown"

    # Append one value per column, in the dict's column order.
    record = {
        "property_title": title,
        "property_location": location,
        "price": price,
        "area": area,
        "listing_type": listing_type,
    }
    for column, value in record.items():
        rental_properties[column].append(value)

In [25]:
# Turn the column lists into a table; column names come from the dict keys.
df_hilal = pd.DataFrame.from_dict(rental_properties)
# Bare last expression so the notebook renders the frame.
df_hilal

Unnamed: 0,property_title,property_location,price,area,listing_type
0,2-BEDROOM APARTMENT,Al Ansab,300,,For Rent
1,2-BEDROOM APARTMENT,Shatti Al Qurum,500,,For Rent
2,4+1 BEDROOM TWIN VILLA,Madinat Qaboos (MQ),1500,,For Rent
3,COMMERCIAL SHOP,Ghala,370,39,For Rent
4,COMMERCIAL OFFICE SPACE,Bausher,4,,For Rent
...,...,...,...,...,...
168,2 BEDROOM APARTMENT IN (SEEB),Seeb,350,170,For Rent
169,4 BEDROOM RENOVATED DETACHED VILLA,Azaiba,-950,+-340,For Rent
170,6 BEDROOM DETACHED VILLA IN (SHATTI AL QURUM),Shatti Al Qurum,-950,+-350,For Rent
171,2 BEDROOM APARTMENT IN (BOSHER),Bausher,475,,For Rent


In [26]:
# Write the table to hilal.csv (path is relative to the working directory);
# index=False leaves the row index out of the file.
df_hilal.to_csv(path_or_buf="hilal.csv", index=False)