## project

In [2]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time


# hilal sale

In [4]:
# Headers to mimic browser
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"
}

# Base URL and template
BASE_URL = "https://hilalprp.com.om"
URL_TEMPLATE = BASE_URL + "/properties-search/page/{}/?status=for-sale"
MAX_PAGES = 100  # Safety limit

# Data storage
properties = {
    "Title": [],
    "Location": [],
    "Bedrooms": [],
    "Bathrooms": [],
    "Garage": [],
    "Price": [],
    "Size": [],
    "Listing_Type": []
}

# Loop through pages
for page in range(1, MAX_PAGES + 1):
    url = URL_TEMPLATE.format(page)
    print(f"\nScraping page {page}: {url}")
    try:
        response = requests.get(url, headers=headers)
        response.raise_for_status()
    except Exception as e:
        print(f"Failed to fetch page {page}: {e}")
        break

    soup = BeautifulSoup(response.text, "html.parser")
    listings = soup.find_all("article", class_="rh_list_card")
    
    if not listings:
        print("No listings found.")
        break

    for listing in listings:
        # Title
        title_tag = listing.find("h3")
        title = title_tag.get_text(strip=True) if title_tag else "N/A"

        # Price
        price_tag = listing.find("p", class_="price")
        price = price_tag.get_text(strip=True).replace("OMR", "").replace(",", "").strip() if price_tag else "N/A"

        # Listing Type (for-sale)
        status_tag = listing.find("span", class_="status")
        listing_type = status_tag.get_text(strip=True) if status_tag else "For Sale"

        # Meta Info
        meta_dict = {"Bedrooms": "N/A", "Bathrooms": "N/A", "Garage": "N/A", "Size": "N/A"}
        meta_wrap = listing.find_all("div", class_="rh_prop_card__meta")

        for block in meta_wrap:
            label = block.find("span", class_="rh_meta_titles")
            value = block.find("span", class_="figure")
            if label and value:
                label_text = label.get_text(strip=True).lower()
                val_text = value.get_text(strip=True)
                if "bedroom" in label_text or "room" in label_text:
                    meta_dict["Bedrooms"] = val_text
                elif "bathroom" in label_text:
                    meta_dict["Bathrooms"] = val_text
                elif "garage" in label_text:
                    meta_dict["Garage"] = val_text
                elif "area" in label_text or "size" in label_text or "sqmt" in label_text:
                    meta_dict["Size"] = val_text

        # Location (from detail page)
        location = "N/A"
        detail_link_tag = listing.find("a", href=True)
        if detail_link_tag:
            detail_url = detail_link_tag["href"]
            try:
                detail_resp = requests.get(detail_url, headers=headers)
                detail_resp.raise_for_status()
                detail_soup = BeautifulSoup(detail_resp.content, "html.parser")
                location_tag = detail_soup.find("a", href=lambda x: x and "/property-city/" in x)
                if location_tag:
                    location = location_tag.get_text(strip=True)
            except Exception as e:
                print(f"Error fetching detail page: {e}")

        # Append data
        properties["Title"].append(title)
        properties["Location"].append(location)
        properties["Bedrooms"].append(meta_dict["Bedrooms"])
        properties["Bathrooms"].append(meta_dict["Bathrooms"])
        properties["Garage"].append(meta_dict["Garage"])
        properties["Price"].append(price)
        properties["Size"].append(meta_dict["Size"])
        properties["Listing_Type"].append(listing_type)

    time.sleep(1)  # polite pause

# Convert to DataFrame
df = pd.DataFrame(properties)
print("\nSample data:")
print(df.head(10))

# Export to CSV
df.to_csv("hilal_sale_data.csv", index=False)
print("\nSaved to hilal_sale_data.csv")


Scraping page 1: https://hilalprp.com.om/properties-search/page/1/?status=for-sale

Scraping page 2: https://hilalprp.com.om/properties-search/page/2/?status=for-sale

Scraping page 3: https://hilalprp.com.om/properties-search/page/3/?status=for-sale

Scraping page 4: https://hilalprp.com.om/properties-search/page/4/?status=for-sale

Scraping page 5: https://hilalprp.com.om/properties-search/page/5/?status=for-sale

Scraping page 6: https://hilalprp.com.om/properties-search/page/6/?status=for-sale

Scraping page 7: https://hilalprp.com.om/properties-search/page/7/?status=for-sale

Scraping page 8: https://hilalprp.com.om/properties-search/page/8/?status=for-sale
No listings found.

Sample data:
                        Title    Location Bedrooms Bathrooms    Garage  \
0         3-BEDROOM APARTMENT     Bausher        3       N/A    SHADED   
1             3-BEDROOM VILLA  Al Mawaleh        4       N/A    SHADED   
2        6-BEDROOM TWIN VILLA     Bausher        7       N/A    SHADED   

In [6]:
df = pd.DataFrame(properties)
df

Unnamed: 0,Title,Location,Bedrooms,Bathrooms,Garage,Price,Size,Listing_Type
0,3-BEDROOM APARTMENT,Bausher,3,,SHADED,45000,,For Sale
1,3-BEDROOM VILLA,Al Mawaleh,4,,SHADED,290000,,For Sale
2,6-BEDROOM TWIN VILLA,Bausher,7,,SHADED,180000,,For Sale
3,7-BEDROOM DETACHED VILLA,Al Ansab,9,,SHADED,300000,758,For Sale
4,4-BEDROOM DETACHED VILLA,Al Hail,6,,SHADED,80000,,For Sale
...,...,...,...,...,...,...,...,...
58,6-BEDROOM DETACHED VILLA,Al Hail,+9,,SHADED OUTSIDE,300000,,For Sale
59,3 BEDROOM TOWNHOUSE,Al Khoudh,3,,UNSHADED,80000,199,For Sale
60,8 BEDROOM DETACHED VILLA IN (MAWALLEH),Al Mawaleh,8,,4,-320000,670,For Sale
61,7 BEDROOM DETACHED VILLA IN (AL KHUWAIR),Al Khuwair,8,,1,85000,,For Sale


# Data Cleaning & Integration

In [7]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 63 entries, 0 to 62
Data columns (total 8 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   Title         63 non-null     object
 1   Location      63 non-null     object
 2   Bedrooms      63 non-null     object
 3   Bathrooms     63 non-null     object
 4   Garage        63 non-null     object
 5   Price         63 non-null     object
 6   Size          63 non-null     object
 7   Listing_Type  63 non-null     object
dtypes: object(8)
memory usage: 4.1+ KB


In [8]:
df.head(30)

Unnamed: 0,Title,Location,Bedrooms,Bathrooms,Garage,Price,Size,Listing_Type
0,3-BEDROOM APARTMENT,Bausher,3.0,,SHADED,45000,,For Sale
1,3-BEDROOM VILLA,Al Mawaleh,4.0,,SHADED,290000,,For Sale
2,6-BEDROOM TWIN VILLA,Bausher,7.0,,SHADED,180000,,For Sale
3,7-BEDROOM DETACHED VILLA,Al Ansab,9.0,,SHADED,300000,758.0,For Sale
4,4-BEDROOM DETACHED VILLA,Al Hail,6.0,,SHADED,80000,,For Sale
5,5-BEDROOM TWIN VILLA,Al Ansab,7.0,,UNSHADED,700,,"For Rent, For Sale"
6,7-BEDROOM TWIN VILLA,,8.0,,SHADED,105000,573.0,For Sale
7,6+1 BEDROOM DETACHED VILLA,Bausher,7.0,,SHADED,180,,For Sale
8,4+1 BEDROOM COMPOUND VILLA,Al Khoudh,5.0,,OUTSIDE,80000,260.0,For Sale
9,7-BEDROOM DETACHED VILLA,Al Ansab,9.0,,SHADED,220,,For Sale


In [9]:
df.describe(include='all')

Unnamed: 0,Title,Location,Bedrooms,Bathrooms,Garage,Price,Size,Listing_Type
count,63,63,63,63.0,63,63,63.0,63
unique,59,18,11,1.0,10,40,41.0,2
top,7-BEDROOM DETACHED VILLA,Bausher,6,,2,85000,,For Sale
freq,2,11,13,63.0,23,5,16.0,60


In [10]:
df.Location.unique()

array(['Bausher', 'Al Mawaleh', 'Al Ansab', 'Al Hail', 'N/A', 'Al Khoudh',
       'Shatti Al Qurum', 'Maabelah', 'Barka', 'Qurum',
       'Madinat Qaboos (MQ)', 'Mutrah', 'Muscat Hills', 'Rusayl',
       'Al Ghoubrah', 'MUSCAT BAY', 'Al Khuwair', 'Salalah'], dtype=object)

In [11]:
df.isnull().sum()

Title           0
Location        0
Bedrooms        0
Bathrooms       0
Garage          0
Price           0
Size            0
Listing_Type    0
dtype: int64

In [12]:
df.duplicated().sum()

np.int64(0)

In [13]:
#checking null values
df[df.isnull().any(axis=1)]

Unnamed: 0,Title,Location,Bedrooms,Bathrooms,Garage,Price,Size,Listing_Type


In [14]:
df.dtypes

Title           object
Location        object
Bedrooms        object
Bathrooms       object
Garage          object
Price           object
Size            object
Listing_Type    object
dtype: object

In [15]:
df

Unnamed: 0,Title,Location,Bedrooms,Bathrooms,Garage,Price,Size,Listing_Type
0,3-BEDROOM APARTMENT,Bausher,3,,SHADED,45000,,For Sale
1,3-BEDROOM VILLA,Al Mawaleh,4,,SHADED,290000,,For Sale
2,6-BEDROOM TWIN VILLA,Bausher,7,,SHADED,180000,,For Sale
3,7-BEDROOM DETACHED VILLA,Al Ansab,9,,SHADED,300000,758,For Sale
4,4-BEDROOM DETACHED VILLA,Al Hail,6,,SHADED,80000,,For Sale
...,...,...,...,...,...,...,...,...
58,6-BEDROOM DETACHED VILLA,Al Hail,+9,,SHADED OUTSIDE,300000,,For Sale
59,3 BEDROOM TOWNHOUSE,Al Khoudh,3,,UNSHADED,80000,199,For Sale
60,8 BEDROOM DETACHED VILLA IN (MAWALLEH),Al Mawaleh,8,,4,-320000,670,For Sale
61,7 BEDROOM DETACHED VILLA IN (AL KHUWAIR),Al Khuwair,8,,1,85000,,For Sale
