In [2]:
!python -m pip install --upgrade pip

Collecting pip
  Downloading pip-25.2-py3-none-any.whl.metadata (4.7 kB)
Downloading pip-25.2-py3-none-any.whl (1.8 MB)
   ---------------------------------------- 0.0/1.8 MB ? eta -:--:--
   ----------------- ---------------------- 0.8/1.8 MB 7.5 MB/s eta 0:00:01
   ---------------------------------------- 1.8/1.8 MB 9.4 MB/s eta 0:00:00
Installing collected packages: pip
  Attempting uninstall: pip
    Found existing installation: pip 25.1.1
    Uninstalling pip-25.1.1:
      Successfully uninstalled pip-25.1.1
Successfully installed pip-25.2


In [3]:
!pip install requests beautifulsoup4 pandas



In [9]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time

base_url = "https://www.realproperty.pk/houses-for-sale-islamabad?page="
headers = {"User-Agent": "Mozilla/5.0"}

all_data = []

for page in range(1, 6):  # scrape first 5 pages
    url = base_url + str(page)
    print(f"Scraping page {page} -> {url}")
    
    response = requests.get(url, headers=headers)
    soup = BeautifulSoup(response.text, "html.parser")
    
    listings = soup.find_all("div", class_="single-property-box")
    
    for listing in listings:
        # Title
        title_tag = listing.find("div", class_=lambda c: c and "property-for-title" in c)
        title = title_tag.get_text(strip=True) if title_tag else None

        # Price
        price_tag = listing.find("h4")
        price = price_tag.get_text(strip=True) if price_tag else "No Price"

        # Location
        location_tag = listing.find("div", class_=lambda c: c and "property-address" in c)
        location = location_tag.get_text(strip=True) if location_tag else None

        # Fallbacks for missing data
        if not title:
            title = f"House For Sale {location if location else ''}".strip() or "House For Sale"
        
        if not location:
            if "Islamabad" in title:
                location = "Islamabad"
            else:
                location = "No Location"

        # Link
        link_tag = listing.find("a")
        link = "https://www.realproperty.pk" + link_tag["href"] if link_tag else "No Link"

        all_data.append({
            "Title": title,
            "Price": price,
            "Location": location,
            "Link": link
        })
    
    time.sleep(1)

# Save
df = pd.DataFrame(all_data)
df.to_csv("realproperty_islamabad_autofilled.csv", index=False, encoding="utf-8-sig")
print("✅ Scraped total", len(df), "listings (auto-filled missing values)")
df.head(10)


Scraping page 1 -> https://www.realproperty.pk/houses-for-sale-islamabad?page=1
Scraping page 2 -> https://www.realproperty.pk/houses-for-sale-islamabad?page=2
Scraping page 3 -> https://www.realproperty.pk/houses-for-sale-islamabad?page=3
Scraping page 4 -> https://www.realproperty.pk/houses-for-sale-islamabad?page=4
Scraping page 5 -> https://www.realproperty.pk/houses-for-sale-islamabad?page=5
✅ Scraped total 200 listings (auto-filled missing values)


Unnamed: 0,Title,Price,Location,Link
0,House For Sale,PKR11.00 Crore,No Location,https://www.realproperty.pk/property/1-kanal-h...
1,House For Sale,PKR24.00 Crore,No Location,https://www.realproperty.pk/property/40x120-ho...
2,House For Sale,PKR8.00 Crore,No Location,https://www.realproperty.pk/property/kanal-hou...
3,House For Sale,PKR3.60 Crore,No Location,https://www.realproperty.pk/property/house-for...
4,House For Sale,Call For Price,No Location,https://www.realproperty.pk/property/house-for...
5,House For Sale,PKR4.80 Crore,No Location,https://www.realproperty.pk/property/house-for...
6,House For Sale,PKR1.18 Crore,No Location,https://www.realproperty.pk/property/grey-stru...
7,House For Sale,PKR6.50 Crore,No Location,https://www.realproperty.pk/property/bahria-en...
8,House For Sale,PKR16.00 Crore,No Location,https://www.realproperty.pk/property/double-st...
9,House For Sale,PKR16.00 Crore,No Location,https://www.realproperty.pk/property/5-marla-h...


In [10]:
import re

def clean_price(price_str):
    if not price_str or "No Price" in price_str:
        return None
    
    price_str = price_str.replace("PKR", "").strip()
    
    # Crore (1 crore = 10,000,000)
    match = re.search(r"([\d.]+)\s*Crore", price_str, re.IGNORECASE)
    if match:
        return float(match.group(1)) * 10000000
    
    # Lakh (1 lakh = 100,000)
    match = re.search(r"([\d.]+)\s*Lakh", price_str, re.IGNORECASE)
    if match:
        return float(match.group(1)) * 100000
    
    # Thousand
    match = re.search(r"([\d.]+)\s*Thousand", price_str, re.IGNORECASE)
    if match:
        return float(match.group(1)) * 1000
    
    # If just number
    match = re.search(r"([\d,]+)", price_str)
    if match:
        return float(match.group(1).replace(",", ""))
    
    return None


In [11]:
# Load your cleaned dataset
df = pd.read_csv("realproperty_islamabad_autofilled.csv")

# Apply cleaning function
df["Price_PKR"] = df["Price"].apply(clean_price)

# Save updated dataset
df.to_csv("realproperty_islamabad_cleaned.csv", index=False, encoding="utf-8-sig")

print("✅ Cleaned prices added")
df.head(10)


✅ Cleaned prices added


Unnamed: 0,Title,Price,Location,Link,Price_PKR
0,House For Sale,PKR11.00 Crore,No Location,https://www.realproperty.pk/property/1-kanal-h...,110000000.0
1,House For Sale,PKR24.00 Crore,No Location,https://www.realproperty.pk/property/40x120-ho...,240000000.0
2,House For Sale,PKR8.00 Crore,No Location,https://www.realproperty.pk/property/kanal-hou...,80000000.0
3,House For Sale,PKR3.60 Crore,No Location,https://www.realproperty.pk/property/house-for...,36000000.0
4,House For Sale,Call For Price,No Location,https://www.realproperty.pk/property/house-for...,
5,House For Sale,PKR4.80 Crore,No Location,https://www.realproperty.pk/property/house-for...,48000000.0
6,House For Sale,PKR1.18 Crore,No Location,https://www.realproperty.pk/property/grey-stru...,11800000.0
7,House For Sale,PKR6.50 Crore,No Location,https://www.realproperty.pk/property/bahria-en...,65000000.0
8,House For Sale,PKR16.00 Crore,No Location,https://www.realproperty.pk/property/double-st...,160000000.0
9,House For Sale,PKR16.00 Crore,No Location,https://www.realproperty.pk/property/5-marla-h...,160000000.0


In [12]:
df["Price_PKR"].mean()

np.float64(48032993.43617021)

In [13]:
df.loc[df["Price_PKR"].idxmin()]
df.loc[df["Price_PKR"].idxmax()]


Title                                           House For Sale
Price                                           PKR80.00 Crore
Location                                           No Location
Link         https://www.realproperty.pk/property/g13-full-...
Price_PKR                                          800000000.0
Name: 52, dtype: object

In [14]:
df.groupby("Location")["Price_PKR"].mean().sort_values(ascending=False)


Location
F-8, Islamabad                                 2.166667e+08
F-11, Islamabad                                1.600000e+08
Gulberg Green, Islamabad                       1.100000e+08
DHA Phase 5, DHA Defence                       1.100000e+08
Ali Pur, Islamabad                             1.066667e+08
Islamabad                                      1.016667e+08
B-17 - ock B, B-17 -  MPCHS - Multi Gardens    8.000000e+07
DHA Defence, Islamabad                         7.500000e+07
DHA Phase 2 - Sector G, DHA Phase 2            7.500000e+07
DHA Phase 1 - Sector A, DHA Phase 1            7.000000e+07
G-11, Islamabad                                6.300000e+07
DHA Phase 2 - Sector D, DHA Phase 2            5.500000e+07
Bahria Enclave, Islamabad                      4.850000e+07
No Location                                    4.803299e+07
G-8, Islamabad                                 4.800000e+07
I-11, Islamabad                                4.125000e+07
CBR Town, Islamabad            